# Import Python Libraries and Load Anime Data Function

In [1]:
from math import sqrt
import json
import pandas as pd
import numpy as np
from csv import reader
from sklearn import preprocessing 
import operator


def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset

# Convert Anime Genre Strings to Genre Binary

In [2]:
import re

def get_genres_bin(genres):
    pattern = re.compile(r', ')
    genres_bin = pattern.split(genres)
    return genres_bin

# Calculate Distance of Two Animies by Euclidean Distance 

In [3]:
def calc_euclidean_distance(movie_1, movie_2):
    genre_distance = 0.0
    rating_distance = 0.0
    members_distance = 0.0
    
    movie_1_gen_bin = get_genres_bin(movie_1[6])
    movie_2_gen_bin = get_genres_bin(movie_2[6])
    
    for x in range(len(movie_2_gen_bin)):
        if len(movie_1_gen_bin) == len(movie_2_gen_bin):
            genre_distance += (float(movie_1_gen_bin[x]) - float(movie_2_gen_bin[x]))**2 
        
    normal_genre_distance = genre_distance / len(movie_1_gen_bin)
        
    if (([not s or s.isspace() for s in movie_1[5]]) and ([not sp or sp.isspace() for sp in movie_2[5]])):
        if movie_1[5].isdigit():
            rating_distance += (float(movie_1[5]) - float(movie_2[5]))**2
    
    if (([not s or s.isspace() for s in movie_1[7]]) and ([not sp or sp.isspace() for sp in movie_2[7]])):
        if movie_1[7].isdigit():
            members_array = np.array([[int(movie_1[7]), int(movie_2[7])]])
            members_array = preprocessing.normalize(members_array)
            members_distance += (members_array[0][0] - members_array[0][1])**2
    
    measuring_dimension = 3
    
    total_distance = (normal_genre_distance + rating_distance +  members_distance) / measuring_dimension
    return sqrt(total_distance)



# Calculate Distance of Two Animies by Cosine Similarity

In [4]:
# import operator
# from scipy.spatial import distance
# Cosine similarity formular
# cos(d1, d2) = (d1 . d2) / (\\d1\\ * \\d2\\)

def calc_cosine_similarity(anime_one, anime_two):
    
    if len(anime_one[6]) == len(anime_two[6]):
        genres_bin_one = get_genres_bin(anime_one[6])
        genres_bin_two = get_genres_bin(anime_two[6])
        
        anime_one_rating = anime_one[5]
        anime_one_rating = float(anime_one_rating)
        anime_two_rating = anime_two[5]
        anime_two_rating = float(anime_two_rating)



        rating_dot_product = anime_one_rating * float(anime_two[5])
        rating_magnitudes = rating_dot_product / sqrt((anime_one_rating**2) * (anime_two_rating**2))
    
        anime_one_popularity = float(anime_one[7])
        anime_two_popularity = float(anime_two[7])
        popularity_dot_product = anime_one_popularity * anime_two_popularity
        popularity_magnitudes = popularity_dot_product / sqrt((anime_one_popularity**2) * (anime_two_popularity**2))

        genres_dot_products = 0.0
        genres_one_magnitude = 0.0
        genres_two_magnitude = 0.0

        for i in range(len(genres_bin_two)):
            genres_dot_products += float(genres_bin_one[i]) * float(genres_bin_two[i])
            genres_one_magnitude += float(genres_bin_one[i])**2
            genres_two_magnitude += float(genres_bin_two[i])**2

        product_of_magnitudes = sqrt(genres_one_magnitude) * sqrt(genres_two_magnitude)
        genre_magnitudes = genres_dot_products / product_of_magnitudes
        genre_magnitudes = genre_magnitudes / len(genres_bin_two)

        measuring_dimension = 3
        cosine_similarity = (rating_magnitudes + popularity_magnitudes + genre_magnitudes) / measuring_dimension
        cosine_similarity = cosine_similarity / measuring_dimension

        return round(cosine_similarity, 4)
    return -1




# Anime Recommendation by K Nearest Neighbor ML Algorithm

In [5]:
def get_nearest_neighbors(anime_data, search_anime, K):
    distances = []
    for anime in anime_data:
        if anime != search_anime:
            dist = calc_euclidean_distance(anime, search_anime)
            if dist != -1:
                distances.append((anime, dist))
    distances.sort(key=operator.itemgetter(1)) # Sort the distances of animes
    
    neighbors = []
    for i in range(K):
        percent_matched = int(100 - distances[i][1])
        neighbors.append( (distances[i][0], percent_matched) )
    return neighbors

# User Friendly Anime Search by Suggesting Possible Anime Titles

In [6]:
def search_input_title(anime_data, title):
    for anime in anime_data:
        if title == anime[1]:
            return anime
        elif input_title in anime[1]:
            print(f'Do you want similar anime to "{anime[1]}"?')
            agree = int(input('Enter 1 for YES or 0 for NO: '))
            if agree:
                return anime 
    return ""



# Present the List of Search Results

In [7]:
anime_data = load_csv("/Users/brangmai/Desktop/Capstone/Recommender-System-WebApp/anime.csv")


input_title = input("Enter an anime title: ")
found_anime = search_input_title(anime_data, input_title)

if found_anime == "": 
    print(f'{input_title} cannot be found')
    
else:    
    K = 20
    
    nearest_neighbors = get_nearest_neighbors(anime_data, found_anime, K)
    print(f"\nThe most similar animes to {found_anime[1]}: ")
    print("------------------------------------------")
    
    for index, neighbor in enumerate(nearest_neighbors):
        if index != 0:   # Avoiding the culumn titles
            print(f'{index}. {neighbor[0][1]} ({neighbor[1]}% matched)')



Enter an anime title: Monster

The most similar animes to Monster: 
------------------------------------------
1. Higurashi no Naku Koro ni Kai (99% matched)
2. Danganronpa: Kibou no Gakuen to Zetsubou no Koukousei The Animation (99% matched)
3. Ergo Proxy (99% matched)
4. Shinsekai yori (99% matched)
5. Gangsta. (99% matched)
6. Higurashi no Naku Koro ni (99% matched)
7. Shiki (99% matched)
8. Koutetsujou no Kabaneri (99% matched)
9. Gosick (99% matched)
10. Psycho-Pass 2 (99% matched)
11. Zankyou no Terror (99% matched)
12. Ghost in the Shell (99% matched)
13. Serial Experiments Lain (99% matched)
14. NHK ni Youkoso! (99% matched)
15. Jigoku Shoujo (99% matched)
16. Re:Zero kara Hajimeru Isekai Seikatsu (99% matched)
17. Black Lagoon: The Second Barrage (99% matched)
18. Death Parade (99% matched)
19. Zetsuen no Tempest (99% matched)


# Search by Cosine Similarity

In [None]:
def get_nearest_neighbors_cosine(anime_data, search_anime, K):
    distances = []
    for index, anime in enumerate(anime_data):
        if index > 0 and anime[1] != search_anime[1]:
            dist = calc_cosine_similarity(anime, search_anime)
            if dist != -1:
                distances.append((anime, dist))
    distances.sort(key=operator.itemgetter(1)) # Sort the distances of animes
    
    neighbors = []
    for i in range(K):
        percent_matched = int(100 - distances[i][1])
        neighbors.append( (distances[i][0], percent_matched) )
    return neighbors

In [None]:
data = load_csv("/Users/brangmai/Desktop/Capstone/Recommender-System-WebApp/anime.csv")


input_title = input("Enter an anime title: ")
found_anime = search_input_title(data, input_title)

if found_anime == "": 
    print(f'{input_title} cannot be found')
    
else:    
    K = 20
    
    nearest_neighbors = get_nearest_neighbors_cosine(data, found_anime, K)
    print(f"\nThe most similar animes to {found_anime[1]}: ")
    print("------------------------------------------")
    
    for index, neighbor in enumerate(nearest_neighbors):
        if index != 0:   # Avoiding the culumn titles
            print(f'{index}. {neighbor[0][1]} ({neighbor[1]}% matched)')

In [None]:
input_genres = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]

K = 10
anime_id = str(found_anime_key)

nearest_neighbors = nearest_neighbors(input_genres, K)
print(f"\nRecommended Animes: ")
print("------------------------")
no = 1
for neighbor, percentage in nearest_neighbors:
    print(f'{no}. {anime_dictionary[neighbor][0]}, ({percentage}% matched)')
    no += 1     

