# Import Python Libraries and Load Anime Data Function

In [8]:
from math import sqrt
import json
import pandas as pd
import numpy as np
from csv import reader
from sklearn import preprocessing 
import operator


def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset

# Convert Anime Genre Strings to Genre Binary

In [9]:
import re

def get_genres_bin(genres):
    pattern = re.compile(r', ')
    genres_bin = pattern.split(genres)
    return genres_bin

# Calculate Distance of Two Animies by Euclidean Distance 

In [10]:
def calc_euclidean_distance(movie_1, movie_2):
    genre_distance = 0.0
    rating_distance = 0.0
    members_distance = 0.0
    
    movie_1_gen_bin = get_genres_bin(movie_1[6])
    movie_2_gen_bin = get_genres_bin(movie_2[6])
    
    for x in range(len(movie_2_gen_bin)):
        if len(movie_1_gen_bin) == len(movie_2_gen_bin):
            genre_distance += (float(movie_1_gen_bin[x]) - float(movie_2_gen_bin[x]))**2 
        
    normal_genre_distance = genre_distance / len(movie_1_gen_bin)
        
    if (([not s or s.isspace() for s in movie_1[5]]) and ([not sp or sp.isspace() for sp in movie_2[5]])):
        if movie_1[5].isdigit():
            rating_distance += (float(movie_1[5]) - float(movie_2[5]))**2
    
    if (([not s or s.isspace() for s in movie_1[7]]) and ([not sp or sp.isspace() for sp in movie_2[7]])):
        if movie_1[7].isdigit():
            members_array = np.array([[int(movie_1[7]), int(movie_2[7])]])
            members_array = preprocessing.normalize(members_array)
            members_distance += (members_array[0][0] - members_array[0][1])**2
    
    measuring_dimension = 3
    
    total_distance = (normal_genre_distance + rating_distance +  members_distance) / measuring_dimension
    return sqrt(total_distance)

data = load_csv("/Users/brangmai/Desktop/Capstone/Recommender-System-WebApp/anime.csv")
dist = calc_euclidean_distance(data[1], data[10])
print(dist)
print(data[5])




0.38632127823460755
['8', 'Beet the Vandel Buster', 'Adventure, Fantasy, Shounen, Supernatural', 'TV', '52', '7.06', '0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0', '9848']


# Calculate Distance of Two Animies by Cosine Similarity

In [11]:
# import operator
# from scipy.spatial import distance
# Cosine similarity formular
# cos(d1, d2) = (d1 . d2) / (\\d1\\ * \\d2\\)

def calc_cosine_similarity(anime_one, anime_two):
    genres_bin_one = get_genres_bin(anime_one[6])
    genres_bin_two = get_genres_bin(anime_two[6])

    anime_one_rating = anime_one[5]
    anime_one_rating = float(anime_one_rating)
    anime_two_rating = anime_two[5]
    anime_two_rating = float(anime_two_rating)



    rating_dot_product = anime_one_rating * float(anime_two[5])
    rating_magnitudes = rating_dot_product / sqrt((anime_one_rating**2) * (anime_two_rating**2))

    anime_one_popularity = float(anime_one[7])
    anime_two_popularity = float(anime_two[7])
    popularity_dot_product = anime_one_popularity * anime_two_popularity
    popularity_magnitudes = popularity_dot_product / sqrt((anime_one_popularity**2) * (anime_two_popularity**2))

    genres_dot_products = 0.0
    genres_one_magnitude = 0.0
    genres_two_magnitude = 0.0

    for i in range(len(genres_bin_two)):
        genres_dot_products += float(genres_bin_one[i]) * float(genres_bin_two[i])
        genres_one_magnitude += float(genres_bin_one[i])**2
        genres_two_magnitude += float(genres_bin_two[i])**2

    product_of_magnitudes = sqrt(genres_one_magnitude) * sqrt(genres_two_magnitude)
    genre_magnitudes = genres_dot_products / product_of_magnitudes
    genre_magnitudes = genre_magnitudes / len(genres_bin_two)

    measuring_dimension = 3
    cosine_similarity = (rating_magnitudes + popularity_magnitudes + genre_magnitudes) / measuring_dimension
    cosine_similarity = cosine_similarity / measuring_dimension

    return round(cosine_similarity, 4)
    
data = load_csv("/Users/brangmai/Desktop/Capstone/Recommender-System-WebApp/anime.csv")
dist = calc_cosine_similarity(data[5], data[5])
print(f'Cosine similarity {dist}')
print(len(data))



Cosine similarity 0.2248
12295


# Anime Recommendation by K Nearest Neighbor ML Algorithm

In [12]:
def get_nearest_neighbors(anime_data, search_anime, K):
    distances = []
    for anime in anime_data:
        if anime != search_anime:
            dist = calc_euclidean_distance(anime, search_anime)
            if dist != -1:
                distances.append((anime, dist))
    distances.sort(key=operator.itemgetter(1)) # Sort the distances of animes
    
    neighbors = []
    for i in range(K):
        percent_matched = int(100 - distances[i][1])
        neighbors.append( (distances[i][0], percent_matched) )
    return neighbors

# User Friendly Anime Search by Suggesting Possible Anime Titles

In [13]:
def search_input_title(anime_data, title):
    for anime in anime_data:
        if title == anime[1]:
            return anime
        elif input_title in anime[1]:
            print(f'Do you want similar anime to "{anime[1]}"?')
            agree = int(input('Enter 1 for YES or 0 for NO: '))
            if agree:
                return anime 
    return ""



# Present the List of Search Results

In [14]:
anime_data = load_csv("/Users/brangmai/Desktop/Capstone/Recommender-System-WebApp/anime.csv")


input_title = input("Enter an anime title: ")
found_anime = search_input_title(anime_data, input_title)

if found_anime == "": 
    print(f'{input_title} cannot be found')
    
else:    
    K = 20
    
    nearest_neighbors = get_nearest_neighbors(anime_data, found_anime, K)
    print(f"\nThe most similar animes to {found_anime[1]}: ")
    print("------------------------------------------")
    
    for index, neighbor in enumerate(nearest_neighbors):
        if index != 0:   # Avoiding the culumn titles
            print(f'{index}. {neighbor[0][1]} ({neighbor[1]}% matched)')



Enter an anime title: School
Do you want similar anime to "School Rumble"?
Enter 1 for YES or 0 for NO: 1

The most similar animes to School Rumble: 
------------------------------------------
1. Ore no Nounai Sentakushi ga, Gakuen Love Comedy wo Zenryoku de Jama Shiteiru (99% matched)
2. Nisekoi: (99% matched)
3. Ansatsu Kyoushitsu (TV) 2nd Season (99% matched)
4. Special A (99% matched)
5. Baka to Test to Shoukanjuu Ni! (99% matched)
6. Kuroko no Basket 3rd Season (99% matched)
7. Hidan no Aria (99% matched)
8. Seto no Hanayome (99% matched)
9. Bokura wa Minna Kawaisou (99% matched)
10. Amagami SS (99% matched)
11. Ore no Kanojo to Osananajimi ga Shuraba Sugiru (99% matched)
12. Bakuman. 2nd Season (99% matched)
13. Inu x Boku SS (99% matched)
14. Seitokai Yakuindomo (99% matched)
15. Ookami Shoujo to Kuro Ouji (99% matched)
16. Haikyuu!! Second Season (99% matched)
17. Sayonara Zetsubou Sensei (99% matched)
18. Nekomonogatari: Kuro (99% matched)
19. Shokugeki no Souma: Ni no Sara (9

# Search by Cosine Similarity

In [15]:
def get_nearest_neighbors_cosine(anime_data, search_anime, K):
    distances = []
    for index, anime in enumerate(anime_data):
        if index > 0 and anime[1] != search_anime[1]:
            dist = calc_cosine_similarity(anime, search_anime)
            if dist != -1:
                distances.append((anime, dist))
    distances.sort(key=operator.itemgetter(1)) # Sort the distances of animes
    
    neighbors = []
    for i in range(K):
        percent_matched = int(100 - distances[i][1])
        neighbors.append( (distances[i][0], percent_matched) )
    return neighbors

In [16]:
data = load_csv("/Users/brangmai/Desktop/Capstone/Recommender-System-WebApp/anime.csv")


input_title = input("Enter an anime title: ")
found_anime = search_input_title(data, input_title)

if found_anime == "": 
    print(f'{input_title} cannot be found')
    
else:    
    K = 20
    
    nearest_neighbors = get_nearest_neighbors_cosine(data, found_anime, K)
    print(f"\nThe most similar animes to {found_anime[1]}: ")
    print("------------------------------------------")
    
    for index, neighbor in enumerate(nearest_neighbors):
        if index != 0:   # Avoiding the culumn titles
            print(f'{index}. {neighbor[0][1]} ({neighbor[1]}% matched)')

Enter an anime title: Monster


ValueError: could not convert string to float: ''

In [None]:
input_genres = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]

K = 10
anime_id = str(found_anime_key)

nearest_neighbors = nearest_neighbors(input_genres, K)
print(f"\nRecommended Animes: ")
print("------------------------")
no = 1
for neighbor, percentage in nearest_neighbors:
    print(f'{no}. {anime_dictionary[neighbor][0]}, ({percentage}% matched)')
    no += 1     

