In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
# Two toy documents: text A (first string) and text B (second string).
text = [
    "London Paris London",
    "Paris Paris London",
]

# Bag-of-words model: learn the vocabulary from `text`, then count how often
# each vocabulary word occurs in each document (the result is a sparse matrix).
cv = CountVectorizer()
cv.fit(text)
count_matrix = cv.transform(text)

# Show the learned vocabulary (one entry per column of the count matrix),
# followed by the counts themselves as a dense array.
learned_vocabulary = cv.get_feature_names_out()
dense_counts = count_matrix.toarray()
print(learned_vocabulary)
print(dense_counts)

# Reading the output: each row is one text and each column one vocabulary word,
# so "london" appears twice in text A and once in text B.
# Next step: compute the cosine similarity between the two row vectors to see how similar the texts are.

['london' 'paris']
[[2 1]
 [1 2]]


In [4]:
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Cosine similarity between the rows (texts) of count_matrix.
similarity_scores = cosine_similarity(count_matrix)
print(similarity_scores)

# The output is interpreted as follows:
#             Text A (string 1)       Text B (string 2)
#
# Text A      1.                      0.8
#
# Text B      0.8                     1.

# Text A is 100% similar to itself and 80% similar to Text B; the matrix is symmetric, so the same holds from Text B's side.

[[1.  0.8]
 [0.8 1. ]]


In [11]:
# Load the anime dataset. reset_index() adds the row numbers as an "index"
# column, which the title/id lookup helpers further down rely on.
df = pd.read_csv("Data/anime.csv").reset_index()

# Columns that are concatenated into one text "document" per anime.
features = [
    "Score",
    "Rating",
    "Producers",
    "Ranked",
    "Plan to Watch",
    "Popularity",
    "Type",
    "Genres",
]

def combine_features(r):
    """Join the recommender's feature values of one row into a single string.

    Parameters
    ----------
    r : mapping-like row
        Anything indexable by column name (for example the pandas Series that
        ``df.apply(..., axis=1)`` passes in). It must provide the keys
        "Score", "Rating", "Producers", "Ranked", "Plan to Watch",
        "Popularity", "Type" and "Genres".

    Returns
    -------
    str
        The eight values, in the order listed above, separated by single
        spaces. A missing value (None / NaN) contributes an empty string.
    """
    columns = (
        "Score",
        "Rating",
        "Producers",
        "Ranked",
        "Plan to Watch",
        "Popularity",
        "Type",
        "Genres",
    )
    # Every field is coerced to text, not just the numeric-looking ones, so the
    # function no longer raises TypeError when a text column holds NaN; missing
    # values become "" to match the fillna("") convention used by the notebook.
    return " ".join("" if pd.isna(r[column]) else str(r[column]) for column in columns)

# Replace missing values in every feature column with an empty string so that
# each feature can be joined as text.
df = df.assign(**{column: df[column].fillna("") for column in features})

# One combined text "document" per row, built by combine_features.
df["combined_features"] = df.apply(combine_features, axis=1)

df.head()

Unnamed: 0,index,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,...,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,combined_features
0,0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",...,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0,8.78 R - 17+ (violence & profanity) Bandai Vis...
1,1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",...,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0,"8.39 R - 17+ (violence & profanity) Sunrise, B..."
2,2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",...,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0,8.24 PG-13 - Teens 13 or older Victor Entertai...
3,3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",...,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0,"7.27 PG-13 - Teens 13 or older TV Tokyo, Banda..."
4,4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",...,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0,"6.98 PG - Children TV Tokyo, Dentsu 3710.0 339..."


In [12]:
# Bag-of-words counts over the combined feature text: one row of counts per row of df.
# NOTE(review): CountVectorizer's default tokenizer presumably splits on punctuation and
# drops single-character tokens, so numeric fields such as "8.78" may not survive as one
# token - confirm this is the intended treatment of Score/Ranked/Popularity.
cv_anime = CountVectorizer()
count_matrix_anime = cv_anime.fit_transform(df["combined_features"])

# Pairwise cosine similarity between all rows; row i holds the similarity of anime i to every anime.
# NOTE(review): presumably a dense n x n array for the n rows of df - check memory use on large datasets.
cosine_similarity_anime = cosine_similarity(count_matrix_anime)

In [13]:
def get_id_from_title(name):
    """Return the "index" column value of the anime whose "Name" is ``name``.

    An exact match on the "Name" column is tried first. If nothing matches and
    ``name`` is a string, the comparison is repeated ignoring letter case and
    surrounding whitespace, so typed input such as " naruto" still resolves.
    When several rows match, the first one is used.

    Parameters
    ----------
    name : str
        Title to look up in the module-level DataFrame ``df``.

    Returns
    -------
    The value stored in the "index" column of the first matching row.

    Raises
    ------
    IndexError
        If no row matches. The exception type is the one the previous
        implementation raised, now with a message naming the title.
    """
    titles = df["Name"]
    matches = df.loc[titles == name, "index"]
    if matches.empty and isinstance(name, str):
        wanted = name.strip().casefold()
        matches = df.loc[titles.str.strip().str.casefold() == wanted, "index"]
    if matches.empty:
        raise IndexError(f"No anime titled {name!r} found in df['Name']")
    return matches.values[0]

def get_title_from_id(id):
    """Return the "Name" of the anime whose "index" column equals ``id``.

    The lookup uses the "index" column, the same column get_id_from_title
    returns, so the two helpers stay inverses of each other even if the
    DataFrame's own index is changed later.

    Parameters
    ----------
    id : int
        Value to look up in the "index" column of the module-level ``df``.
        (The name shadows the builtin ``id``; it is kept so existing keyword
        callers continue to work.)

    Returns
    -------
    The "Name" value of the first matching row.

    Raises
    ------
    IndexError
        If no row has that "index" value. The exception type is the one the
        previous implementation raised, now with a descriptive message.
    """
    matches = df.loc[df["index"] == id, "Name"]
    if matches.empty:
        raise IndexError(f"No anime with index {id!r} found in df")
    return matches.values[0]

In [14]:
anime_user = input("Enter an anime: ")
anime_index = get_id_from_title(anime_user)

# Pair every row number with its similarity score against the chosen anime.
similar_animes = list(enumerate(cosine_similarity_anime[anime_index]))

# Exclude the chosen anime by its row number instead of assuming it sorts
# first: with a stable sort, another row with an equal score and a lower row
# number would come first, and dropping element 0 would then remove the wrong entry.
sorted_anime_recommendations = sorted(
    (pair for pair in similar_animes if pair[0] != anime_index),
    key=lambda pair: pair[1],
    reverse=True,
)

print("Top 10 Recommendations for " + anime_user + " :")
# Slicing (rather than range(10)) also works when fewer than 10 candidates exist.
for candidate_index, _score in sorted_anime_recommendations[:10]:
    print(get_title_from_id(candidate_index))


Top 10 Recommendations for Naruto :
Boruto: Naruto Next Generations
Naruto: Shippuuden
Bleach
Dragon Ball Z
Naruto: Shippuuden Movie 5 - Blood Prison
Naruto: Shippuuden Movie 6 - Road to Ninja
Dragon Ball
Rekka no Honoo
Bleach Movie 1: Memories of Nobody
Kenyuu Densetsu Yaiba
