[View in Colaboratory](https://colab.research.google.com/github/Sriyesh/Recommender-System/blob/master/Anime_Recommender.ipynb)

In [0]:
!pip install pydrive

In [0]:
from google.colab import auth
auth.authenticate_user()

from pydrive.drive import GoogleDrive
from pydrive.auth import GoogleAuth
from oauth2client.client import GoogleCredentials
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

myfile = drive.CreateFile({'id': '1-iHXPQIhdILjIHRxKtprn-4xWCOGh2t0'})
myfile.GetContentFile('anime.csv')

In [8]:
#import dependence
import pandas as pd 
from sklearn.feature_extraction.text import TfidfVectorizer
#load dataset
import io
data = pd.read_csv('anime.csv',low_memory=False)
data.shape

(12294, 7)

In [9]:
data.head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262


In [10]:
#calculate the average rating
c = data['rating'].mean()
print(c)

6.473901690981432


In [11]:
# calculate minimum number of votes required to be in chart
m = data['members'].quantile(0.90)
print(m)

45188.7


In [12]:
#filter out all qualified anime into a new dataframe
q_anime = data.copy().loc[data['members']>=m]
q_anime.shape

(1230, 7)

In [0]:
# Function that computes the weighted rating of each anime

def weighted_rating(x,m=m,c=c):
    v = x['members']
    R = x['rating']
    #calculation based on IMDB formula
    return (v/(v+m) * R) + (m/(m+v)*c)


In [0]:
#define a new feature 'score' and calculate its value with weighted_rating()
q_anime['score'] = q_anime.apply(weighted_rating, axis=1)

In [15]:
#sort the movie based on score
q_anime = q_anime.sort_values('score',ascending=False)

#print the top 15 animes
q_anime[['name','rating','members','score']].head(15)

Unnamed: 0,name,rating,members,score
1,Fullmetal Alchemist: Brotherhood,9.26,793665,9.109914
3,Steins;Gate,9.17,673572,9.000495
6,Hunter x Hunter (2011),9.13,425855,8.875192
0,Kimi no Na wa.,9.37,200630,8.837612
10,Clannad: After Story,9.06,456749,8.827177
13,Code Geass: Hangyaku no Lelouch R2,8.98,572888,8.796775
12,Gintama,9.04,336376,8.736097
15,Sen to Chihiro no Kamikakushi,8.93,466254,8.712991
19,Code Geass: Hangyaku no Lelouch,8.83,715151,8.689972
16,Shigatsu wa Kimi no Uso,8.92,416397,8.68053


In [16]:
data['genre'].head()

0                 Drama, Romance, School, Supernatural
1    Action, Adventure, Drama, Fantasy, Magic, Mili...
2    Action, Comedy, Historical, Parody, Samurai, S...
3                                     Sci-Fi, Thriller
4    Action, Comedy, Historical, Parody, Samurai, S...
Name: genre, dtype: object

In [17]:
#define a TF-IDF Vectorizer object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

#Replace NaN with an empty string
data['genre'] = data['genre'].fillna('')

#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(data['genre'])

#output the shape of tfidf_matrix
tfidf_matrix.shape

(12294, 46)

In [0]:
#import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

#compute the cosine similarity matrix
cosin_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [0]:
#construct a reverse map of indices and movie titles
indices = pd.Series(data.index, index=data['name']).drop_duplicates()

In [0]:
#function that takes in anime titles as input and outputs most similar animes
def get_recommendation(name,cosin_sim=cosin_sim):
  #get index of the movie that matchs the name
  idx = indices[name]
  
  #get the pairwise similarity score of all movies with that movie
  sim_score = list(enumerate(cosin_sim[idx]))
  
  #Sort the anmie based on the similarity scores
  sim_scores = sorted(sim_score,key=lambda x: x[1],reverse=True)
  
  # Get the score of the 10 most similar anime
  sim_score = sim_score[1:11]
  
  # Get the anime indices
  anime_indices = [i[0] for i in sim_scores]
  
  # Return the top 10 most similar anime
  return data['name'].iloc[anime_indices]

In [21]:
get_recommendation('Kimi no Na wa.')

0                                           Kimi no Na wa.
5805                           Wind: A Breath of Heart OVA
6394                          Wind: A Breath of Heart (TV)
1111                 Aura: Maryuuin Kouga Saigo no Tatakai
1201                        Angel Beats!: Another Epilogue
1494                                              Harmonie
878                          Shakugan no Shana II (Second)
986                                      Shakugan no Shana
1604                                   Shakugan no Shana S
1959                                             Air Movie
4514                                          Touka Gettan
5031                                        Mizuiro (2003)
5127                                    Venus Versus Virus
4219                           Rokujouma no Shinryakusha!?
11061                                         Renai Boukun
208                          Kokoro ga Sakebitagatterunda.
504      Clannad: After Story - Mou Hitotsu no Sekai, K.