## **Collaborative Filtering (User Based)**

In [0]:
#Importing relevant libraries

import numpy as np
import pandas as pd

In [0]:
#Loading csv files from drive

anime = pd.read_csv('/content/drive/My Drive/anime/anime.csv')
reviews = pd.read_csv('/content/drive/My Drive/anime/rating.csv')

## **Inspecting data**

In [19]:
#Looking at the contents in anime.csv file

anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [20]:
#Looking at the contents in ratings.csv file

reviews.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [21]:
reviews.tail()

Unnamed: 0,user_id,anime_id,rating
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9
7813736,73516,8074,9


## **Data Cleaning**

In [22]:
#Checking for missing values in anime file

anime.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

####*62 Genres and 25 types are not specified*
####*230 ratings are not given!*

In [23]:
#Checking for missing values in ratings file

reviews.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

*No missing values found..* 🎉🎊

In [24]:
#Dealing with the missing values

anime['type'] = anime['type'].fillna('None')
anime['genre'] = anime['genre'].fillna('None')
anime['rating'] = anime['rating'].fillna(anime['rating'].median())
anime.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

Ok! Now that the missing values are dealt with let's move forward

In [25]:
len(reviews) #Total length of ratings table

7813737

In [26]:
len(anime) #Total length of anime table

12294

We'll be using **2 systems** for calculating **similarity scores**

*   **Eucledian Distance**
*   **Pearson Correlation**





##**Using Eucledian Distance**

In [0]:
count = len(reviews) #Taking the length of reviews into a variable count
preferences = {} #Initialising a dictionary "preferences"

In [0]:
#Numpy array
review_array = reviews.values

In [0]:
for i in range(count):
    user = review_array[i, 0]
    anime = review_array[i, 1]
    rating = review_array[i, 2]
    
    if user not in preferences.keys():
        preferences[user] = {}

    if rating != -1: #Dealing with the -1 ratings(Anime watched but not rated)
        preferences[user][anime] = rating

In [0]:
def sim_euclidian(preferences, person1,person2):
    
    similarity = {}
    
    #getting similar anime watched
    
    for anime in preferences[person1]:
        if anime in preferences[person2]:
            similarity[anime] = 1
    
    if len(similarity) == 0:
        return 0
    
    Euclidian_distance = np.sqrt(np.sum([np.power(preferences[person1][anime] - preferences[person2][anime],2) 
                                 for anime in preferences[person1] if anime in preferences[person2]]))
    
    return 1/(1 + Euclidian_distance)

In [0]:
def top_matches(preferences, person, n = 5, similarity = sim_euclidian):
    
    scores = [(similarity(preferences,person,reviewer),reviewer) for reviewer in preferences if reviewer != person]
    
    scores.sort()
    scores.reverse()
    return scores[:n]

In [0]:
top_match = top_matches(preferences,1)

In [34]:
top_match

[(1.0, 73507), (1.0, 73481), (1.0, 73473), (1.0, 73440), (1.0, 73439)]

In [0]:
def recommendations(preferences, person, similarity = sim_euclidian):
    total = {}
    simsum = {}
    
    for reviwer in preferences:
        if reviwer == person:
            continue
        sim = similarity(preferences,person,reviwer)
        
        if sim <= 0:
            continue
        for anime in preferences[reviwer]:
            
            if anime not in preferences[person] or preferences[person][anime] == 0:
                total.setdefault(anime,0)
                total[anime] += preferences[reviwer][anime] * sim
                
                simsum.setdefault(anime,0)
                simsum[anime] += sim
                
    ranks = [(total/simsum[anime],anime) for anime,total in total.items()]
    
    ranks.sort()
    ranks.reverse()
    
    return ranks[:10],person

In [0]:
anime = pd.read_csv('/content/drive/My Drive/anime/anime.csv')
anime_array = anime.values

In [37]:
anime_array[0][0]

32281

In [0]:
top_rec,user = recommendations(preferences,1)

In [0]:
anime_list = []

for i in top_rec:
    for j in range(len(anime_array)):
        if i[1] == anime_array[j][0]:
            anime_list.append(anime_array[j])

In [45]:
print("Top Recommendations for you:\n")
for i in anime_list:
    print("Anime :",i[1])
    print("Genre :",i[2])
    print("-"*50)

Top Recommendations for you:

Anime : STAR BEAT!: Hoshi no Kodou
Genre : Music
--------------------------------------------------
Anime : Shiroi Zou
Genre : Action, Historical, Kids
--------------------------------------------------
Anime : Dededen
Genre : Slice of Life
--------------------------------------------------
Anime : Trapp Ikka Monogatari Specials
Genre : Drama, Historical, Music, Romance
--------------------------------------------------
Anime : Konna Watashitachi ga Nariyuki de Heroine ni Natta Kekka www
Genre : Comedy, Fantasy
--------------------------------------------------
Anime : gdgd Fairies Movie: tte Iu Eiga wa Dou kana...?
Genre : Comedy, Fantasy
--------------------------------------------------
Anime : Meitantei Holmes: Mrs. Hudson Hitojichi Jiken no Maki / Dover Kaikyou no Daikuuchuusen no Maki
Genre : Adventure, Comedy, Mystery
--------------------------------------------------
Anime : Only You: Viva! Cabaret Club
Genre : Comedy
------------------------------

## **Using Pearson**

In [0]:
count = len(reviews) #Taking the length of reviews into a variable count
preferences = {} #Initialising a dictionary "preferences"

In [0]:
#Numpy array
review_array = reviews.values

In [0]:
for i in range(count):
    user = review_array[i, 0]
    anime = review_array[i, 1]
    rating = review_array[i, 2]
    
    if user not in preferences.keys():
        preferences[user] = {}

    preferences[user][anime] = rating

In [0]:
def sim_pearson(preferences, person1,person2):
    
    similarity = {}
    
    #getting similar anime watched
    
    for anime in preferences[person1]:
        if anime in preferences[person2]:
            similarity[anime] = 1
    
    if len(similarity) == 0:
        return 0
    
    sum1 = np.sum([preferences[person1][anime] for anime in similarity])
    sum2 = np.sum([preferences[person2][anime] for anime in similarity])
    
    sum1sq = np.sum([np.power(preferences[person1][anime],2) for anime in similarity])
    sum2sq = np.sum([np.power(preferences[person2][anime],2) for anime in similarity])
    
    pSum = np.sum([preferences[person1][anime] * preferences[person2][anime] for anime in similarity])
    
    num = pSum - (sum1 * sum2/len(similarity))
    den = np.sqrt((sum1sq - np.power(sum1,2)/len(similarity))*(sum2sq - np.power(sum2,2)/len(similarity)))
    if den == 0:
        return 0
    
    r = num/den
    
    return r
    

In [0]:
def top_matches(preferences, person, n = 5, similarity = sim_pearson):
    
    scores = [(similarity(preferences,person,reviewer),reviewer) for reviewer in preferences if reviewer != person]
    
    scores.sort()
    scores.reverse()
    return scores[:n]

In [0]:
top_match = top_matches(preferences,1)

In [54]:
top_match

[(1.000000000000011, 35755),
 (1.000000000000011, 35508),
 (1.0000000000000084, 32904),
 (1.0000000000000084, 27924),
 (1.0000000000000084, 21376)]

In [0]:
def recommendations(preferences, person, similarity = sim_pearson):
    total = {}
    simsum = {}
    
    for reviwer in preferences:
        if reviwer == person:
            continue
        sim = similarity(preferences,person,reviwer)
        
        if sim <= 0:
            continue
        for anime in preferences[reviwer]:
            
            if anime not in preferences[person] or preferences[person][anime] == 0:
                
                #Formula for calculating the pearson Score 
                total.setdefault(anime,0)
                total[anime] += preferences[reviwer][anime] * sim
                
                simsum.setdefault(anime,0)
                simsum[anime] += sim
                
    ranks = [(total/simsum[anime],anime) for anime,total in total.items()]
    
    ranks.sort()
    ranks.reverse()
    
    return ranks[:10],person

In [0]:
anime = pd.read_csv('/content/drive/My Drive/anime/anime.csv')
anime_array = anime.values

In [57]:
anime_array[0][0]

32281

In [0]:
top_rec,user = recommendations(preferences,1)

In [0]:
anime_list = []
for i in top_rec:
    for j in range(len(anime_array)):
        if i[1] == anime_array[j][0]:
            anime_list.append(anime_array[j])

In [60]:
print("Top Recommendations for you:\n")
for i in anime_list:
    print("Anime :",i[1])
    print("Genre :",i[2])
    print("-"*50)

Top Recommendations for you:

Anime : Kirin Monoshiri Yakata
Genre : Kids
--------------------------------------------------
Anime : Midoriyama Koukou Koushien-hen
Genre : Comedy, School, Sports
--------------------------------------------------
Anime : Shiroi Zou
Genre : Action, Historical, Kids
--------------------------------------------------
Anime : Doukyuusei
Genre : Kids, Music
--------------------------------------------------
Anime : Dededen
Genre : Slice of Life
--------------------------------------------------
Anime : Konna Watashitachi ga Nariyuki de Heroine ni Natta Kekka www (TV)
Genre : Comedy, Fantasy
--------------------------------------------------
Anime : Trapp Ikka Monogatari Specials
Genre : Drama, Historical, Music, Romance
--------------------------------------------------
Anime : Konna Watashitachi ga Nariyuki de Heroine ni Natta Kekka www
Genre : Comedy, Fantasy
--------------------------------------------------
Anime : Meitantei Holmes: Mrs. Hudson Hitojichi