# Recommendation System

# Collaborative Filtering

In [1]:
import pandas as pd 
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns 
import matplotlib.pyplot as plt 
%matplotlib inline 
plt.rcParams['figure.figsize']=(10,5)
plt.rcParams['figure.dpi']=200

from sklearn.preprocessing import StandardScaler , LabelEncoder,MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df=pd.read_csv("anime.csv")

In [3]:
df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [4]:
df.isna().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [5]:
df.duplicated().sum()

0

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [7]:
for col in df.columns:
    print(col)
    print(df[col].nunique())

anime_id
12294
name
12292
genre
3264
type
6
episodes
187
rating
598
members
6706


In [8]:
df.groupby('genre')['rating'].mean().sort_values(ascending=False).head()

genre
Action, Adventure, Drama, Fantasy, Magic, Military, Shounen    9.26
Drama, Fantasy, Romance, Slice of Life, Supernatural           9.06
Drama, School, Shounen                                         9.05
Action, Drama, Mecha, Military, Sci-Fi, Super Power            8.98
Adventure, Drama, Supernatural                                 8.93
Name: rating, dtype: float64

In [9]:
df.groupby('genre')['rating'].count().sort_values(ascending=False).head()

genre
Hentai                   816
Comedy                   521
Music                    297
Kids                     197
Comedy, Slice of Life    174
Name: rating, dtype: int64

In [10]:
df['genre']=df['genre'].fillna("")
genre_dummies = df['genre'].str.get_dummies(sep=",")


In [11]:
min_max_s = MinMaxScaler()
rating_scaled = min_max_s.fit_transform(df[['rating']].fillna(0))

In [12]:
features = np.hstack([genre_dummies.values,rating_scaled])

In [13]:
similarity = cosine_similarity(features)
similarity

array([[1.        , 0.14014885, 0.14001399, ..., 0.18606056, 0.18912173,
        0.2033087 ],
       [0.14014885, 1.        , 0.36358832, ..., 0.14487842, 0.14726204,
        0.15830891],
       [0.14001399, 0.36358832, 1.        , ..., 0.14473901, 0.14712034,
        0.15815658],
       ...,
       [0.18606056, 0.14487842, 0.14473901, ..., 1.        , 0.99996764,
        0.99895295],
       [0.18912173, 0.14726204, 0.14712034, ..., 0.99996764, 1.        ,
        0.99928866],
       [0.2033087 , 0.15830891, 0.15815658, ..., 0.99895295, 0.99928866,
        1.        ]])

In [14]:
np.fill_diagonal(similarity,0)

In [15]:
similarity

array([[0.        , 0.14014885, 0.14001399, ..., 0.18606056, 0.18912173,
        0.2033087 ],
       [0.14014885, 0.        , 0.36358832, ..., 0.14487842, 0.14726204,
        0.15830891],
       [0.14001399, 0.36358832, 0.        , ..., 0.14473901, 0.14712034,
        0.15815658],
       ...,
       [0.18606056, 0.14487842, 0.14473901, ..., 0.        , 0.99996764,
        0.99895295],
       [0.18912173, 0.14726204, 0.14712034, ..., 0.99996764, 0.        ,
        0.99928866],
       [0.2033087 , 0.15830891, 0.15815658, ..., 0.99895295, 0.99928866,
        0.        ]])

In [16]:
similarity = pd.DataFrame(similarity)
similarity

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12284,12285,12286,12287,12288,12289,12290,12291,12292,12293
0,0.000000,0.140149,0.140014,0.230814,0.138798,0.348074,0.176180,0.381881,0.137985,0.138121,...,0.172483,0.261409,0.179514,0.168575,0.169884,0.162616,0.166931,0.186061,0.189122,0.203309
1,0.140149,0.000000,0.363588,0.179726,0.362910,0.420423,0.623979,0.299262,0.362456,0.362532,...,0.134306,0.000000,0.139781,0.131263,0.132282,0.126623,0.129983,0.144878,0.147262,0.158309
2,0.140014,0.363588,0.000000,0.179553,0.999995,0.272663,0.461621,0.299150,0.999987,0.999989,...,0.134177,0.000000,0.139646,0.131137,0.132155,0.126501,0.129858,0.144739,0.147120,0.158157
3,0.230814,0.179726,0.179553,0.000000,0.177994,0.206044,0.225933,0.225523,0.176952,0.177126,...,0.221192,0.000000,0.230208,0.216180,0.217858,0.208538,0.214072,0.238603,0.242529,0.260722
4,0.138798,0.362910,0.999995,0.177994,0.000000,0.271734,0.460774,0.298133,0.999998,0.999999,...,0.133012,0.000000,0.138433,0.129998,0.131007,0.125403,0.128730,0.143482,0.145843,0.156783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12289,0.162616,0.126623,0.126501,0.208538,0.125403,0.145165,0.159177,0.158888,0.124668,0.124791,...,0.999680,0.000000,0.999053,0.999884,0.999827,0.000000,0.999939,0.998162,0.997643,0.994345
12290,0.166931,0.129983,0.129858,0.214072,0.128730,0.149017,0.163401,0.163105,0.127977,0.128102,...,0.999898,0.000000,0.999472,0.999991,0.999971,0.999939,0.000000,0.998771,0.998339,0.995457
12291,0.186061,0.144878,0.144739,0.238603,0.143482,0.166093,0.182126,0.181795,0.142642,0.142782,...,0.999377,0.000000,0.999854,0.998971,0.999118,0.998162,0.998771,0.000000,0.999968,0.998953
12292,0.189122,0.147262,0.147120,0.242529,0.145843,0.168826,0.185122,0.184786,0.144989,0.145131,...,0.999060,0.000000,0.999684,0.998574,0.998747,0.997643,0.998339,0.999968,0.000000,0.999289


In [17]:
similarity.index = df['anime_id'].unique()
similarity.columns = df['anime_id'].unique()

In [18]:
similarity

Unnamed: 0,32281,5114,28977,9253,9969,32935,11061,820,15335,15417,...,26031,34399,10368,9352,5541,9316,5543,5621,6133,26081
32281,0.000000,0.140149,0.140014,0.230814,0.138798,0.348074,0.176180,0.381881,0.137985,0.138121,...,0.172483,0.261409,0.179514,0.168575,0.169884,0.162616,0.166931,0.186061,0.189122,0.203309
5114,0.140149,0.000000,0.363588,0.179726,0.362910,0.420423,0.623979,0.299262,0.362456,0.362532,...,0.134306,0.000000,0.139781,0.131263,0.132282,0.126623,0.129983,0.144878,0.147262,0.158309
28977,0.140014,0.363588,0.000000,0.179553,0.999995,0.272663,0.461621,0.299150,0.999987,0.999989,...,0.134177,0.000000,0.139646,0.131137,0.132155,0.126501,0.129858,0.144739,0.147120,0.158157
9253,0.230814,0.179726,0.179553,0.000000,0.177994,0.206044,0.225933,0.225523,0.176952,0.177126,...,0.221192,0.000000,0.230208,0.216180,0.217858,0.208538,0.214072,0.238603,0.242529,0.260722
9969,0.138798,0.362910,0.999995,0.177994,0.000000,0.271734,0.460774,0.298133,0.999998,0.999999,...,0.133012,0.000000,0.138433,0.129998,0.131007,0.125403,0.128730,0.143482,0.145843,0.156783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9316,0.162616,0.126623,0.126501,0.208538,0.125403,0.145165,0.159177,0.158888,0.124668,0.124791,...,0.999680,0.000000,0.999053,0.999884,0.999827,0.000000,0.999939,0.998162,0.997643,0.994345
5543,0.166931,0.129983,0.129858,0.214072,0.128730,0.149017,0.163401,0.163105,0.127977,0.128102,...,0.999898,0.000000,0.999472,0.999991,0.999971,0.999939,0.000000,0.998771,0.998339,0.995457
5621,0.186061,0.144878,0.144739,0.238603,0.143482,0.166093,0.182126,0.181795,0.142642,0.142782,...,0.999377,0.000000,0.999854,0.998971,0.999118,0.998162,0.998771,0.000000,0.999968,0.998953
6133,0.189122,0.147262,0.147120,0.242529,0.145843,0.168826,0.185122,0.184786,0.144989,0.145131,...,0.999060,0.000000,0.999684,0.998574,0.998747,0.997643,0.998339,0.999968,0.000000,0.999289


In [19]:
similarity.idxmax()

32281      547
5114       121
28977     9969
9253     11577
9969     15417
         ...  
9316     14207
5543      9352
5621      3540
6133     13051
26081     4692
Length: 12294, dtype: int64

In [20]:
df[(df['anime_id']==9316) | (df['anime_id']==14207)]

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
12267,14207,Lovely Series,Hentai,OVA,2,4.11,174
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211


In [21]:
df[(df['anime_id']==6133) | (df['anime_id']==13051)]

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
12231,13051,Bishoujo Animerama: Miyuki-chan SOS-H Shichauzo,Hentai,OVA,1,4.99,235
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [22]:
def recommend(anime_name, top_n=10):
    # Find the index of the anime name
    if anime_name not in df['name'].values:
        return f"'{anime_name}' not found in dataset."

    idx = df[df['name'] == anime_name].index[0]

    # Get similarity scores for this anime
    scores = list(enumerate(similarity[idx]))

    # Sort by similarity (high to low)
    scores = sorted(scores, key=lambda x: x[1], reverse=True)

    # Take top_n+1 because first is the anime itself
    top_indices = [i for i, s in scores[1:top_n+1]]

    # Build result DataFrame
    result = pd.DataFrame({
        'Anime': df.loc[top_indices, 'name'].values,
        'Similarity': [scores[i+1][1] for i in range(top_n)]
    })

    return result


In [23]:
recommend("Naruto", top_n=10)

Unnamed: 0,Anime,Similarity
0,Code Geass: Hangyaku no Lelouch R2 Picture Drama,0.99882
1,Kanamemo,0.881081
2,Penguin Musume♥Heart,0.880684
3,Penguin Musume♥Heart Special,0.879619
4,Green Green Specials,0.879349
5,Cyclops Shoujo Saipuu,0.877856
6,Eureka Seven AO: Aratanari Fukaki Ao,0.839878
7,Momoiro Sisters,0.839878
8,Yutori-chan,0.839877
9,Kamiusagi Rope: Warau Asa ni wa Fukuraitaru tt...,0.839877
