In [22]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv("anime.csv")
df.shape

(12294, 7)

In [3]:
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
df.isna().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [5]:
df['genre'] = df['genre'].fillna('0')
df['type'] = df['type'].fillna('0')
df['rating'] = df['rating'].fillna('0')

In [6]:
df['rating']

0        9.37
1        9.26
2        9.25
3        9.17
4        9.16
         ... 
12289    4.15
12290    4.28
12291    4.88
12292    4.98
12293    5.46
Name: rating, Length: 12294, dtype: object

In [7]:
df.isna().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [8]:
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [9]:
df['rating'] = pd.to_numeric(df['rating'])

In [13]:
new_rating_df = df[['anime_id','name','genre','rating']]


Unnamed: 0,anime_id,name,genre,rating
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25
3,9253,Steins;Gate,"Sci-Fi, Thriller",9.17
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16
...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,4.15
12290,5543,Under World,Hentai,4.28
12291,5621,Violence Gekiga David no Hoshi,Hentai,4.88
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,4.98


In [16]:
new_rating_df['avg_rating'] = df['rating'].mean()
new_rating_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_rating_df['avg_rating'] = df['rating'].mean()


Unnamed: 0,anime_id,name,genre,rating,avg_rating
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37,6.352786
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26,6.352786
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25,6.352786
3,9253,Steins;Gate,"Sci-Fi, Thriller",9.17,6.352786
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16,6.352786
...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,4.15,6.352786
12290,5543,Under World,Hentai,4.28,6.352786
12291,5621,Violence Gekiga David no Hoshi,Hentai,4.88,6.352786
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,4.98,6.352786


In [15]:
new_rating_df.isna().sum()

anime_id      0
name          0
genre         0
rating        0
avg_rating    0
dtype: int64

#### Contect Based Recommendation System

In [None]:
#converting text data into numerical data
cv = CountVectorizer(max_features=12294 ,stop_words='english')
vector = cv.fit_transform(new_rating_df['genre'].values.astype('U')).toarray()

In [21]:
vector.shape

(12294, 46)

In [23]:
# Compute the cosine similarity matrix
similarity = cosine_similarity(vector)

In [25]:
similarity.shape

(12294, 12294)

In [24]:
similarity

array([[1.        , 0.18898224, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.18898224, 1.        , 0.26726124, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.26726124, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        1.        ]])

In [27]:
###Construct a reverse map of indices and movie titles
anime_indices_map = pd.Series(new_rating_df.index, index=new_rating_df['name']).drop_duplicates()
anime_indices_map

name
Kimi no Na wa.                                            0
Fullmetal Alchemist: Brotherhood                          1
Gintama°                                                  2
Steins;Gate                                               3
Gintama&#039;                                             4
                                                      ...  
Toushindai My Lover: Minami tai Mecha-Minami          12289
Under World                                           12290
Violence Gekiga David no Hoshi                        12291
Violence Gekiga Shin David no Hoshi: Inma Densetsu    12292
Yasuji no Pornorama: Yacchimae!!                      12293
Length: 12294, dtype: int64

In [38]:
def get_recommendations(name,similarity=similarity):
    # Get the index of the anime that matches the name
    idx = anime_indices_map[name]
    sim_scores = list(enumerate(similarity[idx]))

    # Sort the anime based on the similarity scores
    sim_scores = sorted(sim_scores,key = lambda x: x[1],reverse=True) 
    # Get the scores of the 5 most similar anime
    sim_scores = sim_scores[1:6]

    ##Get the anime indices
    anime_indices = [i[0] for i in sim_scores]

    # Return the top 5 most similar anime
    return new_rating_df['name'].iloc[anime_indices]

In [39]:
get_recommendations("Fullmetal Alchemist: Brotherhood")

200                               Fullmetal Alchemist
1558    Fullmetal Alchemist: The Sacred Star of Milos
402         Fullmetal Alchemist: Brotherhood Specials
9259                                  Kkomaeosa Ttori
101                        Magi: The Kingdom of Magic
Name: name, dtype: object

In [37]:
get_recommendations("Gintama")

2                                              Gintama°
4                                         Gintama&#039;
8     Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...
9                              Gintama&#039;: Enchousen
12                                              Gintama
63          Gintama: Yorinuki Gintama-san on Theater 2D
Name: name, dtype: object