In [1]:
import pandas as pd
import numpy as np
import pickle
import streamlit
from sklearn.metrics.pairwise import cosine_similarity
import requests

In [2]:
anime = pd.read_csv("anime.csv")
ratings = pd.read_csv("rating.csv")

In [30]:
anime.head(1)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630


In [31]:
ratings.head(1)

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1


## Content Based Recommendation

In [32]:
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [24]:
anime.type.unique()

array(['Movie', 'TV', 'OVA', 'Special', 'Music', 'ONA', nan], dtype=object)

In [3]:
anime['description'] = anime['genre'] + anime['type']

In [5]:
anime = anime.dropna(subset = ['description'])
anime['description'] = anime['description'].apply(lambda x : x.split(','))

In [6]:
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,description
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,"[Drama, Romance, School, SupernaturalMovie]"
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,"[Action, Adventure, Drama, Fantasy, Magic,..."
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,"[Action, Comedy, Historical, Parody, Samur..."
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,"[Sci-Fi, ThrillerTV]"
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,"[Action, Comedy, Historical, Parody, Samur..."


In [7]:
anime['description'] = anime['description'].apply(lambda x : [i.replace(' ', '') for i in x])

In [8]:
anime.description

0              [Drama, Romance, School, SupernaturalMovie]
1        [Action, Adventure, Drama, Fantasy, Magic, Mil...
2        [Action, Comedy, Historical, Parody, Samurai, ...
3                                     [Sci-Fi, ThrillerTV]
4        [Action, Comedy, Historical, Parody, Samurai, ...
                               ...                        
12289                                          [HentaiOVA]
12290                                          [HentaiOVA]
12291                                          [HentaiOVA]
12292                                          [HentaiOVA]
12293                                        [HentaiMovie]
Name: description, Length: 12210, dtype: object

In [9]:
anime['description'] = anime['description'].apply(lambda x : ' '.join(x))
anime['description'] = anime['description'].apply(lambda x : x.lower())
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,description
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,drama romance school supernaturalmovie
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,action adventure drama fantasy magic military ...
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,action comedy historical parody samurai sci-fi...
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,sci-fi thrillertv
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,action comedy historical parody samurai sci-fi...


In [10]:
from nltk.stem import PorterStemmer
ps = PorterStemmer()

In [11]:
def stem(text) :
    res = []
    for i in text.split():
        res.append(ps.stem(i))
    
    return ' '.join(res)

In [12]:
anime['description'] = anime['description'].apply(stem)
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,description
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,drama romanc school supernaturalmovi
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,action adventur drama fantasi magic militari s...
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,action comedi histor parodi samurai sci-fi sho...
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,sci-fi thrillertv
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,action comedi histor parodi samurai sci-fi sho...


In [13]:
# copy:https://www.kaggle.com/indralin/try-content-based-and-collaborative-filtering
import re
def text_cleaning(text):
    text = re.sub(r'&quot;', '', text)
    text = re.sub(r'.hack//', '', text)
    text = re.sub(r'&#039;', '', text)
    text = re.sub(r'A&#039;s', '', text)
    text = re.sub(r'I&#039;', 'I\'', text)
    text = re.sub(r'&amp;', 'and', text)
    
    return text

anime['name'] = anime['name'].apply(text_cleaning)

In [14]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(min_df = 3, max_features = None, ngram_range = (1,3) , stop_words = 'english')
vectors = cv.fit_transform(anime['description']).toarray()

In [15]:
vectors.shape

(12210, 2462)

In [16]:
content_similarity = cosine_similarity(vectors)

In [65]:
def content_recommend(anime_title):
    
    index = np.where(anime['name'] == anime_title)[0][0]
    similar_animes = sorted(enumerate(content_similarity[index]), key = lambda x : x[1], reverse = True)[1:6]
    
    for i in similar_animes:
        print(anime['name'][i[0]])
            

In [56]:
anime['name'][:]

0                                           Kimi no Na wa.
1                         Fullmetal Alchemist: Brotherhood
2                                                 Gintama°
3                                              Steins;Gate
4                                                  Gintama
                               ...                        
12289         Toushindai My Lover: Minami tai Mecha-Minami
12290                                          Under World
12291                       Violence Gekiga David no Hoshi
12292    Violence Gekiga Shin David no Hoshi: Inma Dens...
12293                     Yasuji no Pornorama: Yacchimae!!
Name: name, Length: 12210, dtype: object

In [64]:
recommend('Naruto: Shippuuden')

Naruto
Ranma ½: Akumu! Shunmin Kou
Boruto: Naruto the Movie
Boruto: Naruto the Movie - Naruto ga Hokage ni Natta Hi
Naruto x UT


In [17]:
pickle.dump(anime , open('anime.pkl' , 'wb'))
pickle.dump(content_similarity , open('Csimilarity.pkl' , 'wb'))

In [None]:
!streamlit run anime_app.py