# Anime Recomendation System

### **Import Module and Data**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
import string
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [2]:
data_original=pd.read_csv('anime_details_final.csv')
data=data_original.copy()
data=data[data['type']=='TV']
data.shape

(4202, 9)

### **Data Preprocessing**

In [3]:
data=data.drop_duplicates()
data.head()

Unnamed: 0,name,type,episodes,mal_id,english_title,studio,score,genere,description
0,Sousou no Frieren,TV,28.0,52991,Frieren: Beyond Journey's End,['Madhouse'],9.39,"['Shounen', 'Adventure', 'Drama', 'Fantasy']",During their decade-long quest to defeat the D...
1,Fullmetal Alchemist: Brotherhood,TV,64.0,5114,Fullmetal Alchemist: Brotherhood,['Bones'],9.09,"['Military', 'Shounen', 'Action', 'Adventure',...",After a horrific alchemy experiment goes wrong...
2,Steins;Gate,TV,24.0,9253,Steins;Gate,['White Fox'],9.07,"['Psychological', 'Time Travel', 'Drama', 'Sci...",Eccentric scientist Rintarou Okabe has a never...
3,Gintama°,TV,51.0,28977,Gintama Season 4,['Bandai Namco Pictures'],9.06,"['Gag Humor', 'Historical', 'Parody', 'Samurai...","Gintoki, Shinpachi, and Kagura return as the f..."
4,Shingeki no Kyojin Season 3 Part 2,TV,10.0,38524,Attack on Titan Season 3 Part 2,['Wit Studio'],9.05,"['Gore', 'Military', 'Survival', 'Shounen', 'A...",Seeking to restore humanity's diminishing hope...


#### **Merge `name` and `english_title` give priority to `english_title`**

In [4]:
data['title'] = data.apply(lambda row: row['english_title'] if pd.notnull(row['english_title']) else row['name'], axis=1)

#### **Drop Columns that are not required**

In [5]:
model_data=data.drop(columns=['name','episodes','mal_id','english_title','type'],axis=1)

In [6]:
model_data.reset_index(drop=True,inplace=True)

#### **Fill null `description` with empty string**

In [7]:
model_data['description']=model_data['description'].fillna("")

#### **Formating Text Data**

In [8]:
from nltk.stem.porter import PorterStemmer
ps=PorterStemmer()
def clean_description(des):
    cleaned_text = des.lower()
    cleaned_text = cleaned_text.replace('\n\n[written by mal rewrite]', '')
    cleaned_text = cleaned_text.replace('\n', '')
    cleaned_text = cleaned_text.replace('\b', '')

    punctuation_chars = set(string.punctuation)
    punctuation_chars.remove("'")
    modified_text = ""
    for char in cleaned_text:
        modified_text += char
        if char in punctuation_chars:
            modified_text += " "
    cleaned_text=modified_text
    
    translator = str.maketrans('', '', string.punctuation.replace("'", ""))
    cleaned_text = cleaned_text.translate(translator)

    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)

    cleaned_text=re.sub(r'\d+', '', cleaned_text)

    tokens = word_tokenize(cleaned_text)

    stop_words = set(stopwords.words('english'))
    
    # Remove stopwords from the text
    cleaned_text = [word for word in tokens if word.lower() not in stop_words]
     # Initialize lemmatizer
    lemmatizer = WordNetLemmatizer()
    
    # Lemmatize each token
    cleaned_text = [ps.stem(token) for token in cleaned_text]
    cleaned_text=' '.join(cleaned_text)
    return cleaned_text

def modify_genere_studio(generes):
    generes=[''.join(genere.split()) for genere in generes]
    return ' '.join(generes)

In [9]:
model_data.head()

Unnamed: 0,studio,score,genere,description,title
0,['Madhouse'],9.39,"['Shounen', 'Adventure', 'Drama', 'Fantasy']",During their decade-long quest to defeat the D...,Frieren: Beyond Journey's End
1,['Bones'],9.09,"['Military', 'Shounen', 'Action', 'Adventure',...",After a horrific alchemy experiment goes wrong...,Fullmetal Alchemist: Brotherhood
2,['White Fox'],9.07,"['Psychological', 'Time Travel', 'Drama', 'Sci...",Eccentric scientist Rintarou Okabe has a never...,Steins;Gate
3,['Bandai Namco Pictures'],9.06,"['Gag Humor', 'Historical', 'Parody', 'Samurai...","Gintoki, Shinpachi, and Kagura return as the f...",Gintama Season 4
4,['Wit Studio'],9.05,"['Gore', 'Military', 'Survival', 'Shounen', 'A...",Seeking to restore humanity's diminishing hope...,Attack on Titan Season 3 Part 2


In [10]:
model_data['genere']=model_data['genere'].apply(lambda x:ast.literal_eval(x.lower())) # Lower Case all text and convert string to list
model_data['studio']=model_data['studio'].apply(lambda x:ast.literal_eval(x.lower())) # Lower Case all text and convert string to list
model_data['description']=model_data['description'].apply(clean_description) # Clean Description

In [11]:
model_data['genere']=model_data['genere'].apply(modify_genere_studio)
model_data['studio']=model_data['studio'].apply(modify_genere_studio)

In [12]:
model_data

Unnamed: 0,studio,score,genere,description,title
0,madhouse,9.39,shounen adventure drama fantasy,decad long quest defeat demon king member hero...,Frieren: Beyond Journey's End
1,bones,9.09,military shounen action adventure drama fantasy,horrif alchemi experi goe wrong elric househol...,Fullmetal Alchemist: Brotherhood
2,whitefox,9.07,psychological timetravel drama sci-fi suspense,eccentr scientist rintar okab never end thirst...,Steins;Gate
3,bandainamcopictures,9.06,gaghumor historical parody samurai shounen act...,gintoki shinpachi kagura return fun love broke...,Gintama Season 4
4,witstudio,9.05,gore military survival shounen action drama su...,seek restor human 's diminish hope survey corp...,Attack on Titan Season 3 Part 2
...,...,...,...,...,...
4195,studiocomet,5.88,mythology horror supernatural,plot seri revolv around three youkai bem bera ...,Humanoid Monster Bem
4196,shogakukanmusic&digitalentertainment,5.88,kids comedy,daili live boy talk penguin,Penguin no Mondai
4197,zero-g,5.88,workplace comedy gourmet sliceoflife,summer break morina nanas start new part time ...,Piacevole: My Italian Cooking
4198,productioni.g olm,5.89,kids drama sliceoflife,stori begin futur earth longer inhabit live cr...,Pikaia!


In [13]:
model_data['combined_attributes']=model_data['studio']+' '+model_data['genere']

In [14]:
model_data

Unnamed: 0,studio,score,genere,description,title,combined_attributes
0,madhouse,9.39,shounen adventure drama fantasy,decad long quest defeat demon king member hero...,Frieren: Beyond Journey's End,madhouse shounen adventure drama fantasy
1,bones,9.09,military shounen action adventure drama fantasy,horrif alchemi experi goe wrong elric househol...,Fullmetal Alchemist: Brotherhood,bones military shounen action adventure drama ...
2,whitefox,9.07,psychological timetravel drama sci-fi suspense,eccentr scientist rintar okab never end thirst...,Steins;Gate,whitefox psychological timetravel drama sci-fi...
3,bandainamcopictures,9.06,gaghumor historical parody samurai shounen act...,gintoki shinpachi kagura return fun love broke...,Gintama Season 4,bandainamcopictures gaghumor historical parody...
4,witstudio,9.05,gore military survival shounen action drama su...,seek restor human 's diminish hope survey corp...,Attack on Titan Season 3 Part 2,witstudio gore military survival shounen actio...
...,...,...,...,...,...,...
4195,studiocomet,5.88,mythology horror supernatural,plot seri revolv around three youkai bem bera ...,Humanoid Monster Bem,studiocomet mythology horror supernatural
4196,shogakukanmusic&digitalentertainment,5.88,kids comedy,daili live boy talk penguin,Penguin no Mondai,shogakukanmusic&digitalentertainment kids comedy
4197,zero-g,5.88,workplace comedy gourmet sliceoflife,summer break morina nanas start new part time ...,Piacevole: My Italian Cooking,zero-g workplace comedy gourmet sliceoflife
4198,productioni.g olm,5.89,kids drama sliceoflife,stori begin futur earth longer inhabit live cr...,Pikaia!,productioni.g olm kids drama sliceoflife


In [66]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
cv_genere=CountVectorizer(max_features=5000)
tfid_genere=TfidfVectorizer(ngram_range=(1,2))
tfid_studio=TfidfVectorizer(ngram_range=(1,2))
tfid_description=TfidfVectorizer(ngram_range=(1,2))

In [67]:
genre_tfidf_matrix = tfid_genere.fit_transform(model_data['genere']).toarray()
description_tfidf_matrix = tfid_description.fit_transform(model_data['description']).toarray()
studio_tfidf_matrix = tfid_studio.fit_transform(model_data['studio']).toarray()

In [68]:
# Convert matrices to DataFrames
df1 = pd.DataFrame(genre_tfidf_matrix * 0.6,columns=tfid_genere.get_feature_names_out())
df2 = pd.DataFrame(description_tfidf_matrix * 0.35,columns=tfid_description.get_feature_names_out())
df3 = pd.DataFrame(studio_tfidf_matrix * 0.05,columns=tfid_studio.get_feature_names_out())

# Concatenate DataFrames along columns axis
combined_df = pd.concat([df1, df2,df3], axis=1)

# Optionally reset index if needed
combined_df.reset_index(drop=True, inplace=True)

In [69]:
combined_df.head()

Unnamed: 0,action,action adventure,action avantgarde,action awardwinning,action boyslove,action comedy,action drama,action ecchi,action fantasy,action girlslove,...,yuhodo,yumetacompany,yumetacompany graphinica,zero,zero digitalnetworkanimation,zero liber,zexcs,zexcs assezfinaudfabric,zexcs studioa,zuiyo
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.099747,0.150355,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.066057,0.0,0.0,0.0,0.0,0.125992,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.067854,0.0,0.0,0.0,0.0,0.0,0.142548,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [57]:
# vectors=tfid.fit_transform(model_data['combined_attributes']).toarray()

In [58]:
# vectors.shape

In [70]:
from sklearn.metrics.pairwise import cosine_similarity
similarity_matrix=cosine_similarity(combined_df.values)

In [71]:
sorted(similarity_matrix[0],reverse=True)

[0.999999999999999,
 0.5800516581787761,
 0.5789732228534381,
 0.5788241811085408,
 0.5776231347815413,
 0.5070680846127401,
 0.5068570604391863,
 0.5049027998624804,
 0.5032096461490617,
 0.5027300245944157,
 0.5000158647219103,
 0.4716884487932264,
 0.47092063838682446,
 0.4703960353363071,
 0.45973251576932317,
 0.4502427086179943,
 0.44641530173448535,
 0.44591428530482147,
 0.44484639606432114,
 0.4430156075658349,
 0.44301141173783143,
 0.437838472457392,
 0.43433289653493995,
 0.43185775207212995,
 0.43149729271277637,
 0.43147264263459684,
 0.43000004929114455,
 0.42982487950272874,
 0.42794999315609034,
 0.42683447330270635,
 0.42530762246694775,
 0.42406520041218443,
 0.4232492393615844,
 0.42277191883878323,
 0.4199505084141535,
 0.4165700408523421,
 0.41032318586644434,
 0.401656267462367,
 0.40091585639085725,
 0.399690966251013,
 0.39960292692623645,
 0.3991442287080802,
 0.3981808358795694,
 0.37736401368688727,
 0.3768495567446125,
 0.3765993083717276,
 0.37144127222771

In [72]:
def recommend(movie):
    movie_index=model_data[model_data['title']==movie].index[0]
    distances=similarity_matrix[movie_index]
    movies_list=sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:31]
    print(model_data.iloc[movie_index].title)
    for i in movies_list:
        print("-------------------------------")
        print(model_data.iloc[i[0]].title)
        print("Similarity Score:",i[1])
        print("Index:",i[0])

In [74]:
recommend("One Piece")

One Piece
-------------------------------
Dragon Quest: The Adventure of Dai
Similarity Score: 0.7520937752478172
Index: 630
-------------------------------
Beet the Vandel Buster
Similarity Score: 0.7496793436373813
Index: 2240
-------------------------------
Beet the Vandel Buster Excellion
Similarity Score: 0.7496793436373813
Index: 2783
-------------------------------
Dragon Quest: Dai no Daibouken (TV)
Similarity Score: 0.7489996286794414
Index: 846
-------------------------------
Magi: Adventure of Sinbad
Similarity Score: 0.748024782063208
Index: 512
-------------------------------
The Seven Deadly Sins: Dragon's Judgement
Similarity Score: 0.7464197821773717
Index: 3014
-------------------------------
Bleach: Thousand-Year Blood War - The Separation
Similarity Score: 0.746378648175764
Index: 43
-------------------------------
Hunter x Hunter
Similarity Score: 0.7458551920782805
Index: 119
-------------------------------
Fairy Tail
Similarity Score: 0.7456578775859697
Index: 847

In [25]:
model_data

Unnamed: 0,studio,score,genere,description,title,combined_attributes
0,madhouse,9.39,shounen adventure drama fantasy,decad long quest defeat demon king member hero...,Frieren: Beyond Journey's End,madhouse shounen adventure drama fantasy
1,bones,9.09,military shounen action adventure drama fantasy,horrif alchemi experi goe wrong elric househol...,Fullmetal Alchemist: Brotherhood,bones military shounen action adventure drama ...
2,whitefox,9.07,psychological timetravel drama sci-fi suspense,eccentr scientist rintar okab never end thirst...,Steins;Gate,whitefox psychological timetravel drama sci-fi...
3,bandainamcopictures,9.06,gaghumor historical parody samurai shounen act...,gintoki shinpachi kagura return fun love broke...,Gintama Season 4,bandainamcopictures gaghumor historical parody...
4,witstudio,9.05,gore military survival shounen action drama su...,seek restor human 's diminish hope survey corp...,Attack on Titan Season 3 Part 2,witstudio gore military survival shounen actio...
...,...,...,...,...,...,...
4195,studiocomet,5.88,mythology horror supernatural,plot seri revolv around three youkai bem bera ...,Humanoid Monster Bem,studiocomet mythology horror supernatural
4196,shogakukanmusic&digitalentertainment,5.88,kids comedy,daili live boy talk penguin,Penguin no Mondai,shogakukanmusic&digitalentertainment kids comedy
4197,zero-g,5.88,workplace comedy gourmet sliceoflife,summer break morina nanas start new part time ...,Piacevole: My Italian Cooking,zero-g workplace comedy gourmet sliceoflife
4198,productioni.g olm,5.89,kids drama sliceoflife,stori begin futur earth longer inhabit live cr...,Pikaia!,productioni.g olm kids drama sliceoflife
