In [110]:
import pandas as pd
import numpy as np

import warnings
# Blanket filter: suppresses every warning (all categories, all modules) for the
# rest of the session, so library deprecation/assignment warnings will not be shown.
# NOTE(review): consider narrowing this to specific categories.
warnings.filterwarnings('ignore')

In [111]:
# Read both catalogues in one pass; the first path becomes `series`, the second `movies`.
# Paths are hard-coded under /content/drive/MyDrive (presumably a Colab Drive mount).
series, movies = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/csv_files/most_favorites.csv',
        '/content/drive/MyDrive/csv_files/movies.csv',
    )
)

In [112]:
# Discard the 'Unnamed: 0' column from both frames
# (presumably the row index that was written out when the CSVs were saved).
series = series.drop(columns=['Unnamed: 0'])
movies = movies.drop(columns=['Unnamed: 0'])

In [113]:
# (rows, columns) of each frame at this point in the notebook.
series.shape, movies.shape

((5981, 4), (865, 4))

In [114]:
# Preview the first three rows of the series table.
series.head(3)

Unnamed: 0,title,genres,description,poster
0,One Piece,"['Action', 'Adventure', 'Comedy', 'Drama', 'Fa...","Gold Roger was known as the ""Pirate King,"" the...",https://img.zorores.com/_r/300x400/100/54/90/5...
1,Chainsaw Man,"['Action', 'Adventure', 'Demons', 'Horror', 'S...","Denji is robbed of a normal teenage life, left...",https://img.zorores.com/_r/300x400/100/b3/da/b...
2,Demon Slayer: Kimetsu no Yaiba Swordsmith Vill...,"['Action', 'Adventure', 'Demons', 'Historical'...",It adapts the story from volume 12 (chapter 98...,https://img.zorores.com/_r/300x400/100/db/2f/d...


In [115]:
# Preview the first three rows of the movies table.
movies.head(3)

Unnamed: 0,title,genres,description,poster
0,Detective Conan Movie: The Story of Haibara Ai...,"['Adventure', 'Police']",The movie will focus on Ai Haibara's past and ...,https://img.zorores.com/_r/300x400/100/c1/f3/c...
1,Tsurune Movie: Hajimari no Issha,"['School', 'Sports']",Movie adaptation of Tsurune: Kazemai Koukou Ky...,https://img.zorores.com/_r/300x400/100/f4/0c/f...
2,Backflip!! The Movie,"['Comedy', 'Drama', 'Sports']",Ever since coming across the world of boy’s rh...,https://img.zorores.com/_r/300x400/100/1e/7b/1...


In [116]:
# Keep only series: drop every row whose title also appears in the movies table.
# .copy() makes the filtered result an independent frame, so the column assignments
# in later cells are unambiguous and do not trigger pandas' SettingWithCopyWarning.
series = series[~series['title'].isin(movies['title'])].copy()

## **Finally, time for some preprocessing**

In [117]:
import ast

def removeSpace(genres: str) -> list[str]:
    """Parse a stringified genre list and squash each genre into one token.

    ``genres`` is the text form of a Python list, e.g. "['Slice of Life']".
    Each entry is title-cased and then stripped of its spaces, so a
    multi-word genre such as 'Slice of Life' becomes 'SliceOfLife'.
    """
    return [
        label.title().replace(' ', '')
        for label in ast.literal_eval(genres)
    ]

In [118]:
# Normalise every genre label (title-case, inner spaces removed) in both tables;
# each cell goes from the stringified list read from the CSV to a real list of tokens.
series['genres'] = series['genres'].map(removeSpace)
movies['genres'] = movies['genres'].map(removeSpace)

In [119]:
# Flatten each list of genre tokens into one space-separated string.
series['genres'] = series['genres'].map(' '.join)
movies['genres'] = movies['genres'].map(' '.join)

In [120]:
# New column with the text to be vectorized: genre tokens, one space, then the description.
# A row whose description is missing ends up with a missing full_description as well.
series['full_description'] = series['genres'].str.cat(series['description'], sep=' ')
movies['full_description'] = movies['genres'].str.cat(movies['description'], sep=' ')

In [121]:
# Remove ROWS (not columns) that contain a null in any field; the intent is to drop
# titles without a description, which would otherwise have a NaN full_description.
# NOTE(review): with no `subset`, rows missing any other field are dropped too.
# reset_index(drop=True) renumbers the rows 0..n-1 so index labels equal row positions
# and therefore line up with the rows of the vector/similarity matrices built below.
series = series.dropna().reset_index(drop=True)
movies = movies.dropna().reset_index(drop=True)

## **MODEL BUILDING**

In [122]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words vectorizer: vocabulary capped at 6000 terms, English stop words ignored.
cv = CountVectorizer(max_features=6000,stop_words='english')

In [123]:
# Turn each full_description into a row of term counts.
# Each fit_transform call re-fits `cv`, so the two matrices use separate vocabularies
# and, after this cell, `cv` only holds the vocabulary learned from the movies text.
# .toarray() converts the sparse count matrices into dense arrays.
series_vector = cv.fit_transform(series['full_description']).toarray()
movies_vector = cv.fit_transform(movies['full_description']).toarray()

In [124]:
# (rows, vocabulary size) of each count matrix.
series_vector.shape, movies_vector.shape

((5049, 6000), (850, 6000))

In [125]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between all rows of each count matrix:
# entry [i, j] compares the text of row i with the text of row j (positional order).
series_similarity = cosine_similarity(series_vector)
movies_similarity = cosine_similarity(movies_vector)

In [126]:
# Both similarity matrices should be square (one row and one column per title).
series_similarity.shape, movies_similarity.shape

((5049, 5049), (850, 850))

In [131]:
# Similarity scores of the row at position 1 against every series row.
series_similarity[1]

array([0.05665834, 1.        , 0.08119623, ..., 0.02341465, 0.        ,
       0.06819309])

In [144]:
def recommend(anime):
    """Print the titles of the ten series most similar to ``anime``.

    ``anime`` must match a value in ``series['title']`` exactly; the first
    matching row is used. Scores are read from the module-level
    ``series_similarity`` matrix, whose rows and columns follow the row order
    of ``series``.

    Raises:
        IndexError: if no row of ``series`` has that title.
    """
    # Locate the title by row POSITION. The similarity matrix is a plain array
    # addressed 0..n-1, whereas the frame's index labels can have gaps after rows
    # were filtered/dropped, so a label from `.index[0]` may point at the wrong
    # matrix row (or past its end).
    hits = (series['title'] == anime).to_numpy().nonzero()[0]
    if hits.size == 0:
        raise IndexError(f'anime not found: {anime!r}')
    position = int(hits[0])

    scores = series_similarity[position]
    # Highest score first. The queried row is excluded explicitly instead of
    # assuming it always ranks first (ties or an all-zero vector can break that).
    ranked = sorted(
        (row for row in range(len(scores)) if row != position),
        key=lambda row: scores[row],
        reverse=True,
    )
    for row in ranked[:10]:
        print(series['title'].iloc[row])

In [145]:
# recommend() raises IndexError when the title is not present in `series`.
# Catch only that, so any other failure surfaces with its own traceback instead
# of being reported as a missing title.
try:
    recommend('Death Note')
except IndexError:
    print('anime not found')

Soul Eater
Momo, Girl God of Death
Kite Liberator
Zombie-Loan
Death Parade
ID: INVADED
Neuro: Supernatural Detective
Scrapped Princess
Bleach: Memories in the Rain
Dusk Maiden of Amnesia


In [132]:
# First ten titles together with their index labels.
series.title.head(10)

0                                            One Piece
1                                         Chainsaw Man
2    Demon Slayer: Kimetsu no Yaiba Swordsmith Vill...
3                                      Attack on Titan
4                       Demon Slayer: Kimetsu no Yaiba
5                                  Jujutsu Kaisen (TV)
6                                            Blue Lock
7                                      Hell's Paradise
8                               The Eminence in Shadow
9                                           Death Note
Name: title, dtype: object

In [136]:
import pickle

# Persist the series similarity matrix. The with-block guarantees the file handle
# is flushed and closed, even if pickling raises.
with open('/content/drive/MyDrive/series_similarity.pkl', 'wb') as pkl_file:
    pickle.dump(series_similarity, pkl_file)

In [137]:
# Persist the movies similarity matrix. The with-block guarantees the file handle
# is flushed and closed, even if pickling raises.
with open('/content/drive/MyDrive/movies_similarity.pkl', 'wb') as pkl_file:
    pickle.dump(movies_similarity, pkl_file)