In [18]:
import pandas as pd
import numpy as np
from pathlib import Path

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation and chained-assignment
# warnings from pandas and other libraries — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [19]:
# Load the series and movies catalogues from the datasets folder under the
# current working directory.
series, movies = (
    pd.read_csv(Path.cwd() / csv_path)
    for csv_path in ('datasets/anime_series.csv', 'datasets/anime_movies.csv')
)

In [20]:
# Remove the leftover index column ('Unnamed: 0') that comes in with the CSVs
# (presumably written by an earlier `to_csv` of these same files).
# `errors='ignore'` keeps the cell safe to re-run once the column is already
# gone; the original in-place drop raised KeyError on a second execution.
series = series.drop(columns=['Unnamed: 0'], errors='ignore')
movies = movies.drop(columns=['Unnamed: 0'], errors='ignore')

In [21]:
# Show (rows, columns) for both frames.
series.shape, movies.shape

((5069, 6), (835, 6))

In [22]:
# Preview the first three series rows.
series.head(3)

Unnamed: 0,title,genres,description,poster,link,full_description
0,One Piece,Action Adventure Comedy Drama Fantasy Shounen ...,"Gold Roger was known as the ""Pirate King,"" the...",https://img.aniwatchres.com/_r/300x400/100/54/...,https://aniwatch.to/one-piece-100,Action Adventure Comedy Drama Fantasy Shounen ...
1,Chainsaw Man,Action Adventure Demons Horror Shounen,"Denji is robbed of a normal teenage life, left...",https://img.aniwatchres.com/_r/300x400/100/b3/...,https://aniwatch.to/chainsaw-man-17406,Action Adventure Demons Horror Shounen Denji i...
2,Demon Slayer: Kimetsu no Yaiba Swordsmith Vill...,Action Adventure Demons Historical Shounen Sup...,It adapts the story from volume 12 (chapter 98...,https://img.aniwatchres.com/_r/300x400/100/db/...,https://aniwatch.to/demon-slayer-kimetsu-no-ya...,Action Adventure Demons Historical Shounen Sup...


In [23]:
# Preview the first three movie rows.
movies.head(3)

Unnamed: 0,title,genres,description,poster,link,full_description
0,Detective Conan Movie: The Story of Haibara Ai...,Adventure Police,The movie will focus on Ai Haibara's past and ...,https://img.zorores.com/_r/300x400/100/c1/f3/c...,https://zoro.to/detective-conan-movie-the-stor...,Adventure Police The movie will focus on Ai Ha...
1,Tsurune Movie: Hajimari no Issha,School Sports,Movie adaptation of Tsurune: Kazemai Koukou Ky...,https://img.zorores.com/_r/300x400/100/f4/0c/f...,https://zoro.to/tsurune-movie-hajimari-no-issh...,School Sports Movie adaptation of Tsurune: Kaz...
2,Backflip!! The Movie,Comedy Drama Sports,Ever since coming across the world of boy’s rh...,https://img.zorores.com/_r/300x400/100/1e/7b/1...,https://zoro.to/backflip-the-movie-18405,Comedy Drama Sports Ever since coming across t...


In [24]:
# Keep only the series whose title does not also appear in the movies catalogue.
series = series.loc[~series['title'].isin(movies['title'])]

In [25]:
# Re-check the shapes to see how many series rows the movie-title filter removed.
series.shape, movies.shape

((5069, 6), (835, 6))

In [26]:
# Drop every row that has at least one missing value.
# The result is rebound instead of using `inplace=True`: at this point `series`
# comes from a boolean filter of the loaded frame, and mutating such a slice in
# place is the pattern pandas flags with SettingWithCopyWarning.
series = series.dropna()
movies = movies.dropna()

In [27]:
# Re-check the shapes to see how many rows contained missing values.
series.shape, movies.shape

((5069, 6), (835, 6))

In [None]:
# Write the cleaned frames back over the CSV files they were loaded from.
# NOTE(review): `to_csv` writes the row index by default, which is presumably
# where the 'Unnamed: 0' column dropped right after loading comes from —
# confirm, and consider `index=False` together with removing that drop.
# NOTE(review): overwriting the inputs makes the raw data unrecoverable from
# this notebook; consider writing to a separate file.
series.to_csv(Path.cwd()/'datasets/anime_series.csv')
movies.to_csv(Path.cwd()/'datasets/anime_movies.csv')

## **Finally, time for some preprocessing**

In [None]:
import ast

def removeSpace(genres: str) -> list[str]:
    """Parse a stringified list of genre names and normalise each entry.

    Args:
        genres: Text holding a Python list literal of genre names,
            e.g. ``"['slice of life', 'action']"``.

    Returns:
        The parsed list with every name title-cased and its spaces removed,
        e.g. ``['SliceOfLife', 'Action']``.

    Raises:
        ValueError, SyntaxError: If ``genres`` is not a valid Python literal.
    """
    parsed = ast.literal_eval(genres)

    # Rewrite each entry in place so the parsed list itself is returned.
    for position, name in enumerate(parsed):
        parsed[position] = name.title().replace(' ', '')

    return parsed

In [None]:
# Normalise every genre name (title-cased, inner spaces removed) via removeSpace.
series['genres'] = series['genres'].apply(removeSpace)
movies['genres'] = movies['genres'].apply(removeSpace)

In [None]:
# Collapse each list of genres into one space-separated string.
series['genres'] = series['genres'].apply(' '.join)
movies['genres'] = movies['genres'].apply(' '.join)

In [None]:
# Build `full_description` = "<genres> <description>" on both frames.
for frame in (series, movies):
    frame['full_description'] = frame['genres'] + ' ' + frame['description']

In [None]:
# Show (rows, columns) for both frames after the preprocessing steps.
series.shape, movies.shape

((5069, 6), (835, 6))

## **Convert text to vectors**

In [None]:
from sentence_transformers import SentenceTransformer

# Load the pretrained sentence-embedding model used for every `encode` call below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Embed the full description (genres + synopsis) of every series.
series_embeddings = model.encode(series['full_description'].values)

In [None]:
# One embedding row per series; the second dimension is the embedding width.
series_embeddings.shape

(5069, 384)

In [None]:
# Embed the full description (genres + synopsis) of every movie.
movies_embeddings = model.encode(movies['full_description'].values)

In [None]:
# One embedding row per movie; the second dimension is the embedding width.
movies_embeddings.shape

(835, 384)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity of every title against every other title,
# computed separately for the series and the movies embeddings.
series_similarity, movies_similarity = map(
    cosine_similarity, (series_embeddings, movies_embeddings)
)

In [None]:
# Each similarity matrix is square: one row and one column per title.
series_similarity.shape, movies_similarity.shape

((5069, 5069), (835, 835))

In [None]:
# Split the series similarity matrix row-wise at its midpoint into two arrays
# (with an odd row count the second half gets the extra row).
divide = series_similarity.shape[0] // 2
first_half, second_half = np.split(series_similarity, [divide])

# Confirm the shapes of the halves and that all three objects are ndarrays.
print(first_half.shape, second_half.shape)
print(type(first_half), type(series_similarity), type(second_half))

(2534, 5069) (2535, 5069)
<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>


## **Save similarity matrices**

In [None]:
import pickle

# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes; the inline `open(...)` calls never closed theirs.

# save the two halves of the series similarity into binary
with open(Path.cwd()/'assets/bin/series_similarity1.pkl', 'wb') as fh:
    pickle.dump(first_half, fh)
with open(Path.cwd()/'assets/bin/series_similarity2.pkl', 'wb') as fh:
    pickle.dump(second_half, fh)

# save the movies similarity into binary
with open(Path.cwd()/'assets/bin/movies_similarity.pkl', 'wb') as fh:
    pickle.dump(movies_similarity, fh)

## **Model for Description Page**

In [None]:
# Stack series and movies into one catalogue for description-based search.
# `pd.concat` replaces `DataFrame.append`, which was deprecated in pandas 1.4
# and removed in pandas 2.0; the original row labels are kept, as before.
animes = pd.concat([series, movies])
animes.shape

(5904, 6)

In [None]:
# Embed the plain description (without genres) of every entry in `animes`.
description_embeddings = model.encode(animes['description'].values)

In [None]:
# One embedding row per entry in `animes`.
description_embeddings.shape

(5904, 384)

In [None]:
# Persist the description embeddings; the `with` block closes the file handle
# as soon as the dump finishes (the inline `open(...)` never closed it).
with open(Path.cwd()/'assets/bin/dembeddings.pkl', 'wb') as fh:
    pickle.dump(description_embeddings, fh)

## **Test the model's performance**

In [None]:
# Free-text query typed by the user (interactive: execution waits here for input).
description = input()

A short temper boy joins basketball club to win over his crush


In [None]:
# Embed the query text with the same model used for the catalogue descriptions.
embeddings = model.encode(description)

In [None]:
# Score the query against every stored description embedding.
# `reshape(1, -1)` turns the query vector into a single-row matrix without
# hard-coding the embedding width, so another embedding model still works.
similarity = cosine_similarity(embeddings.reshape(1, -1), description_embeddings)
similarity.shape

(1, 5904)

In [None]:
# Inspect the raw scores: a single row with one similarity value per stored
# description embedding.
similarity

array([[0.01053745, 0.06218959, 0.03710736, ..., 0.05944321, 0.16623399,
        0.08872547]], dtype=float32)

In [None]:
def recommend(scores=None, catalog=None, top_n: int = 11):
    """Print the titles of the catalogue entries most similar to the query.

    Called with no arguments it behaves as before: it ranks the first row of
    the notebook-level ``similarity`` matrix and looks the titles up in the
    notebook-level ``animes`` frame.

    Parameters
    ----------
    scores : 1-D array-like of float, optional
        One similarity score per catalogue row, positionally aligned with
        ``catalog``. Defaults to ``similarity[0]``.
    catalog : pandas.DataFrame, optional
        Frame with a ``title`` column. Defaults to ``animes``.
    top_n : int, default 11
        Number of best-scoring entries to print.

    Returns
    -------
    None
        Titles are printed one per line, best match first.
    """
    if scores is None:
        scores = similarity[0]
    if catalog is None:
        catalog = animes

    # A stable sort of the negated scores gives descending order while ties
    # keep their original order, matching sorted(..., reverse=True).
    ranked_positions = np.argsort(-np.asarray(scores), kind='stable')[:top_n]
    titles = catalog['title']

    for position in ranked_positions:
        try:
            print(titles.iloc[position])
        except IndexError:
            # More scores than catalogue rows: skip the unmatched position.
            # Any other error is a real bug and is no longer swallowed.
            continue

In [None]:
# Print the closest matches for the query scored above.
recommend()

Slam Dunk
Ahiru no Sora
Hanebado!
Saki
Haikyu!!
Kuroko's Basketball 3
Chibi Maruko-chan Movie
Attack on Tomorrow
Ao Ashi
Tsurune - The Linking Shot -
Ro-Kyu-Bu ~ Fast Break!
