# Recommendation Models

In [1]:
# Dependencies
import pandas as pd
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import RegexpTokenizer
from sklearn.metrics.pairwise import cosine_similarity

# Do not truncate columns when a wide DataFrame is displayed.
pd.options.display.max_columns = None

In [2]:
# Load the dataset used by every later cell of this notebook.
DATA_FILE = 'cleaned_out_data.csv'
df = pd.read_csv(DATA_FILE)

In [3]:
# Number of rows whose 'cleaned_description' value is missing.
df['cleaned_description'].isnull().sum()

60

In [4]:
# Drop rows without a description, then renumber the index so that index
# labels equal row positions again. recommend() builds its title lookup from
# data.index and uses the result to address rows of the similarity matrix, so
# gaps left behind by the drop would make it read the wrong row.
# Re-assigning (instead of inplace=True) also keeps this cell safe to re-run.
df = df.dropna(subset=['cleaned_description']).reset_index(drop=True)

In [5]:
def recommend(data, title, top_n=5):
    """Recommend the series whose descriptions are most similar to `title`.

    Descriptions are turned into TF-IDF vectors over word bigrams (English
    stop words removed) and compared with cosine similarity.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'title.romaji', 'title.english', 'siteUrl'
        and 'cleaned_description'. The index may be anything; rows are
        addressed by position internally. The description column is handed
        to the vectorizer as-is, so missing values should be removed first.
    title : str
        Value to look up in data['title.romaji']. If several rows share the
        title, the first one is used as the query.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        At most `top_n` rows, most similar first, with the columns
        'title.romaji', 'title.english' and 'siteUrl' and a fresh 0..k-1
        index. The query row itself is never included.

    Raises
    ------
    KeyError
        If `title` does not occur in data['title.romaji'].
    ValueError
        If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative, got {}".format(top_n))

    # Find the query row by *position*, not by index label: the similarity
    # scores below are ordered by row position, and the two only coincide
    # when the frame has a default 0..n-1 index.
    is_match = (data['title.romaji'] == title).to_numpy()
    if not is_match.any():
        raise KeyError(
            "title {!r} not found in data['title.romaji']".format(title)
        )
    query_pos = int(is_match.argmax())  # argmax over booleans -> first True

    # Convert the descriptions into TF-IDF vectors built from word bigrams.
    tf = TfidfVectorizer(analyzer='word', ngram_range=(2, 2), min_df=1, stop_words='english')
    tfidf_matrix = tf.fit_transform(data['cleaned_description'])

    # Only the query's row of the similarity matrix is needed, so compare
    # that single row against all rows instead of building the full n x n
    # matrix.
    scores = cosine_similarity(tfidf_matrix[query_pos], tfidf_matrix).ravel()

    # Exclude the query explicitly rather than assuming it sorts first: with
    # tied scores (e.g. identical descriptions) or an all-zero query vector
    # it is not guaranteed to be at the top. The sort is stable, so ties stay
    # in row order.
    candidates = [pos for pos in range(len(scores)) if pos != query_pos]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)
    top_positions = candidates[:top_n]

    rec = data.iloc[top_positions].reset_index(drop=True)
    return rec[['title.romaji', 'title.english', 'siteUrl']]

In [6]:
# Preview the romaji title and genres of the first ten rows.
df.head(10)[['title.romaji', 'genres']]

Unnamed: 0,title.romaji,genres
0,MONSTER,"['Drama', 'Mystery', 'Psychological', 'Thriller']"
1,Berserk,"['Action', 'Adventure', 'Drama', 'Fantasy', 'H..."
2,20th Century Boys,"['Drama', 'Mystery', 'Psychological', 'Sci-Fi'..."
3,Yokohama Kaidashi Kikou,"['Sci-Fi', 'Slice of Life']"
4,Hajime no Ippo,"['Action', 'Comedy', 'Drama', 'Sports']"
5,Full Moon wo Sagashite,"['Drama', 'Mahou Shoujo', 'Music', 'Romance', ..."
6,Tsubasa: RESERVoir CHRoNiCLE,"['Action', 'Adventure', 'Drama', 'Fantasy', 'R..."
7,xxxHolic,"['Comedy', 'Drama', 'Mystery', 'Supernatural']"
8,NARUTO,"['Action', 'Adventure']"
9,BLEACH,"['Action', 'Adventure', 'Supernatural']"


In [7]:
# Example query: recommendations for the row whose romaji title is 'NARUTO'.
recommend(data=df, title='NARUTO')

Unnamed: 0,title.romaji,title.english,siteUrl
0,Zone,Zone,https://anilist.co/manga/35147
1,Kaze no Stigma,Stigma of the Wind,https://anilist.co/manga/33237
2,Tsuki no Toiki: Ai no Kizu,Tail of the Moon: The Other Hanzo(u),https://anilist.co/manga/31815
3,Kootetsu no Hana,,https://anilist.co/manga/33949
4,Sword of the Dark Ones,Sword of the Dark Ones,https://anilist.co/manga/31354
