In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the anime catalogue from a CSV in the working directory and preview
# the first rows to see which columns are available.
df = pd.read_csv('anime.csv')
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
# Dimensions of the raw frame as (rows, columns).
df.shape

(12294, 7)

In [4]:
# Count missing values per column to see which fields need cleaning.
df.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [5]:
# Drop every row that is missing a field the recommender depends on.
# The index is rebuilt as 0..n-1 afterwards: the TF-IDF and similarity matrices
# built from this frame are addressed by row *position*, so leaving the gaps
# that dropna creates would make index labels point at the wrong matrix rows.
# Assigning the result (instead of inplace=True) keeps the cell safe to re-run.
df = df.dropna(axis=0, subset=['genre', 'type', 'rating']).reset_index(drop=True)

In [6]:
# Re-check the dimensions after rows with missing values were dropped.
df.shape

(12017, 7)

In [7]:
# Verify that no missing values remain in any column.
df.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [8]:
# Build one text field per title by joining its comma-separated genre list and
# its type with a single space; this is the text that is vectorized later.
# The result is a one-column frame whose column label is the integer 0.
selected = pd.DataFrame(df['genre'] + " " + df['type'])

In [9]:
# Preview the combined "genre + type" text for the first few titles.
selected.head()

Unnamed: 0,0
0,"Drama, Romance, School, Supernatural Movie"
1,"Action, Adventure, Drama, Fantasy, Magic, Mili..."
2,"Action, Comedy, Historical, Parody, Samurai, S..."
3,"Sci-Fi, Thriller TV"
4,"Action, Comedy, Historical, Parody, Samurai, S..."


In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [11]:
# TF-IDF vectorizer with scikit-learn's default settings.
# NOTE(review): the default tokenizer presumably splits hyphenated or
# multi-word genres (e.g. "Sci-Fi") into separate tokens — confirm this is
# the intended feature granularity.
vector = TfidfVectorizer()

In [12]:
# Learn the vocabulary from the combined text (column 0 of `selected`) and
# encode every title as one row of a sparse TF-IDF matrix.
vectorized = vector.fit_transform(selected[0])

In [64]:
# Inspect the sparse matrix: one row per title, one column per vocabulary term.
vectorized

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 52393 stored elements and shape (12017, 52)>

In [13]:
# Dense copy of the TF-IDF matrix wrapped in a DataFrame so it can be inspected;
# columns are labelled by term position, not by the term itself.
# The similarity computation further down uses the sparse `vectorized` matrix,
# not this frame.
features = pd.DataFrame(vectorized.toarray())

In [14]:
# Preview the TF-IDF weights of the first few titles.
features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,42,43,44,45,46,47,48,49,50,51
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.405026,0.0,...,0.0,0.0,0.0,0.0,0.50325,0.0,0.0,0.0,0.0,0.0
1,0.2854,0.306009,0.0,0.0,0.0,0.0,0.0,0.0,0.324302,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.252854,0.0,0.0,0.0
2,0.24559,0.0,0.0,0.0,0.0,0.195601,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.217584,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.797012,0.293958,0.0,0.0,0.0
4,0.24559,0.0,0.0,0.0,0.0,0.195601,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.217584,0.0,0.0,0.0


In [15]:
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
# Pairwise cosine similarity between all TF-IDF rows. Entry [i, j] compares the
# i-th and j-th rows of the vectorized text, i.e. rows of `df` by position.
similarity = cosine_similarity(vectorized)

In [17]:
# Peek at the similarity matrix; the diagonal is 1.0 because each title is
# compared with itself.
similarity

array([[1.        , 0.13135073, 0.        , ..., 0.        , 0.        ,
        0.24003343],
       [0.13135073, 1.        , 0.22384138, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.22384138, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.6463041 ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.6463041 ],
       [0.24003343, 0.        , 0.        , ..., 0.6463041 , 0.6463041 ,
        1.        ]])

In [19]:
# Rescale ratings by dividing by 10 so they can be blended with cosine
# similarity in the hybrid score.
# NOTE(review): assumes ratings are on a 0-10 scale — confirm against the data.
df['rating_normalized'] = df['rating'] / 10.0

In [20]:
# Preview the frame to confirm the new rating_normalized column.
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,rating_normalized
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,0.937
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,0.926
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,0.925
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,0.917
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,0.916


In [60]:
import difflib


def recommend(title, df, similarity, top, alpha):
    """Recommend titles by blending content similarity with average rating.

    The query is fuzzy-matched against ``df['name']``. Every row of ``df`` is
    then scored as ``alpha * similarity + (1 - alpha) * rating_normalized`` and
    the best-scoring rows are returned. The matched title itself is part of the
    ranking, so it normally appears in the result.

    Parameters
    ----------
    title : str
        Free-text title to look up; approximate matches are accepted.
    df : pandas.DataFrame
        Catalogue with at least ``name`` and ``rating_normalized`` columns.
        ``genre`` and ``rating`` are reported when present (otherwise
        ``'N/A'`` and ``0`` are used).
    similarity : array-like of shape (len(df), len(df))
        Pairwise similarity aligned with ``df`` by row *position* (not by
        index label).
    top : int
        Number of rows to return.
    alpha : float
        Weight given to similarity in the blend; ``1 - alpha`` goes to the
        normalized rating.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name``, ``Genre``, ``Rating`` and ``Similarity In %`` (the
        hybrid score as a whole-number percentage), ordered by percentage,
        then by raw similarity, then by catalogue order.

    Raises
    ------
    IndexError
        If no name in ``df`` is close enough to ``title``.
    ValueError
        If ``similarity`` is not a square matrix with one row per row of ``df``.
    """
    names = df['name'].tolist()
    n_rows = len(names)

    if np.shape(similarity) != (n_rows, n_rows):
        raise ValueError(
            f"similarity has shape {np.shape(similarity)}, "
            f"expected ({n_rows}, {n_rows}) to match df"
        )

    match = difflib.get_close_matches(title, names)
    if not match:
        raise IndexError(f"no title in df is close to {title!r}")

    # Use the row POSITION of the first matching name. The index label is not
    # safe here: after rows are dropped, labels no longer equal positions,
    # while the similarity matrix is always positional.
    position = names.index(match[0])

    sims = np.asarray(similarity[position], dtype=float)
    ratings_norm = df['rating_normalized'].to_numpy(dtype=float)
    hybrid = alpha * sims + (1 - alpha) * ratings_norm

    # Scale before rounding so the percentage is an exact whole number
    # (round(x, 2) * 100 can produce values like 56.99999999999999).
    percent = np.round(hybrid * 100)

    # Stable sort: primary key percent (descending), ties broken by similarity
    # (descending), remaining ties keep catalogue order.
    order = np.lexsort((-sims, -percent))[:top]

    picked = df.iloc[order]
    genre = picked['genre'].to_numpy() if 'genre' in df.columns else 'N/A'
    rating = picked['rating'].round(2).to_numpy() if 'rating' in df.columns else 0

    return pd.DataFrame({
        'Name': picked['name'].to_numpy(),
        'Genre': genre,
        'Rating': rating,
        'Similarity In %': percent[order],
    })

In [63]:
# Example query: the title is fuzzy-matched, so casing and punctuation need not
# be exact. alpha=0.5 weights content similarity and normalized rating equally.
recommend(title='kimi no na wa', df=df, similarity=similarity, top=10, alpha=0.5)

Unnamed: 0,Name,Genre,Rating,Similarity In %
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37,97.0
1,Aura: Maryuuin Kouga Saigo no Tatakai,"Comedy, Drama, Romance, School, Supernatural",7.67,86.0
2,Kokoro ga Sakebitagatterunda.,"Drama, Romance, School",8.32,85.0
3,Harmonie,"Drama, School, Supernatural",7.52,82.0
4,Air Movie,"Drama, Romance, Supernatural",7.39,81.0
5,Hotarubi no Mori e,"Drama, Romance, Shoujo, Supernatural",8.61,80.0
6,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",8.93,78.0
7,Shakugan no Shana II (Second),"Action, Drama, Fantasy, Romance, School, Super...",7.79,77.0
8,Shakugan no Shana,"Action, Drama, Fantasy, Romance, School, Super...",7.74,77.0
9,Momo e no Tegami,"Drama, Supernatural",7.78,77.0
