In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import warnings
# Default matplotlib figure size as (width, height).
plt.rcParams['figure.figsize'] = (12,8)
# Suppresses every warning category for the rest of the session, including
# deprecation notices from pandas/scikit-learn; remove while debugging.
warnings.filterwarnings('ignore')

In [2]:
# Load the anime catalogue into a DataFrame.
# NOTE(review): the absolute /content/ path looks like a Google Colab upload
# location; confirm and adjust when running in another environment.
data = pd.read_csv(r"/content/anime.csv")

In [3]:
# Preview the frame exactly as loaded from the CSV.
data

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [4]:
# anime_id and episodes are not used as recommendation features, so remove them.
data = data.drop(columns=['anime_id', 'episodes'])

In [5]:
# Confirm that only name, genre, type, rating and members remain.
data

Unnamed: 0,name,genre,type,rating,members
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,9.37,200630
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,9.26,793665
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.25,114262
3,Steins;Gate,"Sci-Fi, Thriller",TV,9.17,673572
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.16,151266
...,...,...,...,...,...
12289,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,4.15,211
12290,Under World,Hentai,OVA,4.28,183
12291,Violence Gekiga David no Hoshi,Hentai,OVA,4.88,219
12292,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,4.98,175


In [6]:
# Summary statistics for the numeric columns; a lower `count` for a column
# than the row total indicates missing values in that column.
data.describe()

Unnamed: 0,rating,members
count,12064.0,12294.0
mean,6.473902,18071.34
std,1.026746,54820.68
min,1.67,5.0
25%,5.88,225.0
50%,6.57,1550.0
75%,7.18,9437.0
max,10.0,1013917.0


In [7]:
# Count rows that are identical across ALL columns. This does not detect two
# different rows that merely share the same `name`.
data.duplicated().sum()

np.int64(0)

In [8]:
# Number of missing values in each column.
data.isnull().sum()

Unnamed: 0,0
name,0
genre,62
type,25
rating,230
members,0


In [9]:
# Share of missing values per column, expressed as a percentage of all rows.
total_rows = len(data)
null_percentage = (data.isnull().sum() / total_rows) * 100
for column in data.columns:
    print(f'Null value percentage of {column} is :{null_percentage[column]}')

Null value percentage of name is :0.0
Null value percentage of genre is :0.504311046038718
Null value percentage of type is :0.20335122824141857
Null value percentage of rating is :1.8708312998210508
Null value percentage of members is :0.0


In [10]:
# Discard every row that has a missing value in any column, then renumber the
# rows 0..n-1 so later frames built from positional arrays line up with `data`.
data = data.dropna().reset_index(drop=True)

In [11]:
# Inspect the cleaned frame; the index has been reset after dropping rows.
data

Unnamed: 0,name,genre,type,rating,members
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,9.37,200630
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,9.26,793665
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.25,114262
3,Steins;Gate,"Sci-Fi, Thriller",TV,9.17,673572
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.16,151266
...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,4.15,211
12013,Under World,Hentai,OVA,4.28,183
12014,Violence Gekiga David no Hoshi,Hentai,OVA,4.88,219
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,4.98,175


In [12]:
# One-hot encode the release type as integer 0/1 columns prefixed with "type_".
# drop_first=True omits the first category, so that category is represented by
# all type_* columns being 0.
# NOTE(review): dropping a level is a regression convention; in a similarity
# feature vector the omitted type contributes nothing, so it is treated
# differently from the other types. Confirm this asymmetry is intended.
type_ohe = pd.get_dummies(data['type'], prefix = 'type', drop_first = True).astype(int)

In [13]:
# Append the type indicator columns to the right of the existing columns;
# both frames share the same row index, so rows stay aligned.
data = pd.concat([data, type_ohe], axis='columns')

In [14]:
# Inspect the frame with the type_* indicators added; the original `type`
# column is still present at this point.
data

Unnamed: 0,name,genre,type,rating,members,type_Music,type_ONA,type_OVA,type_Special,type_TV
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,9.37,200630,0,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,9.26,793665,0,0,0,0,1
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.25,114262,0,0,0,0,1
3,Steins;Gate,"Sci-Fi, Thriller",TV,9.17,673572,0,0,0,0,1
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.16,151266,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,4.15,211,0,0,1,0,0
12013,Under World,Hentai,OVA,4.28,183,0,0,1,0,0
12014,Violence Gekiga David no Hoshi,Hentai,OVA,4.88,219,0,0,1,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,4.98,175,0,0,1,0,0


In [15]:
# The categorical `type` column is now redundant with the type_* indicators.
data = data.drop(columns='type')

In [16]:
# Independent snapshot of the frame taken before the genre column is encoded.
# NOTE(review): `data1` is not referenced again in the cells visible here.
data1 = data.copy()

In [17]:
# Distinct raw genre strings; each value is a comma-separated list of genres.
data['genre'].unique()

array(['Drama, Romance, School, Supernatural',
       'Action, Adventure, Drama, Fantasy, Magic, Military, Shounen',
       'Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen',
       ..., 'Action, Comedy, Hentai, Romance, Supernatural',
       'Hentai, Sports', 'Hentai, Slice of Life'], dtype=object)

In [18]:
# Turn each comma-separated genre string into a list of genre names.
# fillna('') is defensive: a missing genre would become [''] instead of failing.
data['genre_split'] = data['genre'].fillna('').str.split(', ', regex=False)

In [19]:
# Inspect the new genre_split column (one list of genre names per row).
data

Unnamed: 0,name,genre,rating,members,type_Music,type_ONA,type_OVA,type_Special,type_TV,genre_split
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37,200630,0,0,0,0,0,"[Drama, Romance, School, Supernatural]"
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26,793665,0,0,0,0,1,"[Action, Adventure, Drama, Fantasy, Magic, Mil..."
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25,114262,0,0,0,0,1,"[Action, Comedy, Historical, Parody, Samurai, ..."
3,Steins;Gate,"Sci-Fi, Thriller",9.17,673572,0,0,0,0,1,"[Sci-Fi, Thriller]"
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16,151266,0,0,0,0,1,"[Action, Comedy, Historical, Parody, Samurai, ..."
...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,4.15,211,0,0,1,0,0,[Hentai]
12013,Under World,Hentai,4.28,183,0,0,1,0,0,[Hentai]
12014,Violence Gekiga David no Hoshi,Hentai,4.88,219,0,0,1,0,0,[Hentai]
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,4.98,175,0,0,1,0,0,[Hentai]


In [20]:
# Expand each row's genre list into one 0/1 indicator column per distinct genre.
mlb = MultiLabelBinarizer()
genre_encoded = mlb.fit_transform(data['genre_split'])
# Reuse data's row index so the indicator frame aligns row-for-row with `data`.
genre_df = pd.DataFrame(data=genre_encoded, index=data.index, columns=mlb.classes_)

In [21]:
# Inspect the genre indicator matrix (one column per genre, one row per anime).
genre_df

Unnamed: 0,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,1,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# The raw genre string and its list form are superseded by genre_df.
data = data.drop(columns=['genre', 'genre_split'])

In [23]:
# Append the genre indicator columns to the right of the existing columns.
data = pd.concat([data, genre_df], axis='columns')

In [24]:
# Inspect the combined table: name, rating, members, type_* and genre indicators.
data

Unnamed: 0,name,rating,members,type_Music,type_ONA,type_OVA,type_Special,type_TV,Action,Adventure,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,Kimi no Na wa.,9.37,200630,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,9.26,793665,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,9.25,114262,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,9.17,673572,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,9.16,151266,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,4.15,211,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,Under World,4.28,183,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,Violence Gekiga David no Hoshi,4.88,219,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,4.98,175,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Standardise the two continuous columns (zero mean, unit variance); the 0/1
# indicator columns are left untouched.
# The scaler is fitted on the full table and the fitted object stays available
# as `sc`.
sc = StandardScaler()
data[['rating', 'members']] = sc.fit_transform(data[['rating', 'members']])

In [26]:
# Inspect the table after scaling; rating and members are now z-scores.
data

Unnamed: 0,name,rating,members,type_Music,type_ONA,type_OVA,type_Special,type_TV,Action,Adventure,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,Kimi no Na wa.,2.824474,3.292044,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,2.717032,14.002410,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,2.707265,1.732216,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,2.629126,11.833499,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,2.619358,2.400518,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,-2.274108,-0.327575,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,Under World,-2.147132,-0.328080,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,Violence Gekiga David no Hoshi,-1.561088,-0.327430,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,-1.463414,-0.328225,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# Pairwise cosine similarity between all rows, computed over every column
# except the title. The result is a dense square array with one row and one
# column per anime.
# NOTE(review): the standardised `members` value reaches double digits for the
# most popular titles while genre/type flags are only 0 or 1, so popularity
# probably dominates the cosine. Consider log-scaling or down-weighting it.
cosine_sim = cosine_similarity(data.drop(columns='name'))

In [28]:
# Wrap the similarity array in a DataFrame labelled by title on both axes so a
# row can be looked up by anime name.
# NOTE(review): titles are used as labels without a uniqueness check; a
# repeated title would make a label lookup return several rows. Verify.
cosine_df = pd.DataFrame(data=cosine_sim, columns=data['name'], index=data['name'])

In [29]:
# Inspect the labelled similarity table (the diagonal is each title vs itself).
cosine_df

name,Kimi no Na wa.,Fullmetal Alchemist: Brotherhood,Gintama°,Steins;Gate,Gintama&#039;,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,Hunter x Hunter (2011),Ginga Eiyuu Densetsu,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare,Gintama&#039;: Enchousen,...,Silent Chaser Kagami,Super Erotic Anime,Teleclub no Himitsu,Tenshi no Habataki Jun,The Satisfaction,Toushindai My Lover: Minami tai Mecha-Minami,Under World,Violence Gekiga David no Hoshi,Violence Gekiga Shin David no Hoshi: Inma Densetsu,Yasuji no Pornorama: Yacchimae!!
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kimi no Na wa.,1.000000,0.788556,0.652772,0.793004,0.705381,0.756603,0.813671,0.698192,0.574481,0.577248,...,-0.532867,-0.569321,-0.555880,-0.574820,-0.573112,-0.582112,-0.577099,-0.538898,-0.529531,-0.562588
Fullmetal Alchemist: Brotherhood,0.788556,1.000000,0.555943,0.976301,0.662207,0.522170,0.966699,0.474289,0.409028,0.446020,...,-0.286232,-0.279784,-0.282781,-0.277353,-0.278215,-0.274413,-0.276677,-0.284740,-0.285972,-0.346960
Gintama°,0.652772,0.555943,1.000000,0.564912,0.990054,0.757520,0.655011,0.645033,0.958093,0.991496,...,-0.517253,-0.564252,-0.546646,-0.571990,-0.569540,-0.582129,-0.575051,-0.525219,-0.513332,-0.526134
Steins;Gate,0.793004,0.976301,0.564912,1.000000,0.670630,0.509814,0.954879,0.479755,0.413971,0.454180,...,-0.306745,-0.302714,-0.304796,-0.300705,-0.301431,-0.298313,-0.300190,-0.305669,-0.306297,-0.366874
Gintama&#039;,0.705381,0.662207,0.990054,0.670630,1.000000,0.753044,0.744779,0.640408,0.928034,0.964096,...,-0.497042,-0.536064,-0.521556,-0.542231,-0.540295,-0.550355,-0.544723,-0.503580,-0.493635,-0.516135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Toushindai My Lover: Minami tai Mecha-Minami,-0.582112,-0.274413,-0.582129,-0.298313,-0.550355,-0.617768,-0.379088,-0.538644,-0.597735,-0.578031,...,0.979374,0.997929,0.992819,0.999312,0.998944,1.000000,0.999652,0.983674,0.977342,0.862385
Under World,-0.577099,-0.276677,-0.575051,-0.300190,-0.544723,-0.609710,-0.379144,-0.525651,-0.589317,-0.570151,...,0.984366,0.999279,0.995631,0.999943,0.999809,0.999652,1.000000,0.988082,0.982589,0.864075
Violence Gekiga David no Hoshi,-0.538898,-0.284740,-0.525219,-0.305669,-0.503580,-0.553746,-0.372979,-0.442076,-0.531670,-0.515872,...,0.999746,0.993212,0.998138,0.989672,0.990904,0.983674,0.988082,1.000000,0.999477,0.861725
Violence Gekiga Shin David no Hoshi: Inma Densetsu,-0.529531,-0.285972,-0.513332,-0.306297,-0.493635,-0.540464,-0.370933,-0.423335,-0.518060,-0.503029,...,0.999952,0.988932,0.995644,0.984520,0.986036,0.977342,0.982589,0.999477,1.000000,0.858672


In [30]:
def recommend_anime(anime_names, cosine_df, top_n=10, min_similarity=0.5):
    """Return the most similar titles for each requested anime.

    Parameters
    ----------
    anime_names : str or iterable of str
        Title(s) to look up. A single string is treated as one title.
    cosine_df : pandas.DataFrame
        Square similarity table whose row and column labels are titles.
    top_n : int, default 10
        Maximum number of recommendations returned per title.
    min_similarity : float, default 0.5
        Candidates scoring below this value are discarded.

    Returns
    -------
    dict
        Maps each requested title found in ``cosine_df`` to a Series of
        similarity scores indexed by title and sorted in descending order.
        Titles that are not found are reported with a printed message and
        left out of the result.
    """
    # A bare string would otherwise be iterated character by character and
    # every character reported as a missing title.
    if isinstance(anime_names, str):
        anime_names = [anime_names]
    else:
        # Materialise once so a generator survives the two passes below.
        anime_names = list(anime_names)

    # Report unknown titles up front instead of failing on the first one.
    missing = [name for name in anime_names if name not in cosine_df.index]
    if missing:
        print(f"Missing anime names: {missing}")

    recommendations = {}

    for name in anime_names:
        if name not in cosine_df.index:
            # Already reported above; nothing to recommend from.
            continue

        sim_scores = cosine_df.loc[name]

        # When the same title labels several rows, .loc returns a DataFrame
        # (one row per duplicate). Use the first matching row so the rest of
        # the pipeline always works on a Series.
        if sim_scores.ndim > 1:
            sim_scores = sim_scores.iloc[0]

        # Positional boolean mask: drop the queried title itself and anything
        # under the similarity threshold. A plain array avoids label alignment
        # problems when column labels repeat.
        keep = (sim_scores.index != name) & (sim_scores.to_numpy() >= min_similarity)

        # Highest similarity first, capped at top_n entries.
        recommendations[name] = sim_scores[keep].sort_values(ascending=False).head(top_n)

    return recommendations


In [31]:
# Titles to look up; a title absent from cosine_df is reported and skipped
# by recommend_anime.
anime_list = [
    'Kimi no Na wa.',
    'Hotarubi no Mori e',
    'Fullmetal Alchemist: Brotherhood',
    'Hunter x Hunter (2011)',
    'Ginga Eiyuu Densetsu',
    'Under World',
    'One Piece',
]
results = recommend_anime(anime_list, cosine_df, min_similarity=0.8, top_n=5)
for anime, recs in results.items():
    # One call prints the header, the ranked scores and a divider, one per line.
    print(f"** Recommendations for {anime} **:", recs, '=' * 100, sep='\n')


** Recommendations for Kimi no Na wa. **:
name
Hotarubi no Mori e                                          0.943022
Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen       0.909692
Suzumiya Haruhi no Shoushitsu                               0.907460
Yahari Ore no Seishun Love Comedy wa Machigatteiru. Zoku    0.905412
Hotaru no Haka                                              0.894553
Name: Kimi no Na wa., dtype: float64
** Recommendations for Hotarubi no Mori e **:
name
Kimi no Na wa.          0.943022
Clannad: After Story    0.901507
Natsume Yuujinchou      0.901150
Kanon (2006)            0.900328
Howl no Ugoku Shiro     0.897903
Name: Hotarubi no Mori e, dtype: float64
** Recommendations for Fullmetal Alchemist: Brotherhood **:
name
Fullmetal Alchemist    0.993701
Shingeki no Kyojin     0.988777
Fairy Tail             0.988660
Akame ga Kill!         0.986022
Noragami               0.981955
Name: Fullmetal Alchemist: Brotherhood, dtype: float64
** Recommendations for Hunter x Hu

In [32]:
# Show the feature rows of two titles side by side to compare their vectors.
data[data['name'].isin(['One Piece', 'Dragon Ball Z'])]

Unnamed: 0,name,rating,members,type_Music,type_ONA,type_OVA,type_Special,type_TV,Action,Adventure,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
74,One Piece,2.052849,8.786553,0,0,0,0,1,1,1,...,0,0,0,0,1,0,0,0,0,0
206,Dragon Ball Z,1.798897,6.453167,0,0,0,0,1,1,1,...,0,0,0,0,1,0,0,0,0,0


### Interview Questions