## ANIME RECOMMENDATION SYSTEM

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import seaborn as sns
%matplotlib inline

In [3]:
anime = pd.read_csv("/kaggle/input/anime-recommend-shanmukh/anime (1).csv")

In [4]:
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


# Data Preprocessing : 

In [5]:
anime.loc[(anime["genre"]=="Hentai") & (anime["episodes"]=="Unknown"),"episodes"] = "1"
anime.loc[(anime["type"]=="OVA") & (anime["episodes"]=="Unknown"),"episodes"] = "1"

anime.loc[(anime["type"] == "Movie") & (anime["episodes"] == "Unknown")] = "1"

  anime.loc[(anime["type"] == "Movie") & (anime["episodes"] == "Unknown")] = "1"
  anime.loc[(anime["type"] == "Movie") & (anime["episodes"] == "Unknown")] = "1"
  anime.loc[(anime["type"] == "Movie") & (anime["episodes"] == "Unknown")] = "1"


In [6]:
known_animes = {"Naruto Shippuuden":500, "One Piece":784,"Detective Conan":854, "Dragon Ball Super":86,
                "Crayon Shin chan":942, "Yu Gi Oh Arc V":148,"Shingeki no Kyojin Season 2":25,
                "Boku no Hero Academia 2nd Season":25,"Little Witch Academia TV":25}

In [7]:
for k,v in known_animes.items():    
    anime.loc[anime["name"]==k,"episodes"] = v

In [8]:
anime["episodes"] = anime["episodes"].map(lambda x:np.nan if x=="Unknown" else x)

In [9]:
anime["episodes"].fillna(anime["episodes"].median(),inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  anime["episodes"].fillna(anime["episodes"].median(),inplace = True)


### Rating 

Many animes have unknown ratings. These were filled with the median of the ratings.

In [10]:
anime["rating"] = anime["rating"].astype(float)

In [11]:
anime["rating"].fillna(anime["rating"].median(),inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  anime["rating"].fillna(anime["rating"].median(),inplace = True)


### Type 

Type category differentiates between movies, music, TV shows(regular anime episodes), OVA/ONA etc. These are categorical variables so I used ```pd.get_dummies``` to convert them to dummy variables.

In [12]:
pd.get_dummies(anime[["type"]]).head()

Unnamed: 0,type_1,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
0,False,True,False,False,False,False,False
1,False,False,False,False,False,False,True
2,False,False,False,False,False,False,True
3,False,False,False,False,False,False,True
4,False,False,False,False,False,False,True


In [13]:
anime["members"] = anime["members"].astype(float)

# Feature Selection and Preprocessing



In [14]:
anime_features = pd.concat([anime["genre"].str.get_dummies(sep=","),pd.get_dummies(anime[["type"]]),anime[["rating"]],anime[["members"]],anime["episodes"]],axis=1)

In [15]:
anime["name"] = anime["name"].map(lambda name:re.sub('[^A-Za-z0-9]+', " ", name))

In [16]:
anime_features.head()

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,type_1,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,rating,members,episodes
0,0,0,0,0,0,0,0,0,0,0,...,False,True,False,False,False,False,False,9.37,200630.0,1
1,1,0,0,0,0,1,0,1,0,0,...,False,False,False,False,False,False,True,9.26,793665.0,64
2,0,0,1,0,0,0,0,0,0,0,...,False,False,False,False,False,False,True,9.25,114262.0,51
3,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,True,9.17,673572.0,24
4,0,0,1,0,0,0,0,0,0,0,...,False,False,False,False,False,False,True,9.16,151266.0,51


In [17]:
anime_features.columns

Index([' Adventure', ' Cars', ' Comedy', ' Dementia', ' Demons', ' Drama',
       ' Ecchi', ' Fantasy', ' Game', ' Harem', ' Hentai', ' Historical',
       ' Horror', ' Josei', ' Kids', ' Magic', ' Martial Arts', ' Mecha',
       ' Military', ' Music', ' Mystery', ' Parody', ' Police',
       ' Psychological', ' Romance', ' Samurai', ' School', ' Sci-Fi',
       ' Seinen', ' Shoujo', ' Shoujo Ai', ' Shounen', ' Shounen Ai',
       ' Slice of Life', ' Space', ' Sports', ' Super Power', ' Supernatural',
       ' Thriller', ' Vampire', ' Yaoi', ' Yuri', '1', 'Action', 'Adventure',
       'Cars', 'Comedy', 'Dementia', 'Demons', 'Drama', 'Ecchi', 'Fantasy',
       'Game', 'Harem', 'Hentai', 'Historical', 'Horror', 'Josei', 'Kids',
       'Magic', 'Martial Arts', 'Mecha', 'Military', 'Music', 'Mystery',
       'Parody', 'Police', 'Psychological', 'Romance', 'Samurai', 'School',
       'Sci-Fi', 'Seinen', 'Shoujo', 'Shounen', 'Slice of Life', 'Space',
       'Sports', 'Super Power', 'Supernat

In [18]:
from sklearn.preprocessing import MaxAbsScaler

In [19]:
max_abs_scaler = MaxAbsScaler()
anime_features = max_abs_scaler.fit_transform(anime_features)

# KNN for finding similar animes

In [20]:
from sklearn.neighbors import NearestNeighbors

In [21]:
nbrs = NearestNeighbors(n_neighbors=6, algorithm='ball_tree').fit(anime_features)

In [22]:
distances, indices = nbrs.kneighbors(anime_features)

In [23]:
distances

array([[0.00000000e+00, 1.01506549e+00, 1.03095542e+00, 1.03102550e+00,
        1.41625771e+00, 1.43204444e+00],
       [0.00000000e+00, 1.02227158e+00, 1.49378838e+00, 1.51625674e+00,
        1.56476418e+00, 1.58623098e+00],
       [0.00000000e+00, 3.75894154e-02, 4.12559430e-02, 2.35028091e-01,
        2.87140333e-01, 1.41515906e+00],
       ...,
       [0.00000000e+00, 1.66436926e-03, 1.68253094e-03, 5.11962471e-03,
        6.22295696e-03, 6.22392854e-03],
       [0.00000000e+00, 1.00174940e-03, 1.10513608e-03, 2.00036985e-03,
        2.02599380e-03, 3.00064340e-03],
       [0.00000000e+00, 1.00000201e+00, 1.00001800e+00, 1.00001803e+00,
        1.00002450e+00, 1.00004050e+00]])

In [24]:
indices

array([[    0,   208,  1494,  1959,    60,  2103],
       [    1,   200,   268,   101,   795,   290],
       [    2,     4,     9,    12, 10896,     8],
       ...,
       [12291, 12238, 12237, 12236, 12256, 12235],
       [12292, 12231, 12232, 12230, 12229, 12283],
       [12293,  7426,  8279,  7349,  7335,  7498]])

# Query examples and helper functions 

In [25]:

def get_index_from_name(name):
    for i in range((anime.shape[0])):
        if anime["name"][i]==name:
             return i

In [26]:
get_index_from_name("Naruto")

841

In [27]:
all_anime_names = list(anime.name.values)

In [28]:
def get_id_from_partial_name(partial):
    for name in all_anime_names:
        if partial in name:
            print(name,all_anime_names.index(name))

In [30]:
""" print_similar_query can search for similar animes both by id and by name. """

def print_similar_animes(query):
#     if id:
#         for id in indices[id][1:]:
#             print(anime.iloc[id]["name"])
    
        found_id = get_index_from_name(query)
        for id in indices[found_id][1:]:
            print(anime.iloc[id]["name"])

# Model Prediction
## Here Similar anime prints anime which are same in genre and type.

In [31]:
print_similar_animes("Death Billiards")

Omoide no Marnie
Saibi
Death Parade
Mahou Shoujo Madoka Magica Movie 1 Hajimari no Monogatari
Mahou Shoujo Madoka Magica Movie 2 Eien no Monogatari


In [32]:
print_similar_animes("Demon Slayer")

In [33]:
print_similar_animes("Mushishi")

Mushishi Zoku Shou
Mushishi Zoku Shou 2nd Season
Mushishi Special Hihamukage
Mushishi Zoku Shou Odoro no Michi
Mushishi Zoku Shou Suzu no Shizuku


In [34]:
print_similar_animes("Gintama")

Gintama 039 
Gintama 
Gintama 039 Enchousen
Gintama 2017 
Gintama Movie Kanketsu hen Yorozuya yo Eien Nare


In [35]:
print_similar_animes("Fairy Tail")

Fairy Tail 2014 
Magi The Labyrinth of Magic
Magi The Kingdom of Magic
Densetsu no Yuusha no Densetsu
Magi Sinbad no Bouken TV 


In [36]:
get_id_from_partial_name("Naruto")

Boruto Naruto the Movie 486
Naruto Shippuuden 615
The Last Naruto the Movie 719
Naruto Shippuuden Movie 6 Road to Ninja 784
Naruto 841
Boruto Naruto the Movie Naruto ga Hokage ni Natta Hi 1103
Naruto Shippuuden Movie 5 Blood Prison 1237
Naruto x UT 1343
Naruto Shippuuden Movie 4 The Lost Tower 1472
Naruto Shippuuden Movie 3 Hi no Ishi wo Tsugu Mono 1573
Naruto Shippuuden Movie 1 1827
Naruto Shippuuden Movie 2 Kizuna 1828
Naruto Shippuuden Shippuu quot Konoha Gakuen quot Den 2374
Naruto Honoo no Chuunin Shiken Naruto vs Konohamaru  2416
Naruto SD Rock Lee no Seishun Full Power Ninden 2457
Naruto Shippuuden Sunny Side Battle 2458
Naruto Movie 1 Dai Katsugeki Yuki Hime Shinobu Houjou Dattebayo  2756
Naruto Soyokazeden Movie Naruto to Mashin to Mitsu no Onegai Dattebayo  2997
Naruto Movie 2 Dai Gekitotsu Maboroshi no Chiteiiseki Dattebayo  3449
Naruto Dai Katsugeki Yuki Hime Shinobu Houjou Dattebayo Special Konoha Annual Sports Festival 3529
Naruto Movie 3 Dai Koufun Mikazuki Jima no Anima

In [37]:
print_similar_animes("Kimi no Na wa ")

Kokoro ga Sakebitagatterunda 
Harmonie
Air Movie
Hotarubi no Mori e
Clannad Movie


In [41]:
# Create DataFrame for predictions
df_output = pd.DataFrame(indices)

# Define filename for saving predictions
filename = "/kaggle/working/anime_prediction.csv"

# Save predictions to CSV file
df_output.to_csv(filename, index=False)

print(f"Successfully Saved in File: {filename}")
print('\nParticipants can download "predictions.csv" and upload it using the "Submit Prediction" button on the competition page.')

# Display the first 10 rows of predictions DataFrame
print("\nFirst few rows of saved predictions:")
display(df_output.head(n=10))

Successfully Saved in File: /kaggle/working/anime_prediction.csv

Participants can download "predictions.csv" and upload it using the "Submit Prediction" button on the competition page.

First few rows of saved predictions:


Unnamed: 0,0,1,2,3,4,5
0,0,208,1494,1959,60,2103
1,1,200,268,101,795,290
2,2,4,9,12,10896,8
3,3,10898,3581,10975,10858,8910
4,4,2,9,12,10896,8
5,5,79,14,43,32,44
6,6,112,9673,1796,1887,3757
7,7,413,1426,3370,5062,305
8,8,65,63,9,2,4
9,9,2,4,10896,12,8


In [40]:
# Create DataFrame for predictions
df_output = anime

# Define filename for saving predictions
filename = "/kaggle/working/anime_preprocessed.csv"

# Save predictions to CSV file
df_output.to_csv(filename, index=False)

print(f"Successfully Saved in File: {filename}")
print('\nParticipants can download "predictions.csv" and upload it using the "Submit Prediction" button on the competition page.')

# Display the first 10 rows of predictions DataFrame
print("\nFirst few rows of saved predictions:")
display(df_output.head(n=10))

Successfully Saved in File: /kaggle/working/anime_preprocessed.csv

Participants can download "predictions.csv" and upload it using the "Submit Prediction" button on the competition page.

First few rows of saved predictions:


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630.0
1,5114,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665.0
2,28977,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262.0
3,9253,Steins Gate,"Sci-Fi, Thriller",TV,24,9.17,673572.0
4,9969,Gintama 039,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266.0
5,32935,Haikyuu Karasuno Koukou VS Shiratorizawa Gakue...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351.0
6,11061,Hunter x Hunter 2011,"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855.0
7,820,Ginga Eiyuu Densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,9.11,80679.0
8,15335,Gintama Movie Kanketsu hen Yorozuya yo Eien Nare,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534.0
9,15417,Gintama 039 Enchousen,"Action, Comedy, Historical, Parody, Samurai, S...",TV,13,9.11,81109.0
