### Data Loading and preprocessing

In [1]:
import pandas as pd

df= pd.read_csv(r"C:\Users\USER\Downloads\anime.csv")
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [2]:
df.tail()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175
12293,26081,Yasuji no Pornorama: Yacchimae!!,Hentai,Movie,1,5.46,142


In [3]:
#Dimension
df.shape

(12294, 7)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [5]:
df.describe()

Unnamed: 0,anime_id,rating,members
count,12294.0,12064.0,12294.0
mean,14058.221653,6.473902,18071.34
std,11455.294701,1.026746,54820.68
min,1.0,1.67,5.0
25%,3484.25,5.88,225.0
50%,10260.5,6.57,1550.0
75%,24794.5,7.18,9437.0
max,34527.0,10.0,1013917.0


In [6]:
df.dtypes

anime_id      int64
name         object
genre        object
type         object
episodes     object
rating      float64
members       int64
dtype: object

In [7]:
df.duplicated().sum()

np.int64(0)

In [8]:
df.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [9]:
#Filling null values
df['genre'].fillna(df['genre'].mode()[0], inplace=True) #Picks the first mode value
df['type'].fillna(df['type'].mode()[0], inplace=True)
df['rating'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['genre'].fillna(df['genre'].mode()[0], inplace=True) #Picks the first mode value
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['type'].fillna(df['type'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the interm

In [11]:
# Replace 'Unknown' with NaN
import numpy as np

df['episodes'] = df['episodes'].replace('Unknown', np.nan)

# Convert to numeric
df['episodes'] = pd.to_numeric(df['episodes'], errors='coerce')

# Fill missing values with median
df['episodes'].fillna(df['episodes'].median(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['episodes'].fillna(df['episodes'].median(), inplace=True)


In [12]:
df.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [13]:
df.columns

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')

In [15]:
# Split genres into list
df['genre'] = df['genre'].apply(lambda x: x.split(',') if isinstance(x,str) else [])

In [16]:
# Feature Extraction
# One-hot encode genres
from sklearn.preprocessing import MinMaxScaler, MultiLabelBinarizer

mlb = MultiLabelBinarizer()
genre_features = mlb.fit_transform(df['genre'])

In [17]:
# Normalize numerical features
scaler = MinMaxScaler()
numerical_features = scaler.fit_transform(df[['rating','episodes','members']])

In [20]:
# Combine features
features = np.hstack([genre_features, numerical_features]) #Horizontal stack -concatenation

In [23]:
#Compute Cosine Similarity

from sklearn.metrics.pairwise import cosine_similarity

similarity_matrix = cosine_similarity(features)

In [25]:
#Recommendation Function
def recommend_anime(title, top_n=10):
    if title not in df['name'].values:
        return f"Anime '{title}' not found in dataset"
    idx = df[df['name'] == title].index[0]
    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1] # exclude the anime itself
    recommended = [df.iloc[i[0]]['name'] for i in sim_scores]
    return recommended

In [28]:
# Test Recommendations
recommendations = recommend_anime('Gintama°', top_n=10)
print('Recommended anime for Kimi no Na wa.:')
for i, anime in enumerate(recommendations,1):
    print(f'{i}. {anime}')

Recommended anime for Kimi no Na wa.:
1. Gintama&#039;
2. Gintama&#039;: Enchousen
3. Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare
4. Gintama Movie: Shinyaku Benizakura-hen
5. Gintama: Yorinuki Gintama-san on Theater 2D
6. Gintama: Shinyaku Benizakura-hen
7. Gintama: Jump Festa 2014 Special
8. Gintama
9. Gintama (2017)
10. Gintama: Nanigoto mo Saiyo ga Kanjin nano de Tasho Senobisuru Kurai ga Choudoyoi
