#### Anime Recommendation System using Cosine Similarity

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("C:/Users/harik/Data science Assignment/anime.csv")
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [4]:
numeric_columns = ['episodes', 'rating', 'members']

for col in numeric_columns:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].median())

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
numerical_scaled = scaler.fit_transform(df[numeric_columns])

In [5]:
print("Dataset Shape:", df.shape)
print("Columns:", df.columns.tolist())

Dataset Shape: (12294, 7)
Columns: ['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members']


In [6]:
df['genre'] = df['genre'].astype(str)

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words='english')
genre_matrix = tfidf.fit_transform(df['genre'])

In [8]:
scaler = MinMaxScaler()

numerical_features = df[['rating', 'episodes', 'members']]
numerical_scaled = scaler.fit_transform(numerical_features)

In [9]:
from scipy.sparse import hstack

feature_matrix = hstack([genre_matrix, numerical_scaled])

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
cosine_sim = cosine_similarity(feature_matrix)

In [12]:
def recommend_anime(anime_title, top_n=10, similarity_threshold=0.3):
    
    if anime_title not in df['name'].values:
        return "Anime not found in dataset."
    
    idx = df[df['name'] == anime_title].index[0]
    
    similarity_scores = list(enumerate(cosine_sim[idx]))
    
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    
    # Remove itself
    similarity_scores = similarity_scores[1:]
    
    # Apply threshold
    similarity_scores = [
        (i, score) for i, score in similarity_scores if score >= similarity_threshold
    ]
    
    top_indices = [i for i, score in similarity_scores[:top_n]]
    
    return df[['name', 'genre', 'rating', 'type']].iloc[top_indices]


In [13]:
recommend_anime(
    anime_title="Naruto",
    top_n=10,
    similarity_threshold=0.35
)

Unnamed: 0,name,genre,rating,type
615,Naruto: Shippuuden,"Action, Comedy, Martial Arts, Shounen, Super P...",7.94,TV
206,Dragon Ball Z,"Action, Adventure, Comedy, Fantasy, Martial Ar...",8.32,TV
346,Dragon Ball,"Adventure, Comedy, Fantasy, Martial Arts, Shou...",8.16,TV
1472,Naruto: Shippuuden Movie 4 - The Lost Tower,"Action, Comedy, Martial Arts, Shounen, Super P...",7.53,Movie
1573,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...,"Action, Comedy, Martial Arts, Shounen, Super P...",7.5,Movie
486,Boruto: Naruto the Movie,"Action, Comedy, Martial Arts, Shounen, Super P...",8.03,Movie
1343,Naruto x UT,"Action, Comedy, Martial Arts, Shounen, Super P...",7.58,OVA
2997,Naruto Soyokazeden Movie: Naruto to Mashin to ...,"Action, Comedy, Martial Arts, Shounen, Super P...",7.11,Movie
588,Dragon Ball Kai,"Action, Adventure, Comedy, Fantasy, Martial Ar...",7.95,TV
1103,Boruto: Naruto the Movie - Naruto ga Hokage ni...,"Action, Comedy, Martial Arts, Shounen, Super P...",7.68,Special
