# Anime Recommendation System using Cosine Similarity

In [1]:
import pandas as pd
import scipy.sparse as sp
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score

# Read the anime catalogue from the CSV in the working directory and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer='anime.csv')
df.head(5)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [2]:
# Replace missing values column by column: text columns get the placeholder
# 'Unknown', rating gets the column mean, and members gets 0.
fill_values = {
    'genre': 'Unknown',
    'type': 'Unknown',
    'rating': df['rating'].mean(),
    'members': 0,
}
for column, replacement in fill_values.items():
    df[column] = df[column].fillna(replacement)

# Per-column count of values that are still missing.
df.isna().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [3]:
# Feature extraction: TF-IDF over the combined genre + type text, plus
# min-max-scaled numeric columns (rating, members), compared with cosine similarity.
df['features'] = df['genre'] + ' ' + df['type']

# NOTE(review): genre and type are concatenated into one string, so any word
# shared between a genre and a type maps to the same TF-IDF term. The
# vectorizer's default token pattern also splits on punctuation and whitespace,
# so multi-word genres are indexed word by word rather than as one label.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['features'])

# Scale rating and members to [0, 1] so they can be stacked next to the TF-IDF columns.
scaler = MinMaxScaler()
numeric_scaled = scaler.fit_transform(df[['rating','members']])

# Append the numeric columns to the sparse TF-IDF matrix and convert the result to CSR.
feature_matrix = sp.hstack([tfidf_matrix, numeric_scaled]).tocsr()

# Pairwise similarity between every pair of rows. cosine_similarity returns a
# dense array by default, so memory grows quadratically with the number of rows.
similarity_matrix = cosine_similarity(feature_matrix)

In [4]:
# Recommendation function
def recommend(anime_title, top_n=10):
    """Return the names of the anime most similar to ``anime_title``.

    Similarities are read from the module-level ``similarity_matrix``; the row
    used is the position in ``df`` of the first row whose ``name`` equals
    ``anime_title``.

    Args:
        anime_title: Exact value to look up in ``df['name']``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of names ordered by decreasing similarity (ties keep their row
        order). The queried row itself is never included. An empty list is
        returned when the title is not present or ``top_n`` is not positive.
    """
    name_hits = (df['name'] == anime_title).values
    if top_n <= 0 or not name_hits.any():
        return []

    # Use the row *position* rather than the index label: similarity_matrix is
    # positional, so a label would point at the wrong row (or out of bounds)
    # whenever df does not carry the default 0..n-1 index.
    idx = int(name_hits.argmax())
    scores = similarity_matrix[idx]

    # Exclude the query row explicitly. Dropping "the first sorted entry"
    # removes the wrong row when another title ties with the query at the top.
    candidates = [i for i in range(len(scores)) if i != idx]
    candidates.sort(key=lambda i: scores[i], reverse=True)  # stable: ties keep row order

    names = df['name']
    return [names.iloc[i] for i in candidates[:top_n]]

# Demo: show the default top-10 recommendations for 'Naruto', or a notice when
# that title is not in the catalogue.
'Naruto not found' if 'Naruto' not in df['name'].values else recommend('Naruto')

['Naruto: Shippuuden',
 'Dragon Ball Z',
 'Dragon Ball',
 'Dragon Ball Kai',
 'Dragon Ball Super',
 'Naruto: Shippuuden Movie 4 - The Lost Tower',
 'Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono',
 'Boruto: Naruto the Movie',
 'Kurokami The Animation',
 'Rekka no Honoo']

In [5]:
# Evaluation: for a sample of held-out titles, check whether any of the top-N
# recommendations has exactly the same genre string as the queried title.
#
# Caveats when reading the numbers:
# - similarity_matrix is built from every row of df, so `train` is unused and
#   the split only decides which titles are queried.
# - Every y_true entry is 1, so precision is 1.0 whenever at least one query
#   has a hit, and recall is simply the fraction of queries with a hit.
EVAL_SAMPLE_SIZE = 200  # number of test titles that are scored
EVAL_TOP_N = 5          # recommendations inspected per title

train, test = train_test_split(df, test_size=0.2, random_state=42)

# Build the name -> genre lookup once instead of scanning the whole frame for
# every title and every recommendation. keep='first' matches the original
# `.values[0]` choice when a name occurs more than once.
genre_by_name = (
    df.drop_duplicates(subset='name', keep='first')
      .set_index('name')['genre']
      .to_dict()
)

y_true = []
y_pred = []

for title in test['name'].iloc[:EVAL_SAMPLE_SIZE]:
    true_genre = genre_by_name[title]
    recs = recommend(title, top_n=EVAL_TOP_N)
    # 1 if any recommendation shares the exact genre string, else 0.
    match = int(any(genre_by_name[rec] == true_genre for rec in recs))
    y_true.append(1)
    y_pred.append(match)

precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

precision, recall, f1

(1.0, 0.815, 0.8980716253443526)

In [6]:
# Interview Questions

In [7]:
# 1) Difference Between User-Based and Item-Based Collaborative Filtering
# Both are types of collaborative filtering, used in recommendation systems to predict user preferences based on past behavior.

# User-Based Collaborative Filtering

# Definition:
# Recommends items to a user based on the preferences of similar users.

# How it works:
# Find users with behavior similar to the target user (using cosine similarity, Pearson correlation, etc.).
# Identify items liked by these similar users.
# Recommend those items to the target user.

# Example:
# If User A and User B like the same movies, and User B likes a new movie, recommend that movie to User A.

# Pros:
# Simple to understand
# Personalized recommendations

# Cons:
# Scalability issues with many users
# Less effective when user behavior changes frequently

# Item-Based Collaborative Filtering

# Definition:
# Recommends items that are similar to the ones the user has already liked.

# How it works:
# Compute similarity between items.
# Identify items similar to those the user has interacted with.
# Recommend similar items.

# Example:
# If a user liked Movie X, recommend Movie Y because many users who liked X also liked Y.

# Pros:
# More scalable than user-based
# Stable recommendations
# Works well for large systems

# Cons:
# Less personalized
# New items face the cold-start problem

In [8]:
# 2) What is Collaborative Filtering, and How Does It Work?
# Collaborative Filtering (CF)

# Definition:
# Collaborative filtering is a recommendation technique that predicts a user's interests from patterns in user-item interactions, without using item content.
# Core assumption: "Users who behaved similarly in the past will behave similarly in the future."

# How Collaborative Filtering Works:

# 1. Collect Interaction Data
#    - Ratings
#    - Clicks
#    - Purchases
#    - Likes
# 2. Create User-Item Matrix
#    - Rows → Users
#    - Columns → Items
#    - Values → Ratings or interactions
# 3. Find Similarities
#    - User-user similarity (user-based)
#    - Item-item similarity (item-based)
# 4. Generate Recommendations
#    - Predict missing ratings
#    - Recommend top-N items

# Types of Collaborative Filtering
# 1. Memory-Based CF
#    - User-based
#    - Item-based
#    - Uses similarity metrics directly
# 2. Model-Based CF
#    - Matrix Factorization (SVD, ALS)
#    - Neural Collaborative Filtering
#    - Scales better for large datasets
#
# Advantages of Collaborative Filtering
#    - No need for item metadata
#    - Learns complex user behavior
#    - Produces highly relevant recommendations
#
# Limitations
#    - Cold-start problem (new users/items)
#    - Sparsity in user-item matrix
#    - Scalability for very large datasets