In [None]:
#                                                             Data Preprocessing:

In [1]:
import pandas as pd

# Load the anime catalogue from the CSV file in the working directory.
data = pd.read_csv('A16.csv')
# Print each column's dtype and non-null count to spot missing values
# and columns that were not parsed as numbers.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [2]:
# Count the missing values in every column before any imputation.
data.isna().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [3]:
# Count, per column, the cells that are exactly 0 (zeros sometimes stand in
# for missing values).
# NOTE(review): columns stored as object dtype (e.g. `episodes`) hold strings,
# which never compare equal to the integer 0 -- confirm that is acceptable.
data.eq(0).sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [4]:
# Impute missing values column by column in one pass:
#   genre  -> the placeholder label 'Unknown'
#   type   -> the most frequent type
#   rating -> the median rating
data = data.fillna(value={
    'genre': 'Unknown',
    'type': data['type'].mode()[0],
    'rating': data['rating'].median(),
})

In [5]:
# Preview the first five rows of the imputed frame.
data.head(n=5)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [6]:
# Re-count missing values per column to confirm the imputation left none.
data.isna().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [None]:
#                                                             Feature Extraction:

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialise the TF-IDF vectoriser; English stop words are dropped from the
# vocabulary.
# NOTE(review): with the default tokeniser a label is split on non-word
# characters, so "Sci-Fi" becomes the two tokens "sci" and "fi", and any
# multi-word genre would share tokens with other genres. Confirm this is
# intended; otherwise pass a tokenizer that splits on commas.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the genre vocabulary and encode every title's genre string as a
# sparse TF-IDF row (one row per title, one column per token).
genre_tfidf = tfidf.fit_transform(data['genre'])
genre_tfidf.shape


(12294, 47)

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the cosine similarity between every pair of rows of genre_tfidf;
# CS[i, j] is the genre similarity between title i and title j.
# NOTE(review): the result is a dense n x n array. At the recorded shape of
# 12294 x 12294 that is roughly 1.2 GB if the dtype is float64 -- confirm this
# fits in memory on the target machine.
CS = cosine_similarity(genre_tfidf)
CS.shape


(12294, 12294)

In [None]:
#                                                                    Recommendation System:

In [14]:
# Content-based recommendation system.
def recommend_anime(anime_name, top_n):
    """Return the ``top_n`` titles whose genres are most similar to ``anime_name``.

    Candidates are ranked by their cosine similarity to the query title
    (read from the notebook-level matrix ``CS``) and, where similarities tie,
    by ``rating``; both in descending order. The query title itself is never
    part of the result.

    Parameters
    ----------
    anime_name : str
        Exact value from ``data['name']``. If several rows share the name,
        the first one is used.
    top_n : int
        Maximum number of recommendations to return. Values below 1 give an
        empty result.

    Returns
    -------
    pandas.DataFrame
        The ``name`` and ``rating`` columns of the recommended rows of
        ``data``, best match first, keeping their original index labels.

    Raises
    ------
    IndexError
        If ``anime_name`` does not occur in ``data['name']``.
    """
    # Look the title up by row *position* (not index label): CS is a plain
    # array, so its rows have to be addressed positionally.
    matches = (data['name'] == anime_name).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"anime {anime_name!r} not found in data['name']")
    anime_pos = matches[0]

    sim_df = pd.DataFrame({
        'index': range(len(data)),
        'score': CS[anime_pos],
        # .to_numpy() pairs ratings with rows by position instead of by label.
        'rating': data['rating'].to_numpy(),
    })
    # Remove the query row explicitly. Dropping "the first row after sorting"
    # is wrong whenever another title has identical genres (score 1.0) and a
    # higher rating: that title would be discarded and the query itself
    # returned as a recommendation.
    sim_df = sim_df[sim_df['index'] != anime_pos]
    # Sort by similarity score, breaking ties by rating.
    sim_df = sim_df.sort_values(by=['score', 'rating'], ascending=False)
    best_positions = sim_df['index'].head(max(top_n, 0)).to_numpy()
    # Return recommended anime names with their ratings.
    return data[['name', 'rating']].iloc[best_positions]

In [15]:
# Example query: the five titles closest in genre to "Gintama°".
recommend_anime(anime_name="Gintama°", top_n=5)

Unnamed: 0,name,rating
4,Gintama&#039;,9.16
9,Gintama&#039;: Enchousen,9.11
8,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,9.1
12,Gintama,9.04
63,Gintama: Yorinuki Gintama-san on Theater 2D,8.6


In [None]:
#                                                                 Interview Questions:

In [None]:
1) Difference between User-based and Item-based Collaborative Filtering

User-based CF:
Recommends items liked by similar users.
“Users like you also liked this.”

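Item-based CF:
Recommends items similar to ones the user liked before, where two items count as similar when the same users tend to rate them alike (not because their content matches).
“Because you liked this item, you may like these.”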

Key difference:

User-based -> similarity between users
Item-based -> similarity between items


In [None]:
2) What is Collaborative Filtering & How it Works

Collaborative Filtering is a recommendation technique that uses user
behavior (ratings/interactions) instead of item content.

How it works:

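* Collect user–item ratings into a user–item matrix
* Compute similarity between users or between items (e.g. cosine similarity)
* Predict a user's missing ratings from the most similar users or items
* Recommend the items with the highest predicted ratings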