In [4]:
import pandas as pd

# Load dataset
df = pd.read_csv('anime.csv')
print(df.head())  # View the first few rows
print(df.info())  # Summary of data types and null values


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  

In [5]:
# Check for missing values
print(df.isnull().sum())

# Drop rows with missing 'name' or 'genre'
df.dropna(subset=['name', 'genre'], inplace=True)

# Fill missing ratings with the mean rating
df['rating'].fillna(df['rating'].mean(), inplace=True)


anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rating'].fillna(df['rating'].mean(), inplace=True)


In [6]:
# Split genres into a list and one-hot encode them
df['genre'] = df['genre'].str.split(', ')
genre_df = df['genre'].str.join('|').str.get_dummies()

# One-hot encode 'type' column as well
type_df = pd.get_dummies(df['type'])


In [7]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[['rating', 'members']] = scaler.fit_transform(df[['rating', 'members']])


In [8]:
# Concatenate genre, type, rating, and members into one DataFrame
features_df = pd.concat([genre_df, type_df, df[['rating', 'members']]], axis=1)


In [9]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(features_df)


In [10]:
def recommend_anime(anime_name, df, cosine_sim, threshold=0.5, top_n=10):
    # Get the index of the anime by name
    index = df[df['name'] == anime_name].index[0]

    # Retrieve similarity scores for this anime
    sim_scores = list(enumerate(cosine_sim[index]))

    # Filter based on threshold and sort by similarity score
    sim_scores = sorted([s for s in sim_scores if s[1] > threshold], key=lambda x: x[1], reverse=True)

    # Get indices of top similar animes
    top_indices = [i[0] for i in sim_scores[1:top_n+1]]

    # Return the names of top recommended anime
    return df['name'].iloc[top_indices].tolist()

# Example usage
print(recommend_anime("Naruto", df, cosine_sim, threshold=0.6, top_n=5))


['Naruto: Shippuuden', 'Katekyo Hitman Reborn!', 'Dragon Ball Z', 'Bleach', 'Dragon Ball Kai']


In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision, recall, and F1 (example with placeholders)
# precision_score(y_true, y_pred, average='macro')


#Interview Questions
1. Can you explain the difference between user-based and item-based collaborative filtering?

 Ans. User-based filtering recommends items based on similar users’ preferences, while item-based filtering suggests items similar to what a user has already liked. User-based focuses on finding users with shared tastes, whereas item-based focuses on item similarity, often making it more scalable for large user bases.

2. What is collaborative filtering, and how does it work?

 Ans. Collaborative filtering uses user behavior (like ratings or purchases) to make recommendations, without needing item details. It works by either finding similar users (user-based) or similar items (item-based) to generate personalized suggestions.