In [12]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.model_selection import train_test_split

In [14]:
# Load the dataset
try:
    df = pd.read_csv('anime.csv')
except FileNotFoundError:
    print("Error: The 'anime.csv' file was not found. Please ensure it's in the same directory as the script.")
    exit()

In [15]:
# Handle missing values
df['genre'].fillna('Unknown', inplace=True)
df['rating'].fillna(df['rating'].mean(), inplace=True)
df['episodes'] = pd.to_numeric(df['episodes'], errors='coerce').fillna(0)
df['name'] = df['name'].str.replace('&#039;', "'")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['genre'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rating'].fillna(df['rating'].mean(), inplace=True)


In [16]:
# Step 2: Feature Extraction
# Convert genres to a one-hot encoded matrix
df['genre'] = df['genre'].apply(lambda x: x.split(', '))
mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(df['genre'])
genre_df = pd.DataFrame(genre_matrix, columns=mlb.classes_)

In [17]:
# Step 3: Recommendation System
# Calculate cosine similarity matrix
cosine_sim_matrix = cosine_similarity(features_df)

In [22]:
def recommend_anime(anime_name, top_n=10):
    """Recommends anime based on cosine similarity."""
    if anime_name not in df['name'].values:
        print(f"'{anime_name}' not found in the dataset.")
        return pd.DataFrame()

    anime_index = df[df['name'] == anime_name].index[0]
    sim_scores = list(enumerate(cosine_sim_matrix[anime_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]

    anime_indices = [i[0] for i in sim_scores]
    recommendation_scores = [i[1] for i in sim_scores]

    recommended_anime = df.iloc[anime_indices].copy()
    recommended_anime['similarity_score'] = recommendation_scores

    return recommended_anime[['name', 'genre', 'rating', 'similarity_score']]

In [26]:
# Example usage of the recommendation function
print("Recommended Anime for 'Steins;Gate':")

Recommended Anime for 'Steins;Gate':


In [25]:
print(recommend_anime('Steins;Gate'))

                                 name  \
40                         Death Note   
19    Code Geass: Hangyaku no Lelouch   
86                 Shingeki no Kyojin   
98                        Psycho-Pass   
166                        Durarara!!   
29         Tengen Toppa Gurren Lagann   
131                         Toradora!   
1    Fullmetal Alchemist: Brotherhood   
159                      Angel Beats!   
250                 Zankyou no Terror   

                                                 genre  rating  \
40   [Mystery, Police, Psychological, Supernatural,...    8.71   
19   [Action, Mecha, Military, School, Sci-Fi, Supe...    8.83   
86      [Action, Drama, Fantasy, Shounen, Super Power]    8.54   
98             [Action, Police, Psychological, Sci-Fi]    8.50   
166                    [Action, Mystery, Supernatural]    8.38   
29          [Action, Adventure, Comedy, Mecha, Sci-Fi]    8.78   
131           [Comedy, Romance, School, Slice of Life]    8.45   
1    [Action, Advent

In [27]:
# Step 4: Evaluation
# Split a subset of the data for evaluation
df_action = df[df['genre'].apply(lambda x: 'Action' in x)]
if len(df_action) > 1:
    train_anime, test_anime = train_test_split(df_action, test_size=0.3, random_state=42)

In [37]:
if not train_anime.empty:
    target_anime_name = train_anime.iloc[0]['name']
    # Get recommendations to evaluate
    recommendations = recommend_anime(target_anime_name, top_n=len(test_anime))

    # Calculate and print evaluation metrics
    retrieved_items = set(recommendations['name'].values)
    relevant_items = set(test_anime['name'].values)
    true_positives = len(retrieved_items.intersection(relevant_items))
    precision = true_positives / len(retrieved_items) if len(retrieved_items) > 0 else 0
    recall = true_positives / len(relevant_items) if len(relevant_items) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"\nEvaluation Metrics for recommendations based on '{target_anime_name}':")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
else:
    print("\nNot enough 'Action' genre anime to split into training and testing sets for evaluation.")



Evaluation Metrics for recommendations based on 'Majuu Sensen: The Apocalypse':
Precision: 0.0433
Recall: 0.0433
F1-Score: 0.0433


In [None]:
Interview Questions.
1.Can you explain the difference between user-based and item-based collaborative filtering?
Ans: User based and item based collaborative filtering are two fundamental approaches in recommender systems that differ in how they determine similarity to make recommendations.
they are both types of memory-based collaborative filtering, which means they use the entire user item interaction dataset to generate recommendations.
User based collaborative filtering:
>Finding similar users.
>Generating recommendations.
Item based collaborative Filtering:
>Finding similar items.
>Generating recommendations.



2.What is collaborative filtering, and how does it work?
Ans: Collaborative filtering is a type of recommender system that predicts a users interest by collecting and analyzing information from many users.
The process of collaborative filtering can be broken down into few steps:
>Data Collection.
>Finding Similarities.
>Making recommendations.
