In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sklearn.model_selection import train_test_split
file_path =r"anime.csv"  
df = pd.read_csv(file_path)
df.head()


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
missing_values = df.isnull().sum()
missing_values


anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [4]:
df['genre'].fillna('Unknown', inplace=True)  
df['rating'].fillna(df['rating'].mean(), inplace=True) 
df.dropna(inplace=True)  
df.isnull().sum()


anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [5]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['genre'].astype(str))
filtered_df = df[df['rating'] > 8.0].reset_index(drop=True)
tfidf_matrix_filtered = tfidf.fit_transform(filtered_df['genre'].astype(str))
cosine_sim_filtered = cosine_similarity(tfidf_matrix_filtered, tfidf_matrix_filtered)
cosine_sim_filtered.shape


(583, 583)

In [6]:
anime_indices_filtered = pd.Series(filtered_df.index, index=filtered_df['name']).drop_duplicates()

def recommend_anime(title, num_recommendations=5):
    """Recommend anime based on cosine similarity."""
    if title not in anime_indices_filtered:
        return "Anime not found in dataset."
    
    idx = anime_indices_filtered[title]
    
    sim_scores = list(enumerate(cosine_sim_filtered[idx]))
    
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    sim_scores = sim_scores[1:num_recommendations+1]
    
    anime_recommendations = [filtered_df.iloc[i[0]]['name'] for i in sim_scores]
    
    return anime_recommendations

recommend_anime("Steins;Gate", 5)


['Steins;Gate Movie: Fuka Ryouiki no Déjà vu',
 'Steins;Gate: Oukoubakko no Poriomania',
 'Steins;Gate: Kyoukaimenjou no Missing Link - Divide By Zero',
 'Gankutsuou',
 'Higashi no Eden']

In [7]:
def evaluate_recommendations():
    """Evaluate the recommendation system using precision, recall, and F1-score."""
    precision_list = []
    recall_list = []
    
    train_data, test_data = train_test_split(filtered_df, test_size=0.2, random_state=42)
    
    for index, row in test_data.iterrows():
        actual_anime = row['name']
        recommended_anime = recommend_anime(actual_anime, num_recommendations=5)
        
        if isinstance(recommended_anime, str): 
            continue  

        relevant_items = sum(1 for anime in recommended_anime if anime in train_data['name'].values)
        precision = relevant_items / len(recommended_anime) if recommended_anime else 0
        recall = relevant_items / 5  

        precision_list.append(precision)
        recall_list.append(recall)
    
   
    precision = np.mean(precision_list) if precision_list else 0
    recall = np.mean(recall_list) if recall_list else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return precision, recall, f1_score

evaluate_recommendations()


(0.7418803418803419, 0.7418803418803419, 0.741880341880342)

# 1. Can you explain the difference between user-based and item-based collaborative filtering?


# 2. What is collaborative filtering, and how does it work?
