In [None]:
1. Difference between User-Based and Item-Based Collaborative Filtering
--->User-based focuses on finding similar users, while item-based focuses on finding similar items.
Item-based filtering is generally more stable and scalable since user preferences change more frequently than item relationships.

2. What is Collaborative Filtering, and How Does It Work?
-->Collaborative filtering is a recommendation technique that predicts a user’s preferences based on the behavior of other users.
It works by analyzing past interactions (e.g., ratings, clicks, purchases) to find patterns and similarities either between users or between items.

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
df = pd.read_csv(r"C:\Users\Asad shaikh\Documents\anime.csv")



In [2]:
# Task 1: Data Preprocessing
print("Dataset Info:")
print(df.info())

# Handle missing values
df = df.dropna()

# Select relevant features
df = df[['anime_id', 'name', 'genre', 'rating']]

def clean_genre(genre):
    return " ".join(genre.split(", ")) if isinstance(genre, str) else ""

df['genre'] = df['genre'].apply(clean_genre)


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
None


In [3]:
# Task 2: Feature Extraction
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['genre'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [4]:
# Task 3: Recommendation System
def recommend_anime(anime_name, top_n=5):
    if anime_name not in df['name'].values:
        return "Anime not found."
    
    anime_index = df[df['name'] == anime_name].index[0]
    similarity_scores = list(enumerate(cosine_sim[anime_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    top_anime_indices = [i[0] for i in similarity_scores[1:top_n+1]]
    
    return df.iloc[top_anime_indices][['name', 'genre', 'rating']]

# Example 
anime_recommendations = recommend_anime("Naruto")
print(anime_recommendations)

                                                   name  \
615                                  Naruto: Shippuuden   
841                                              Naruto   
1103  Boruto: Naruto the Movie - Naruto ga Hokage ni...   
1343                                        Naruto x UT   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   

                                               genre  rating  
615   Action Comedy Martial Arts Shounen Super Power    7.94  
841   Action Comedy Martial Arts Shounen Super Power    7.81  
1103  Action Comedy Martial Arts Shounen Super Power    7.68  
1343  Action Comedy Martial Arts Shounen Super Power    7.58  
1472  Action Comedy Martial Arts Shounen Super Power    7.53  
