In [14]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
df = pd.read_csv('anime.csv')

# Display basic info
print("Dataset Info:")
# DataFrame.info() writes its report to stdout and returns None, so wrapping
# it in print() would emit an extra "None" line after the report.
df.info()
print("\nSample Data:")
print(df.head())

# Drop rows with missing essential data, then renumber the rows 0..n-1.
# The similarity matrix built below is addressed by row *position*; without
# the reset, the surviving index labels would have gaps and would no longer
# line up with positions in tfidf_matrix / cosine_sim.
df = df.dropna(subset=['genre', 'name']).reset_index(drop=True)

# Fill missing ratings with the mean rating
df['rating'] = df['rating'].fillna(df['rating'].mean())

# TF-IDF Vectorization of genres
# NOTE(review): the vectorizer's default tokenizer splits on non-word
# characters, so hyphenated or multi-word genre labels become several
# tokens — confirm that is the intended genre vocabulary.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['genre'])

# Compute cosine similarity between anime based on genre
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Map anime names to row positions. Duplicates must be removed on the *names*
# (the Series index), not on the values: the values are already unique row
# numbers, so Series.drop_duplicates() would remove nothing and a repeated
# title would still look up to several rows.
anime_indices = pd.Series(df.index, index=df['name'])
anime_indices = anime_indices[~anime_indices.index.duplicated(keep='first')]

# Recommendation function
def recommend_anime(title, top_n=10):
    """Return the names of the anime whose genres are most similar to `title`.

    Parameters
    ----------
    title : str
        Anime name to look up in `anime_indices`.
    top_n : int, default 10
        Maximum number of recommendations to return; values below 1 yield
        an empty list.

    Returns
    -------
    list of str
        Up to `top_n` names ordered by descending cosine similarity, never
        containing the queried anime itself.
    str
        The message "Anime not found in the dataset." when `title` is not a
        key of `anime_indices`.
    """
    if title not in anime_indices:
        return "Anime not found in the dataset."

    label = anime_indices[title]
    # A title that occurs more than once looks up to a Series; use its first row.
    if isinstance(label, pd.Series):
        label = label.iloc[0]

    # anime_indices stores df index labels, whereas cosine_sim and .iloc are
    # positional. Translate explicitly so the lookup stays correct even when
    # the labels are not a contiguous 0..n-1 range.
    pos = df.index.get_loc(label)
    scores = cosine_sim[pos]

    # Leave the queried row out by position instead of assuming it sorts
    # first: rows with identical genres tie with it, and the stable sort would
    # put an earlier tied row in front, leaking the query into the results.
    candidates = [i for i in range(len(scores)) if i != pos]
    candidates.sort(key=lambda i: scores[i], reverse=True)

    return df['name'].iloc[candidates[:max(top_n, 0)]].tolist()

# Simulated Evaluation Function
# Simulated Evaluation Function
def evaluate_recommendation(title, top_n=10):
    """Print the recommendations for `title` with a simulated precision score.

    The 50 highest-rated rows of `df` serve as a pseudo "relevant" set, and
    precision is the share of returned recommendations whose name is in it.

    Parameters
    ----------
    title : str
        Anime name passed through to `recommend_anime`.
    top_n : int, default 10
        Number of recommendations requested.

    Returns
    -------
    str or None
        The message string from `recommend_anime` when it returns one;
        otherwise None, after the report has been printed.
    """
    recommendations = recommend_anime(title, top_n=top_n)
    if isinstance(recommendations, str):
        return recommendations

    # Use top 50 rated anime as a pseudo "relevant" set (a set gives O(1) lookups)
    top_rated = set(
        df.sort_values(by='rating', ascending=False).head(50)['name']
    )

    hits = [anime for anime in recommendations if anime in top_rated]
    # Divide by the number of items actually returned: this avoids a
    # ZeroDivisionError for top_n=0 and does not understate precision when
    # fewer than top_n recommendations are available.
    precision = len(hits) / len(recommendations) if recommendations else 0.0

    print(f"\nRecommendations for '{title}':")
    for i, anime in enumerate(recommendations, 1):
        print(f"{i}. {anime}")

    print(f"\nSimulated Precision (hit in top-rated): {precision:.2f}")


# Report genre-based recommendations and the simulated precision for one title.
evaluate_recommendation(title='Naruto', top_n=10)


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
None

Sample Data:
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1 

In [10]:
# Report genre-based recommendations and the simulated precision for one title.
evaluate_recommendation(title="Steins;Gate", top_n=10)


Recommendations for 'Steins;Gate':
1. Steins;Gate Movie: Fuka Ryouiki no Déjà vu
2. Steins;Gate: Oukoubakko no Poriomania
3. Steins;Gate: Kyoukaimenjou no Missing Link - Divide By Zero
4. Steins;Gate 0
5. Under the Dog
6. Loups=Garous
7. Loups=Garous Pilot
8. Kyoto Animation: Megane-hen
9. Ibara no Ou
10. Duan Nao

Simulated Precision (hit in top-rated): 0.00

Recommendations for 'Fullmetal Alchemist: Brotherhood':
1. Fullmetal Alchemist
2. Fullmetal Alchemist: The Sacred Star of Milos
3. Fullmetal Alchemist: Brotherhood Specials
4. Tales of Vesperia: The First Strike
5. Tide-Line Blue
6. Fullmetal Alchemist: Reflections
7. Magi: The Kingdom of Magic
8. Magi: The Labyrinth of Magic
9. Magi: Sinbad no Bouken (TV)
10. Magi: Sinbad no Bouken

Simulated Precision (hit in top-rated): 0.00


In [12]:
# Report genre-based recommendations and the simulated precision for one title.
evaluate_recommendation(title="Fullmetal Alchemist: Brotherhood", top_n=10)


Recommendations for 'Fullmetal Alchemist: Brotherhood':
1. Fullmetal Alchemist
2. Fullmetal Alchemist: The Sacred Star of Milos
3. Fullmetal Alchemist: Brotherhood Specials
4. Tales of Vesperia: The First Strike
5. Tide-Line Blue
6. Fullmetal Alchemist: Reflections
7. Magi: The Kingdom of Magic
8. Magi: The Labyrinth of Magic
9. Magi: Sinbad no Bouken (TV)
10. Magi: Sinbad no Bouken

Simulated Precision (hit in top-rated): 0.00


In [None]:
# 1. Can you explain the difference between user-based and item-based collaborative filtering?
# User-based collaborative filtering:

# Finds users similar to the target user based on behavior (ratings).

# Recommends items that similar users liked.

# Example: If User A and User B liked the same anime, and User B watched another anime, recommend it to User A.

# Item-based collaborative filtering:

# Finds items (anime) similar to the ones the user has liked.

# Recommends items with high similarity to previously liked items.

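# Usually more scalable and stable than user-based filtering, because item-to-item similarities change less often than user-to-user similarities and can be precomputed.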

In [None]:
# 2. What is collaborative filtering, and how does it work?
# Collaborative filtering is a recommender system approach that uses the behavior of users (ratings, clicks) to make predictions.

# Assumes: "If users agreed in the past, they'll agree in the future."

# Types:

# User-based: Recommend based on similar users.

# Item-based: Recommend similar items.

# Uses similarity metrics like cosine similarity, Pearson correlation, etc.