In [None]:
from google.colab import files

# Prompt for a local file through Colab's upload widget. The next cell expects
# the uploaded file to be available as /content/anime.csv.
uploaded = files.upload()

Saving anime.csv to anime.csv


In [None]:
import pandas as pd

# Load the dataset uploaded in the previous cell
anime_data = pd.read_csv("/content/anime.csv")

# Display the first few rows
print(anime_data.head())

# Check dataset structure.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would append a stray "None" line.
print("\nDataset Info:")
anime_data.info()

# Check for missing values (count of nulls per column)
print("\nMissing Values:")
print(anime_data.isnull().sum())


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null

In [None]:
# Impute gaps: a missing rating takes the column mean, a missing genre gets the
# placeholder label "Unknown" so the genre column contains no nulls afterwards.
fill_values = {"rating": anime_data["rating"].mean(), "genre": "Unknown"}
anime_data = anime_data.fillna(fill_values)

# Drop the identifier column when it is present; errors="ignore" makes this a
# no-op once the column is already gone (e.g. when the cell is re-run).
anime_data = anime_data.drop(columns=["anime_id"], errors="ignore")

# Show the result of the preprocessing step
print("\n✅ Data Preprocessing Completed!")
print(anime_data.head())



✅ Data Preprocessing Completed!
                               name  \
0                    Kimi no Na wa.   
1  Fullmetal Alchemist: Brotherhood   
2                          Gintama°   
3                       Steins;Gate   
4                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

def split_genres(genre_text):
    """Split a comma-separated genre string into one token per genre.

    Surrounding whitespace is stripped and empty fragments are discarded, so
    "Action, Sci-Fi" becomes ["Action", "Sci-Fi"].
    """
    return [genre.strip() for genre in genre_text.split(",") if genre.strip()]


# Convert genres into numerical format using TF-IDF, with ONE feature per genre.
# The default word-level tokenizer would break multi-word or hyphenated genres
# such as "Martial Arts" and "Sci-Fi" into fragments ("martial", "arts", "sci",
# "fi"), letting unrelated genres share tokens and skewing the similarity.
# English stop-word removal is dropped because tokens are now whole genre labels.
# token_pattern=None tells scikit-learn the pattern is intentionally unused,
# which avoids its "token_pattern will not be used" warning.
vectorizer = TfidfVectorizer(tokenizer=split_genres, token_pattern=None)
genre_matrix = vectorizer.fit_transform(anime_data["genre"])

print("\n✅ Genre Vectorization Completed!")
print(f"Genre matrix shape: {genre_matrix.shape}")



✅ Genre Vectorization Completed!
Genre matrix shape: (12294, 47)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Build the anime-by-anime cosine similarity matrix from the genre vectors.
# Passing only one matrix makes scikit-learn compare it against itself.
cosine_sim = cosine_similarity(genre_matrix)

print("\n✅ Cosine Similarity Computed!")



✅ Cosine Similarity Computed!


In [None]:
def recommend_anime(title, anime_data, cosine_sim, top_n=5):
    """Return the ``top_n`` rows most similar to the anime called ``title``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``anime_data["name"]``. If the name occurs
        more than once, the first matching row is used as the query.
    anime_data : pandas.DataFrame
        Must contain ``name``, ``genre`` and ``rating`` columns. Its row order
        must match the row/column order of ``cosine_sim``.
    cosine_sim : 2-D array-like
        ``cosine_sim[i][j]`` is the similarity between rows ``i`` and ``j``.
    top_n : int, default 5
        Maximum number of recommendations; values below 1 yield no rows.

    Returns
    -------
    pandas.DataFrame
        The ``name``, ``genre`` and ``rating`` columns of the recommended rows,
        most similar first. The query row itself is never included. When
        ``title`` is not found, a message is printed and an empty frame with
        the same three columns is returned.
    """
    output_columns = ["name", "genre", "rating"]

    # Work with row POSITIONS rather than index labels: the similarity matrix
    # is positional, and labels may be non-contiguous or names may repeat.
    matches = (anime_data["name"] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        print("❌ Anime not found in the dataset.")
        # Same type and columns as the success path so callers can use .empty
        return anime_data.iloc[0:0][output_columns]

    idx = int(matches[0])

    # Exclude the query row explicitly. Dropping "the first sorted hit" is not
    # reliable: other rows can tie with the query at the maximum similarity and
    # sort ahead of it, which would leave the query inside its own results.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Keep the best top_n positions (ties keep their original row order)
    anime_indices = [pos for pos, _ in ranked[:max(top_n, 0)]]

    return anime_data.iloc[anime_indices][output_columns]

# Example usage: swap in any title that appears in the dataset's "name" column
anime_title = "Naruto"
recommended_anime = recommend_anime(
    title=anime_title,
    anime_data=anime_data,
    cosine_sim=cosine_sim,
)
print("\n🎉 Recommended Anime for:", anime_title)
print(recommended_anime)



🎉 Recommended Anime for: Naruto
                                                   name  \
615                                  Naruto: Shippuuden   
841                                              Naruto   
1103  Boruto: Naruto the Movie - Naruto ga Hokage ni...   
1343                                        Naruto x UT   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   

                                                  genre  rating  
615   Action, Comedy, Martial Arts, Shounen, Super P...    7.94  
841   Action, Comedy, Martial Arts, Shounen, Super P...    7.81  
1103  Action, Comedy, Martial Arts, Shounen, Super P...    7.68  
1343  Action, Comedy, Martial Arts, Shounen, Super P...    7.58  
1472  Action, Comedy, Martial Arts, Shounen, Super P...    7.53  


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Hold out 20% of the rows; only the held-out titles are used below.
# NOTE(review): X_train is never used, and the recommender is queried against
# the full anime_data, so this split does not separate "seen" from "unseen".
X_train, X_test = train_test_split(anime_data, test_size=0.2, random_state=42)

# NOTE(review): y_true is always 1 and a "hit" only means the queried title
# appears in its own recommendation list, so the scores below do not measure
# recommendation quality. A meaningful evaluation needs ground-truth relevance
# (e.g. user ratings), which this notebook does not have.
y_true, y_pred = [], []

# Query the recommender with 10 reproducibly sampled held-out titles
sampled_titles = X_test["name"].sample(10, random_state=42)
for title in sampled_titles:
    recommended = recommend_anime(title, anime_data, cosine_sim)
    if recommended.empty:
        continue
    y_true.append(1)  # Assume correct recommendation
    y_pred.append(int(title in recommended["name"].values))

# Compute evaluation metrics (zero_division=1 scores an undefined ratio as 1)
precision = precision_score(y_true, y_pred, zero_division=1)
recall = recall_score(y_true, y_pred, zero_division=1)
f1 = f1_score(y_true, y_pred, zero_division=1)

print(f"\n📊 Evaluation Metrics:\nPrecision: {precision:.2f}, Recall: {recall:.2f}, F1-score: {f1:.2f}")



📊 Evaluation Metrics:
Precision: 1.00, Recall: 0.20, F1-score: 0.33


1. Can you explain the difference between user-based and item-based collaborative filtering?



- **User-Based Collaborative Filtering**: Recommends items by finding users with similar preferences. If User A and User B have similar tastes, items liked by User B may be suggested to User A.  

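- **Item-Based Collaborative Filtering**: Recommends items that are similar to the ones a user has already liked, where similarity is measured from user interactions (ratings, purchases) rather than from item attributes. If two items tend to be liked or purchased by the same users, a user who likes one may be recommended the other.  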



2. What is collaborative filtering, and how does it work?

**Collaborative Filtering** is a recommendation technique that suggests items based on user behavior and preferences. It works by analyzing past interactions (e.g., ratings, purchases) to find patterns among users or items.  

1. **User-Based**: Finds similar users and recommends what they liked.  
2. **Item-Based**: Finds similar items and suggests them to users.  
