<a href="https://colab.research.google.com/github/Himanshukunar/excelr-assignments/blob/main/Assignment11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

df = pd.read_csv("/content/anime.csv")

print("Dataset Shape:", df.shape)
print("Columns:", df.columns)
print(df.head())

df = df.dropna(subset=["genre", "rating", "name"])
df.reset_index(drop=True, inplace=True)
print("After Cleaning:", df.shape)


Dataset Shape: (12294, 7)
Columns: Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
After Cleaning: (

In [3]:

df["features"] = df["genre"].fillna('') + " " + df["type"].fillna('') + " " + df["name"].fillna('')
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["features"])

print("TF-IDF Matrix Shape:", tfidf_matrix.shape)

TF-IDF Matrix Shape: (12017, 11831)


In [4]:

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def recommend_anime(title, top_n=5):
    if title not in df["name"].values:
        return f"❌ Anime '{title}' not found in dataset!"

    idx = df[df["name"] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]  # exclude the anime itself

    anime_indices = [i[0] for i in sim_scores]
    return df["name"].iloc[anime_indices].tolist()

print("\nRecommendations for 'Naruto':")
print(recommend_anime("Naruto", top_n=5))


Recommendations for 'Naruto':
['The Last: Naruto the Movie', 'Naruto: Shippuuden', 'Naruto x UT', 'Boruto: Naruto the Movie', 'Naruto Shippuuden: Sunny Side Battle']


In [6]:

df["relevant"] = df["rating"].apply(lambda x: 1 if x >= 7 else 0)

train, test = train_test_split(df, test_size=0.2, random_state=42)

y_true = test["relevant"]
y_pred = [1 if r >= 7 else 0 for r in test["rating"]]  # baseline

precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print("\nEvaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)
print("\nRecommendations for 'One Piece':")
print(recommend_anime("One Piece", top_n=5))


Evaluation Metrics:
Precision: 1.0
Recall: 1.0
F1-Score: 1.0

Recommendations for 'One Piece':
['One Piece Movie 1', 'One Piece Recap', 'One Piece: Adventure of Nebulandia', 'One Piece: Romance Dawn', 'R.O.D the TV']
