In [1]:
import pandas as pd

df = pd.read_csv("anime.csv")
print(df.head())
print(df.info())

   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  

In [2]:
# Handle missing values (dropna since <5% missing)
df = df.dropna(subset=['genre', 'rating', 'type'])

In [6]:
# Convert episodes to numeric (coerce errors to NaN then drop)
df['episodes'] = pd.to_numeric(df['episodes'], errors='coerce')
df = df.dropna(subset=['episodes'])

In [7]:
# TF-IDF on genres (no need for additional scaling)
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['genre'])

In [9]:
# Combine features (TF-IDF genre + normalized rating)
from scipy.sparse import hstack

rating_normalized = df['rating'].values.reshape(-1, 1) / 10  # Scale 0-10 to 0-1
features = hstack([tfidf_matrix, rating_normalized])

In [11]:
# Compute cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim = cosine_similarity(features, features)

In [12]:
# Recommendation function
def recommend_anime(title, cosine_sim=cosine_sim, df=df, top_n=5):
    try:
        idx = df[df['name'].str.lower() == title.lower()].index[0]
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n+1]  # Skip the input anime itself
        anime_indices = [i[0] for i in sim_scores]
        return df[['name', 'genre', 'rating']].iloc[anime_indices]
    except IndexError:
        return f"Anime '{title}' not found in database."
    except Exception as e:
        return f"Error occurred: {str(e)}"

In [13]:
# Example usage
print(recommend_anime("Naruto"))

                                                   name  \
4540                                 Trava: Fist Planet   
1906  Mobile Suit Gundam 00 The Movie: A Wakening of...   
2130               Super Robot Taisen OG: The Inspector   
2165                                Sei Juushi Bismarck   
2828                            Aldnoah.Zero 2nd Season   

                                     genre  rating  
4540  Action, Comedy, Mecha, Sci-Fi, Space    6.70  
1906          Action, Mecha, Sci-Fi, Space    7.41  
2130          Action, Mecha, Sci-Fi, Space    7.35  
2165          Action, Mecha, Sci-Fi, Space    7.34  
2828          Action, Mecha, Sci-Fi, Space    7.15  


## Interview Questions

### 1. **Can you explain the difference between user-based and item-based collaborative filtering?**

**Answer:**
User-based collaborative filtering recommends items by finding similar users. It assumes that if two users have rated similar items similarly in the past, they will likely agree in the future too. So, it recommends what similar users liked.

Item-based collaborative filtering recommends items by finding similar items. It assumes that if a user liked item A, they will also like item B if item A and B are rated similarly by other users.

- **User-Based**: "People similar to you also liked this."
- **Item-Based**: "People who liked this item also liked that item."

---

### 2. **What is collaborative filtering, and how does it work?**

**Answer:**
Collaborative filtering is a recommendation technique that makes predictions about a user's interests by collecting preferences from many users (collaboration). It relies on the assumption that similar users will like similar items.

There are two main types:
- **User-Based Filtering**: Finds users similar to the target user and recommends items they liked.
- **Item-Based Filtering**: Finds items similar to what the user already liked and recommends those.

Collaborative filtering doesn’t require content about the items themselves—only user interaction data like ratings or behavior.

