In [31]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [3]:
anime_df = pd.read_csv('anime.csv')

In [5]:
 # Remove missing values
anime_df = anime_df.dropna() 
anime_df.reset_index(drop=True, inplace=True)

In [21]:
# Feature Extraction
vectorizer = TfidfVectorizer(stop_words='english')
genre_matrix = vectorizer.fit_transform(anime_df['genre'])


<12017x46 sparse matrix of type '<class 'numpy.float64'>'
	with 39659 stored elements in Compressed Sparse Row format>

In [19]:
## Cosine Similarity between anime based on genre
cosine_sim = cosine_similarity(genre_matrix, genre_matrix)
cosine_sim

array([[1.        , 0.14715318, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.14715318, 1.        , 0.17877808, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.17877808, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        1.        ]])

In [23]:
# Function to get anime recommendations
def recommend_anime(title, cosine_sim=cosine_sim, df=anime_df, num_recommendations=5):
    if title not in df['name'].values:
        return "Anime not found in the dataset."
    # index of the anime
    idx = df[df['name'] == title].index[0]
    # similarity scores
    sim_scores = list(enumerate(cosine_sim[idx]))
    # Sort anime by similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
    # Get indices of recommended anime
    anime_indices = [i[0] for i in sim_scores]
    return df.iloc[anime_indices][['name', 'genre']]

In [25]:
recommendations = recommend_anime("Naruto")
print(recommendations)

                                                   name  \
615                                  Naruto: Shippuuden   
841                                              Naruto   
1103  Boruto: Naruto the Movie - Naruto ga Hokage ni...   
1343                                        Naruto x UT   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   

                                                  genre  
615   Action, Comedy, Martial Arts, Shounen, Super P...  
841   Action, Comedy, Martial Arts, Shounen, Super P...  
1103  Action, Comedy, Martial Arts, Shounen, Super P...  
1343  Action, Comedy, Martial Arts, Shounen, Super P...  
1472  Action, Comedy, Martial Arts, Shounen, Super P...  


In [33]:
# Splitting dataset into training and testing sets
train_df, test_df = train_test_split(anime_df, test_size=0.2, random_state=42)

In [35]:
# Evaluation Function
def evaluate_recommendation_system():
    y_true = []
    y_pred = []
    
    for title in test_df['name'].sample(50):  # Sample 50 anime for evaluation
        recommendations = recommend_anime(title, df=train_df)
        
        if isinstance(recommendations, str):  # If anime not found
            continue
        
        recommended_anime = recommendations['name'].tolist()
        actual_anime = test_df[test_df['name'] == title]['name'].tolist()
        
        y_true.append(1 if title in actual_anime else 0)
        y_pred.append(1 if title in recommended_anime else 0)
    
    precision = precision_score(y_true, y_pred, zero_division=1)
    recall = recall_score(y_true, y_pred, zero_division=1)
    f1 = f1_score(y_true, y_pred, zero_division=1)
    
    return {"Precision": precision, "Recall": recall, "F1-Score": f1}


In [37]:
recommendations = recommend_anime("Naruto")
print(recommendations)

                                                   name  \
615                                  Naruto: Shippuuden   
841                                              Naruto   
1103  Boruto: Naruto the Movie - Naruto ga Hokage ni...   
1343                                        Naruto x UT   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   

                                                  genre  
615   Action, Comedy, Martial Arts, Shounen, Super P...  
841   Action, Comedy, Martial Arts, Shounen, Super P...  
1103  Action, Comedy, Martial Arts, Shounen, Super P...  
1343  Action, Comedy, Martial Arts, Shounen, Super P...  
1472  Action, Comedy, Martial Arts, Shounen, Super P...  


In [39]:
# Evaluate the system
evaluation_results = evaluate_recommendation_system()
print(evaluation_results)

{'Precision': 1.0, 'Recall': 1.0, 'F1-Score': 1.0}


## Interview Questions

What is Collaborative Filtering & How Does It Work?

Collaborative Filtering is a recommendation system technique that suggests items to users based on past interactions and preferences of similar users. It is widely used in platforms like Netflix, Amazon, and Spotify to recommend movies, products, or songs.

How It Works:

Collect user-item interaction data (e.g., ratings, purchases, or views).

Find similarities between users or items based on past behavior.

Predict user preferences by leveraging these similarities.

Recommend items that similar users liked or similar items that a user has interacted with.
