In [62]:
import pandas as pd

import os

# Load the dataset.
# The default is the original author's machine-specific temp-extraction path; set the
# ANIME_CSV environment variable to point at your own copy of anime.csv instead of editing it.
file_path = os.environ.get(
    'ANIME_CSV',
    r'C:\Users\user\AppData\Local\Temp\54ad3c13-e7e1-46ef-a5ba-39a68b1f36b3_Recommendation System.zip.6b3\Recommendation System\anime.csv',
)
df = pd.read_csv(file_path)

# Display column names
print("Column names in the dataset:")
print(df.columns)


Column names in the dataset:
Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')


In [64]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

import os

# Load the dataset.
# The default is the original author's machine-specific temp-extraction path; set the
# ANIME_CSV environment variable to point at your own copy of anime.csv instead of editing it.
file_path = os.environ.get(
    'ANIME_CSV',
    r'C:\Users\user\AppData\Local\Temp\54ad3c13-e7e1-46ef-a5ba-39a68b1f36b3_Recommendation System.zip.6b3\Recommendation System\anime.csv',
)
df = pd.read_csv(file_path)

# Display column names to verify
print("Column names in the dataset:")
print(df.columns)

Column names in the dataset:
Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')


In [66]:
# Handle missing values.
# Assign the filled column back instead of calling fillna(inplace=True) on df[col]:
# the in-place form mutates a temporary selection, which emits a FutureWarning in
# pandas 2.x and leaves df unchanged once Copy-on-Write is enabled.
df['rating'] = df['rating'].fillna(df['rating'].median())  # numeric: impute with the median
df['type'] = df['type'].fillna('Unknown')                  # categorical: explicit placeholder
df['genre'] = df['genre'].fillna('')                       # text: empty string keeps later concatenation valid

In [67]:
# Coerce 'episodes' and 'members' to integers.
# Anything that cannot be parsed as a number becomes NaN and is then replaced by 0.
for numeric_col in ('episodes', 'members'):
    parsed = pd.to_numeric(df[numeric_col], errors='coerce')
    df[numeric_col] = parsed.fillna(0).astype(int)


In [68]:
# One-hot encode the categorical 'type' column and swap it in for the original.
# Not re-runnable on its own: a second run fails because 'type' has already been dropped.
type_dummies = pd.get_dummies(df['type'], prefix='type')
df = pd.concat([df.drop(columns=['type']), type_dummies], axis=1)


In [69]:
# Build one space-separated text field per row: genre followed by the stringified
# episodes, rating and members values. This is the input to the TF-IDF vectorizer.
# NOTE(review): the numeric values become text tokens, so two titles only "match" on
# them when the digits are identical — confirm this is intended rather than genre-only text.
numeric_tokens = [df[col].astype(str) for col in ('episodes', 'rating', 'members')]
df['combined_features'] = df['genre'].str.cat(numeric_tokens, sep=' ')

In [70]:
# Turn the combined text field into TF-IDF vectors (one row per anime),
# using the vectorizer's default settings.
tfidf_vectorizer = TfidfVectorizer()
feature_corpus = df['combined_features']
tfidf_matrix = tfidf_vectorizer.fit_transform(feature_corpus)

In [71]:
# Pairwise cosine similarity between every pair of TF-IDF rows.
# With a single argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

In [72]:
# Function to get recommendations
def get_recommendations(anime_name, cosine_sim=cosine_sim, top_n=10):
    """Return the names of the anime most similar to ``anime_name``.

    Parameters
    ----------
    anime_name : str
        Exact value to look up in ``df['name']``. If several rows share the
        name, the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix whose row/column order matches the row
        order of ``df``. Defaults to the matrix that exists when this
        function is defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Names of up to ``top_n`` other rows, most similar first.

    Raises
    ------
    IndexError
        If ``anime_name`` does not occur in ``df['name']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Work with row *positions*, not index labels: cosine_sim is positional, so this
    # stays correct even if df no longer has its default 0..n-1 index.
    match_positions = (df['name'] == anime_name).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"Anime {anime_name!r} not found in the dataset")
    idx = int(match_positions[0])

    # Drop the query row explicitly. Skipping the first sorted entry would only be
    # correct if no other row tied with it at the maximum similarity.
    scored = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    scored.sort(key=lambda pair: pair[1], reverse=True)  # stable: ties keep row order

    anime_indices = [pos for pos, _ in scored[:top_n]]
    return df['name'].iloc[anime_indices]

In [73]:
# Demo: look up the titles most similar to 'Naruto' and show them.
print("Recommended Anime for 'Naruto':")
recommended_anime = get_recommendations(anime_name='Naruto')
print(recommended_anime)

Recommended Anime for 'Naruto':
7867                                      Iron Virgin Jun
4067                       Ikkitousen: Extravaganza Epoch
1573    Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...
1930                                    Dragon Ball Super
2458                 Naruto Shippuuden: Sunny Side Battle
8492                       Dr. Slump: Arale-chan Specials
2997    Naruto Soyokazeden Movie: Naruto to Mashin to ...
1472          Naruto: Shippuuden Movie 4 - The Lost Tower
1931                      Dragon Ball: Episode of Bardock
1103    Boruto: Naruto the Movie - Naruto ga Hokage ni...
Name: name, dtype: object


In [74]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
split_frames = train_test_split(df, test_size=TEST_FRACTION, random_state=SPLIT_SEED)
df_train, df_test = split_frames

In [75]:
# Dummy evaluation — placeholder only.
# Both vectors are random 0/1 draws, so the metrics computed from them say nothing about
# recommendation quality. Replace y_true with real relevance labels and y_pred with the
# system's actual predictions before drawing any conclusion.
rng = np.random.default_rng(42)  # fixed seed so a fresh Run All reproduces the same numbers
y_true = rng.choice([0, 1], size=len(df_test))  # Replace with actual true labels if available
y_pred = rng.choice([0, 1], size=len(df_test))  # Replace with actual predicted labels from your system

In [76]:
# Score the predictions as a binary classification problem.
# zero_division=0 makes an undefined metric come back as 0.
metric_options = {'average': 'binary', 'zero_division': 0}
precision = precision_score(y_true, y_pred, **metric_options)
recall = recall_score(y_true, y_pred, **metric_options)
f1 = f1_score(y_true, y_pred, **metric_options)

In [77]:
# Report the three metrics, one per line, rounded to two decimals.
print(
    "\nEvaluation Metrics:",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)


Evaluation Metrics:
Precision: 0.48
Recall: 0.49
F1 Score: 0.49


In [None]:
interview questions:
===================
1. Difference Between User-Based and Item-Based Collaborative Filtering
Collaborative Filtering (CF) is a technique used in recommendation systems to predict a user's interests by collecting preferences or taste information from many users. It assumes that if users agree on one issue, they are likely to agree on other issues as well.

User-Based Collaborative Filtering:

Concept: This method recommends items based on the preferences of similar users. It looks at a user's past behavior and compares it to other users who have similar tastes.
Process:
Step 1: Identify users similar to the target user (based on their ratings or preferences for items).
Step 2: Aggregate the preferences of these similar users to make recommendations for the target user.
Example: If User A and User B have similar ratings for movies, and User A likes a movie that User B hasn't seen, User B might be recommended that movie.
Item-Based Collaborative Filtering:

Concept: This method recommends items based on the similarity between items themselves, rather than users. It assumes that if a user likes a particular item, they are likely to like items that are similar to it.
Process:
Step 1: Calculate the similarity between items based on users’ ratings.
Step 2: Recommend items that are similar to those the user has already liked.
Example: If a user likes a particular movie, the system will recommend other movies that are similar to it based on the ratings from all users.
Key Differences:

Focus: User-based focuses on user similarity, while item-based focuses on item similarity.
Scalability: Item-based filtering often scales better with large datasets because there are usually fewer items than users, and item-to-item similarities change slowly enough to be precomputed offline.
Adaptability: User-based can adapt more quickly to changes in user preferences, while item-based is typically more stable over time.
2. What is Collaborative Filtering, and How Does it Work?
Collaborative Filtering (CF) is a technique used in recommendation systems to suggest items to users based on the preferences and behaviors of other users. The core idea is that users who have agreed in the past will agree in the future.

How Collaborative Filtering Works:

1. Collect Data:

User-Item Interactions: CF systems collect data on user interactions with items, such as ratings, purchases, clicks, or likes. This data is typically stored in a matrix where rows represent users, columns represent items, and the matrix cells contain interaction data (e.g., ratings).
2. Similarity Computation:

User-Based CF:
Similarity between Users: Compute the similarity between users based on their interaction patterns. Common measures include Pearson correlation, cosine similarity, and Euclidean distance (a distance, so smaller values mean more similar users).
Item-Based CF:
Similarity between Items: Compute the similarity between items based on user ratings or interactions. This can also be done using measures like cosine similarity or Pearson correlation.
3. Make Predictions:

User-Based CF: For a target user, find similar users and aggregate their preferences to recommend items that the target user has not yet interacted with.
Item-Based CF: For a target item, find similar items and recommend those to users who have interacted with the target item.
4. Generate Recommendations:

Ranking: Rank items based on the predicted preference scores or similarity measures.
Filtering: Apply filters to ensure that recommended items are relevant and not already interacted with by the user.