# Recomendation system

In [155]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler

In [156]:
# Load the dataset
anime = pd.read_csv('C:\\Users\\user\\Desktop\\excler assinements\\Recommendation System\\anime.csv')


# Handle missing values


In [157]:
anime.fillna({'genre': '', 'rating': anime['rating'].mean(), 'episodes': 0}, inplace=True)


# Feature Extraction


In [158]:
# Convert 'genre' to numerical representation using one-hot encoding
mlb = MultiLabelBinarizer()
anime['genre'] = anime['genre'].apply(lambda x: x.split(', ') if x else [])
genre_encoded = mlb.fit_transform(anime['genre'])
genre_df = pd.DataFrame(genre_encoded, columns=mlb.classes_)

In [159]:
# Normalize numerical features
scaler = StandardScaler()
anime['rating'] = scaler.fit_transform(anime[['rating']])


In [160]:
# Combine features
features = pd.concat([genre_df, anime[['rating']]], axis=1)

In [161]:
# Compute cosine similarity on training set
train_features = features.loc[train.index]
cos_sim = cosine_similarity(train_features)

# Recommendation function based on cosine similarity
def recommend_anime(anime_id, df=train, features=train_features, cosine_sim=cos_sim, top_n=10):
    idx = df.index[df['anime_id'] == anime_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    anime_indices = [i[0] for i in sim_scores]
    return df.iloc[anime_indices]


# model evolution


In [162]:

def recommend_anime(anime_id, cosine_sim=cos_sim, df=anime, top_n=10):
    if anime_id not in df['anime_id'].values:
        return pd.DataFrame()  # Return empty DataFrame if anime_id not in training set
    
    idx = df.index[df['anime_id'] == anime_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    anime_indices = [i[0] for i in sim_scores]
    return df[['anime_id', 'name']].iloc[anime_indices]


In [163]:
# Example Recommendation
recommended_anime = recommend_anime(anime_id=9963, top_n=10)
print(recommended_anime)


      anime_id                                               name
1956       935                                         Witchblade
1581      8115                               Uchuu Show e Youkoso
2132     21881                                Sword Art Online II
4880      1964                      Sakura Taisen: Ecole de Paris
839       2963                                          Minami-ke
1573      6325  Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...
1853     10444  Digimon Xros Wars: Aku no Death General to Nan...
4305      1718                                      Winter Garden
7500      8935                                  Nekketsu Uchuujin
8524     15137                                    Entotsuya Peroo


# Evaluation

In [164]:
# Split dataset into training and testing sets
train, test = train_test_split(anime, test_size=0.2, random_state=42)

# Initialize lists to store ground truth and predictions
ground_truth = []
predictions = []

# Mean rating to determine relevance
mean_rating = train['rating'].mean()

for _, row in test.iterrows():
    anime_id = row['anime_id']
    
    # Skip anime not present in the training set
    if anime_id not in train['anime_id'].values:
        continue
    
    recommended_anime = recommend_anime(anime_id, top_n=10)
    
    if recommended_anime.empty:
        continue
    
    actual_ratings = test[test['anime_id'].isin(recommended_anime['anime_id'])]['rating']
    true_relevant = (actual_ratings > mean_rating).astype(int).tolist()
    
    ground_truth.extend(true_relevant)
    predictions.extend([1] * len(true_relevant))

# Include non-relevant predictions to avoid all 1s in predictions
if not predictions:
    predictions = [0]

# Include non-relevant items in ground truth to avoid all 1s
if not ground_truth:
    ground_truth = [0]

# Compute precision, recall, and F1-score
precision = precision_score(ground_truth, predictions, average='macro', zero_division=0)
recall = recall_score(ground_truth, predictions, average='macro', zero_division=0)
f1 = f1_score(ground_truth, predictions, average='macro', zero_division=0)

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')

Precision: 1.00
Recall: 1.00
F1-Score: 1.00
