In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the ratings CSV
df = pd.read_csv('anime_ratings.csv')

# Build the user x anime matrix (rows: user_id, columns: anime_id, cells: rating).
# Pairs with no rating come out of the pivot as NaN and are replaced by 0
# (mean imputation would be an alternative).
ratings_matrix = df.pivot_table(
    values='rating',
    index='user_id',
    columns='anime_id',
).fillna(0)

# Pairwise cosine similarity between user rows, relabelled with the user ids
user_similarity = cosine_similarity(ratings_matrix)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=ratings_matrix.index,
    columns=ratings_matrix.index,
)

def predict_user_based(user_id, anime_id, ratings=None, similarity=None):
    """Predict a user's rating for an anime from the ratings of similar users.

    The prediction is the similarity-weighted mean of the ratings that *other*
    users gave to ``anime_id``. Entries equal to 0 (the fill value used for
    "not rated") or NaN are ignored, so missing ratings no longer drag the
    prediction toward zero, and the target user's own row is excluded.

    Parameters
    ----------
    user_id : label
        Row label of the target user in ``similarity``.
    anime_id : label
        Column label of the target anime in ``ratings``.
    ratings : pandas.DataFrame, optional
        User x anime ratings matrix. Defaults to the notebook-level
        ``ratings_matrix``.
    similarity : pandas.DataFrame, optional
        User x user similarity matrix. Defaults to the notebook-level
        ``user_similarity_df``.

    Returns
    -------
    float
        The predicted rating, or NaN when no other user with a non-zero
        similarity weight has rated the anime.

    Raises
    ------
    KeyError
        If ``user_id`` or ``anime_id`` is not present in the matrices.
    """
    if ratings is None:
        ratings = ratings_matrix
    if similarity is None:
        similarity = user_similarity_df

    # Similarity of the target user to every other user (self is dropped so a
    # user's own rating can never be used to predict itself).
    sim_scores = similarity[user_id].drop(labels=user_id, errors='ignore')

    # Ratings of the anime, aligned to the same users as the similarity scores
    item_ratings = ratings[anime_id].reindex(sim_scores.index)

    # Only users who actually rated the anime contribute to the average
    rated = item_ratings.notna() & (item_ratings != 0)
    weights = sim_scores[rated]

    # Normalise by the weights that were actually used; abs() keeps the
    # denominator positive if any similarity is negative.
    norm = weights.abs().sum()
    if norm == 0:
        return float("nan")

    return weights.dot(item_ratings[rated]) / norm

# Demo: user-based prediction for one (user, anime) pair
user_id, anime_id = 1, 101
predicted_rating = predict_user_based(user_id=user_id, anime_id=anime_id)
print(f"Predicted Rating for User {user_id} on Anime {anime_id}: {predicted_rating}")


In [None]:
# Pairwise cosine similarity between anime: transpose so each row is one anime,
# then relabel the result with the anime ids
item_similarity = cosine_similarity(ratings_matrix.transpose())
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=ratings_matrix.columns,
    columns=ratings_matrix.columns,
)

def predict_item_based(user_id, anime_id, ratings=None, similarity=None):
    """Predict a user's rating for an anime from the user's ratings of similar anime.

    The prediction is the similarity-weighted mean of the ratings that
    ``user_id`` gave to *other* anime. Entries equal to 0 (the fill value used
    for "not rated") or NaN are ignored, so unrated anime no longer drag the
    prediction toward zero, and the target anime itself is excluded.

    Parameters
    ----------
    user_id : label
        Row label of the target user in ``ratings``.
    anime_id : label
        Column label of the target anime in ``similarity``.
    ratings : pandas.DataFrame, optional
        User x anime ratings matrix. Defaults to the notebook-level
        ``ratings_matrix``.
    similarity : pandas.DataFrame, optional
        Anime x anime similarity matrix. Defaults to the notebook-level
        ``item_similarity_df``.

    Returns
    -------
    float
        The predicted rating, or NaN when the user has rated no other anime
        with a non-zero similarity weight.

    Raises
    ------
    KeyError
        If ``user_id`` or ``anime_id`` is not present in the matrices.
    """
    if ratings is None:
        ratings = ratings_matrix
    if similarity is None:
        similarity = item_similarity_df

    # Similarity of the target anime to every other anime (the target itself is
    # dropped so an existing rating of it can never be used to predict itself).
    sim_scores = similarity[anime_id].drop(labels=anime_id, errors='ignore')

    # The user's ratings, aligned to the same anime as the similarity scores
    user_ratings = ratings.loc[user_id].reindex(sim_scores.index)

    # Only anime the user actually rated contribute to the average
    rated = user_ratings.notna() & (user_ratings != 0)
    weights = sim_scores[rated]

    # Normalise by the weights that were actually used; abs() keeps the
    # denominator positive if any similarity is negative.
    norm = weights.abs().sum()
    if norm == 0:
        return float("nan")

    return weights.dot(user_ratings[rated]) / norm

# Demo: item-based prediction for the current user_id / anime_id pair
predicted_rating_item = predict_item_based(user_id=user_id, anime_id=anime_id)
print(f"Predicted Rating for User {user_id} on Anime {anime_id} (Item-Based): {predicted_rating_item}")

from sklearn.model_selection import train_test_split

# Split the data into 80% train / 20% test.
# A fixed random_state makes the split, and therefore the RMSE reported
# below, identical on every Restart & Run All.
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Create a pivot table for training data (users x anime); pairs without a
# training rating are filled with 0
train_matrix = train_data.pivot_table(index='user_id', columns='anime_id', values='rating').fillna(0)

# RMSE helper used to score predictions against the held-out test ratings
def rmse(pred, actual):
    """Root-mean-squared error over the entries that have an observed rating.

    Positions where ``actual`` is 0 are treated as "no rating" and are left
    out of the error, matching the 0 fill value used for the rating matrices.

    Parameters
    ----------
    pred : array-like
        Predicted ratings (any shape; lists are accepted).
    actual : array-like
        Observed ratings, same shape as ``pred``.

    Returns
    -------
    float
        sqrt(mean((pred - actual) ** 2)) over the non-zero entries of ``actual``.

    Raises
    ------
    ValueError
        If the shapes differ, if ``actual`` has no non-zero entry, or if a
        compared value is NaN or infinite.
    """
    pred = np.asarray(pred, dtype=float)
    actual = np.asarray(actual, dtype=float)

    if pred.shape != actual.shape:
        raise ValueError(
            f"pred and actual must have the same shape, got {pred.shape} and {actual.shape}"
        )

    # Boolean mask of observed ratings; indexing with it flattens the selection
    observed = actual != 0
    if not observed.any():
        raise ValueError("rmse() needs at least one non-zero actual rating")

    pred_obs = pred[observed]
    actual_obs = actual[observed]

    # Fail loudly rather than silently returning NaN
    if not (np.isfinite(pred_obs).all() and np.isfinite(actual_obs).all()):
        raise ValueError("rmse() input contains NaN or infinity")

    return float(np.sqrt(np.mean((pred_obs - actual_obs) ** 2)))

# --- Evaluate user-based CF on the held-out ratings --------------------------
# Similarities and neighbour ratings come from the TRAINING matrix only, so
# the held-out ratings being scored are never visible to the predictor.
test_user_similarity = cosine_similarity(train_matrix)
test_user_similarity_df = pd.DataFrame(test_user_similarity, index=train_matrix.index, columns=train_matrix.index)


def _predict_from_train(eval_user_id, eval_anime_id):
    """Similarity-weighted mean of other users' training ratings for one anime.

    Uses ``test_user_similarity_df`` and ``train_matrix`` only. Users whose
    training rating for the anime is 0 (not rated) are ignored, and the target
    user is excluded. Returns NaN when no usable neighbour exists.
    """
    sims = test_user_similarity_df[eval_user_id].drop(labels=eval_user_id, errors='ignore')
    item_ratings = train_matrix[eval_anime_id].reindex(sims.index)
    rated = item_ratings.notna() & (item_ratings != 0)
    norm = sims[rated].abs().sum()
    if norm == 0:
        return np.nan
    return sims[rated].dot(item_ratings[rated]) / norm


test_preds = []
test_actuals = []

# itertuples keeps each column's own dtype (iterrows would upcast the ids to
# float), and the loop variables do not overwrite the notebook-level
# user_id / anime_id used by the examples above.
for test_row in test_data.itertuples(index=False):
    # Cold start: a user or anime absent from the training matrix cannot be predicted
    if test_row.user_id not in train_matrix.index or test_row.anime_id not in train_matrix.columns:
        continue

    prediction = _predict_from_train(test_row.user_id, test_row.anime_id)
    if np.isnan(prediction):
        # No neighbour rated this anime in the training data
        continue

    test_preds.append(prediction)
    test_actuals.append(test_row.rating)

# Calculate RMSE (NaN when not a single test pair could be predicted)
if test_preds:
    test_rmse = rmse(np.array(test_preds), np.array(test_actuals))
else:
    test_rmse = float("nan")
print(f"Test RMSE: {test_rmse}")

