In [1]:
# Mount Google Drive inside the Colab VM so files under MyDrive can be read
# through the local filesystem.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

# Load the ratings table. The code below reads the columns 'ISBN',
# 'Book-Title', 'User-ID' and 'Book-Rating' from it.
df = pd.read_csv('/content/gdrive/MyDrive/Books.csv')

# Encode every distinct ISBN as an integer code (0..K-1; -1 marks a missing
# ISBN) so that books can be used as matrix column indices.
df['ISBN'] = pd.Categorical(df['ISBN'])
df['book_id'] = df['ISBN'].cat.codes

# book_id -> title lookup. Built in one vectorised pass instead of
# df.iterrows(), which materialises a Series for every row. If a book_id
# occurs on several rows the last title wins, exactly as a row-by-row
# assignment loop would behave.
id_title_map = dict(zip(df['book_id'], df['Book-Title']))

# --- Build the user x book rating matrix and the train/dev/test splits -----

# Encode users as dense 0..num_users-1 row indices. Raw User-ID values are
# not guaranteed to be contiguous, so using `User-ID - 1` as a row index can
# exceed a matrix that is sized by the number of distinct users.
user_codes = pd.Categorical(df['User-ID']).codes.astype(np.int64)
book_codes = df['book_id'].to_numpy(dtype=np.int64)
rating_values = df['Book-Rating'].to_numpy()

# A code of -1 marks a missing User-ID / ISBN and a NaN rating carries no
# information; such rows cannot be placed in the matrix, so drop them.
known = (user_codes >= 0) & (book_codes >= 0) & df['Book-Rating'].notna().to_numpy()
user_codes = user_codes[known]
book_codes = book_codes[known]
rating_values = rating_values[known]

num_users = df['User-ID'].nunique()
num_books = df.loc[df['book_id'] >= 0, 'book_id'].nunique()


def _ratings_matrix(rating_idx):
    """Return a (num_users, num_books) CSR matrix holding only the ratings
    selected by `rating_idx` (positions into the filtered rating arrays)."""
    return csr_matrix(
        (rating_values[rating_idx], (user_codes[rating_idx], book_codes[rating_idx])),
        shape=(num_users, num_books),
    )


all_idx = np.arange(rating_values.size)
data_matrix = _ratings_matrix(all_idx)

# Split individual ratings, not matrix rows. Splitting rows would hold out
# whole users and renumber them, so row i of the test matrix would no longer
# be the same user as row i of the train matrix. With a rating-level split
# all three matrices share one (user, book) index space.
train_idx, test_idx = train_test_split(all_idx, test_size=0.1, random_state=42)
train_idx, dev_idx = train_test_split(train_idx, test_size=0.1, random_state=42)

train_data = _ratings_matrix(train_idx)
dev_data = _ratings_matrix(dev_idx)
test_data = _ratings_matrix(test_idx)

# L2-normalise each user's rating vector, then compare book columns.
train_data_norm = normalize(train_data)

# NOTE: cosine_similarity returns a dense (num_books x num_books) array by
# default, which grows quadratically with the catalogue size. For a large
# catalogue, pass dense_output=False and adapt the indexing code that
# consumes item_sim accordingly.
item_sim = cosine_similarity(train_data_norm.T)

# --- Top-N recommendations for every user that has test ratings ------------
TOP_N_RECOMMENDATIONS = 30

# nonzero() yields one row index per non-zero rating, so a user with k test
# ratings would otherwise be scored k times; process each user once.
test_users = np.unique(test_data.nonzero()[0])
recommendations = {}
for user in test_users:
    rated_items = train_data[user, :].nonzero()[1]

    # Score of a candidate book = sum of its similarities to the books this
    # user has rated in the training data.
    scores = np.asarray(item_sim[rated_items, :].sum(axis=0), dtype=float).ravel()

    # A rated book has cosine similarity 1 with itself, so it would crowd the
    # top of the ranking; never recommend what the user has already rated.
    scores[rated_items] = -np.inf

    ranked = np.argsort(scores)[::-1][:TOP_N_RECOMMENDATIONS]
    recommendations[user] = [
        (id_title_map[item], scores[item])
        for item in ranked
        if np.isfinite(scores[item])  # skip masked books if fewer than N remain
    ]

# --- Rating prediction on the held-out (user, item) pairs ------------------
NEIGHBORHOOD_SIZE = 30

test_users, test_items = test_data.nonzero()

# Indexing a csr_matrix with two index arrays gives a 2-D (1, n) result;
# flatten it to a 1-D float array so it can be indexed per rating and so
# fractional predictions are not truncated to integers.
test_ratings = np.asarray(test_data[test_users, test_items], dtype=float).ravel()
predicted_ratings = np.zeros(test_ratings.shape, dtype=float)

# Prediction used when a user has no usable neighbours: the mean of all
# non-zero training ratings (0.0 if there are none).
train_values = train_data.data[train_data.data != 0]
fallback_rating = float(train_values.mean()) if train_values.size else 0.0

for i, (user, item) in enumerate(zip(test_users, test_items)):
    rated_items = train_data_norm[user, :].nonzero()[1]
    if rated_items.size == 0:
        predicted_ratings[i] = fallback_rating
        continue

    # Similarity between the target book and each book the user has rated.
    scores = np.asarray(item_sim[rated_items, item], dtype=float).ravel()
    nearest = np.argsort(scores)[::-1][:NEIGHBORHOOD_SIZE]
    top_scores = scores[nearest]

    # `nearest` holds positions within rated_items, not column indices, so
    # translate back to item ids before reading the user's ratings.
    top_rated = train_data[user, rated_items[nearest]].toarray().ravel()

    weight_sum = top_scores.sum()
    if weight_sum > 0:
        # Similarity-weighted average of the user's ratings on the neighbours.
        predicted_ratings[i] = np.dot(top_scores, top_rated) / weight_sum
    else:
        # All similarities are zero: the weighted average is undefined.
        predicted_ratings[i] = fallback_rating

mae = mean_absolute_error(test_ratings, predicted_ratings)
rmse = np.sqrt(mean_squared_error(test_ratings, predicted_ratings))
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
