# 📚 SVD Book Recommendation Model Training (Google Colab)
This notebook loads preprocessed data, trains a collaborative filtering model using SVD (via SGD), evaluates it, and downloads the trained model.

In [1]:
!pip install tqdm
from google.colab import files
uploaded = files.upload()  # Upload your processed CSV files here



Saving processed_books.csv to processed_books.csv
Saving processed_ratings.csv to processed_ratings.csv
Saving processed_users.csv to processed_users.csv


In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import pickle
import os
from scipy.sparse import csr_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [6]:
# Training Parameters
SVD_PARAMS = {
    "n_factors": 50,
    "n_epochs": 20,
    "learning_rate": 0.005,
    "regularization": 0.02
}
MODEL_DIR = "models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Load CSVs
books = pd.read_csv("processed_books.csv")
users = pd.read_csv("processed_users.csv")
ratings = pd.read_csv("processed_ratings.csv")
print("✅ Data loaded successfully.")

# Create sparse matrix
user_ids = ratings['User-ID'].unique()
item_ids = ratings['ISBN'].unique()
user_map = {uid: i for i, uid in enumerate(user_ids)}
item_map = {iid: i for i, iid in enumerate(item_ids)}

rows = ratings['User-ID'].map(user_map)
cols = ratings['ISBN'].map(item_map)
data = ratings['Book-Rating'].astype(np.float32)

from scipy.sparse import csr_matrix
rating_matrix = csr_matrix((data, (rows, cols)), shape=(len(user_ids), len(item_ids)))
print("✅ Ratings matrix created.")

# Initialize latent matrices
n_users, n_items = rating_matrix.shape
U = np.random.normal(scale=1. / SVD_PARAMS['n_factors'], size=(n_users, SVD_PARAMS['n_factors']))
V = np.random.normal(scale=1. / SVD_PARAMS['n_factors'], size=(n_items, SVD_PARAMS['n_factors']))
coo = rating_matrix.tocoo()

# Train using SGD
print("\n🚀 Starting training...")
from tqdm import tqdm

for epoch in range(SVD_PARAMS['n_epochs']):
    total_loss = 0
    print(f"\nEpoch {epoch + 1}/{SVD_PARAMS['n_epochs']}")
    for u, i, r in tqdm(zip(coo.row, coo.col, coo.data), total=len(coo.data)):
        pred = np.dot(U[u], V[i])
        err = r - pred
        total_loss += err ** 2

        U[u] += SVD_PARAMS['learning_rate'] * (err * V[i] - SVD_PARAMS['regularization'] * U[u])
        V[i] += SVD_PARAMS['learning_rate'] * (err * U[u] - SVD_PARAMS['regularization'] * V[i])

    print(f"Loss: {total_loss:.4f}")

print("\n✅✅ Training completed successfully! Model is ready for evaluation or saving.")


✅ Data loaded successfully.
✅ Ratings matrix created.

🚀 Starting training...

Epoch 1/20


100%|██████████| 646630/646630 [00:12<00:00, 51573.57it/s]


Loss: 14354460.9522

Epoch 2/20


100%|██████████| 646630/646630 [00:12<00:00, 53156.98it/s]


Loss: 14340310.1506

Epoch 3/20


100%|██████████| 646630/646630 [00:12<00:00, 53017.07it/s]


Loss: 14103043.4109

Epoch 4/20


100%|██████████| 646630/646630 [00:12<00:00, 52575.27it/s]


Loss: 12900217.6457

Epoch 5/20


100%|██████████| 646630/646630 [00:12<00:00, 52391.82it/s]


Loss: 11497295.1784

Epoch 6/20


100%|██████████| 646630/646630 [00:12<00:00, 50680.00it/s]


Loss: 10289937.6276

Epoch 7/20


100%|██████████| 646630/646630 [00:13<00:00, 49639.09it/s]


Loss: 9294861.2296

Epoch 8/20


100%|██████████| 646630/646630 [00:12<00:00, 51797.27it/s]


Loss: 8416735.6888

Epoch 9/20


100%|██████████| 646630/646630 [00:11<00:00, 54277.91it/s]


Loss: 7610521.5193

Epoch 10/20


100%|██████████| 646630/646630 [00:12<00:00, 52466.22it/s]


Loss: 6865078.0660

Epoch 11/20


100%|██████████| 646630/646630 [00:12<00:00, 52272.17it/s]


Loss: 6179520.8481

Epoch 12/20


100%|██████████| 646630/646630 [00:12<00:00, 52451.04it/s]


Loss: 5553986.5631

Epoch 13/20


100%|██████████| 646630/646630 [00:12<00:00, 49931.60it/s]


Loss: 4986833.6510

Epoch 14/20


100%|██████████| 646630/646630 [00:12<00:00, 51142.79it/s]


Loss: 4474840.4412

Epoch 15/20


100%|██████████| 646630/646630 [00:12<00:00, 53801.00it/s]


Loss: 4014420.0442

Epoch 16/20


100%|██████████| 646630/646630 [00:12<00:00, 52094.58it/s]


Loss: 3601888.9117

Epoch 17/20


100%|██████████| 646630/646630 [00:12<00:00, 53229.66it/s]


Loss: 3233334.7735

Epoch 18/20


100%|██████████| 646630/646630 [00:12<00:00, 53673.24it/s]


Loss: 2904678.6338

Epoch 19/20


100%|██████████| 646630/646630 [00:12<00:00, 51217.70it/s]


Loss: 2611893.5985

Epoch 20/20


100%|██████████| 646630/646630 [00:12<00:00, 52247.66it/s]

Loss: 2351190.1600

✅✅ Training completed successfully! Model is ready for evaluation or saving.





In [10]:
import pickle
from datetime import datetime

model_data = {
    "U": U,
    "Vt": V.T,
    "user_mapping": user_map,
    "item_mapping": item_map,
    "params": SVD_PARAMS
}

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"models/svd_model_{timestamp}.pkl"

with open(filename, "wb") as f:
    pickle.dump(model_data, f)

print(f"✅ Model saved to: {filename}")


✅ Model saved to: models/svd_model_20250409_125709.pkl


In [11]:
from google.colab import files
files.download(filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>