# 📚 SVD Book Recommendation Model Training (Google Colab)
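This notebook loads preprocessed data, trains a collaborative filtering model using SVD-style matrix factorization (fitted with stochastic gradient descent, SGD), evaluates it on the same ratings it was trained on, and downloads the trained model.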

In [None]:
!pip install tqdm
from google.colab import files
uploaded = files.upload()  # Upload your processed CSV files here



Saving processed_books.csv to processed_books.csv
Saving processed_ratings.csv to processed_ratings.csv
Saving processed_users.csv to processed_users.csv


In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import pickle
import os
from scipy.sparse import csr_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error
import json
from pathlib import Path
from datetime import datetime
import shutil


In [None]:
# Training Parameters
# n_factors      -> width of the latent user/item vectors
# n_epochs       -> number of full passes over the stored ratings
# learning_rate  -> step size of each SGD update
# regularization -> L2 shrinkage applied to both factor vectors
SVD_PARAMS = dict(
    n_factors=50,
    n_epochs=20,
    learning_rate=0.005,
    regularization=0.02,
)

# Directory reserved for model artifacts; created up front if missing.
MODEL_DIR = "models"
Path(MODEL_DIR).mkdir(parents=True, exist_ok=True)

# Load CSVs
# The three files are read in a fixed order (books, users, ratings) from the
# working directory and unpacked straight into their DataFrames.
books, users, ratings = (
    pd.read_csv(csv_name)
    for csv_name in (
        "processed_books.csv",
        "processed_users.csv",
        "processed_ratings.csv",
    )
)
print("✅ Data loaded successfully.")

# Create sparse matrix
# Distinct users / items in order of first appearance; their position in
# these arrays becomes the row / column index of the ratings matrix.
user_ids = ratings['User-ID'].unique()
item_ids = ratings['ISBN'].unique()
user_map = dict(zip(user_ids, range(len(user_ids))))
item_map = dict(zip(item_ids, range(len(item_ids))))

# Translate every rating row into (row index, column index, value).
rows = ratings['User-ID'].map(user_map)
cols = ratings['ISBN'].map(item_map)
data = ratings['Book-Rating'].astype(np.float32)

# users x items matrix in CSR format; only the listed ratings are stored.
rating_matrix = csr_matrix(
    (data, (rows, cols)),
    shape=(len(user_ids), len(item_ids)),
)
print("✅ Ratings matrix created.")

# Initialize latent matrices
# A seeded generator makes the random starting point -- and therefore the
# whole training run -- reproducible under "Restart & Run All".
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

n_users, n_items = rating_matrix.shape
n_factors = SVD_PARAMS['n_factors']
init_scale = 1. / n_factors  # std-dev of the zero-mean Gaussian initial factors

# U holds one latent vector per user (row), V one per item (row).
U = rng.normal(scale=init_scale, size=(n_users, n_factors))
V = rng.normal(scale=init_scale, size=(n_items, n_factors))

# COO view exposes parallel (row, col, data) arrays for the SGD loop.
coo = rating_matrix.tocoo()

# Train using SGD
# Each epoch visits every stored rating once, in the order of the COO arrays,
# and nudges the corresponding user and item vectors to reduce the
# L2-regularised squared error of that single rating.
print("\n🚀 Starting training...")
from tqdm import tqdm

# Hyperparameters do not change during training; read them once.
n_epochs = SVD_PARAMS['n_epochs']
lr = SVD_PARAMS['learning_rate']
reg = SVD_PARAMS['regularization']

for epoch in range(n_epochs):
    total_loss = 0  # sum of squared errors, measured before each update
    print(f"\nEpoch {epoch + 1}/{n_epochs}")
    for u, i, r in tqdm(zip(coo.row, coo.col, coo.data), total=len(coo.data)):
        # Snapshot the user vector: U[u] is modified in place below, and the
        # item step must use the value the error was computed with. Using the
        # already-updated U[u] would not be the gradient at the current point.
        user_vec = U[u].copy()
        item_vec = V[i]  # view; V[i] is only written after it has been read

        err = r - np.dot(user_vec, item_vec)
        total_loss += err ** 2

        U[u] += lr * (err * item_vec - reg * user_vec)
        V[i] += lr * (err * user_vec - reg * item_vec)

    print(f"Loss: {total_loss:.4f}")

print("\n✅✅ Training completed successfully! Model is ready for evaluation or saving.")






✅ Data loaded successfully.
✅ Ratings matrix created.

🚀 Starting training...

Epoch 1/20


100%|██████████| 646630/646630 [00:21<00:00, 29452.83it/s]


Loss: 14354439.6852

Epoch 2/20


100%|██████████| 646630/646630 [00:12<00:00, 49764.24it/s]


Loss: 14341653.7414

Epoch 3/20


100%|██████████| 646630/646630 [00:12<00:00, 50248.24it/s]


Loss: 14143099.4184

Epoch 4/20


100%|██████████| 646630/646630 [00:12<00:00, 51435.85it/s]


Loss: 12997566.1774

Epoch 5/20


100%|██████████| 646630/646630 [00:12<00:00, 49773.29it/s]


Loss: 11583010.5628

Epoch 6/20


100%|██████████| 646630/646630 [00:12<00:00, 52009.11it/s]


Loss: 10351334.3032

Epoch 7/20


100%|██████████| 646630/646630 [00:12<00:00, 50043.87it/s]


Loss: 9331863.2402

Epoch 8/20


100%|██████████| 646630/646630 [00:13<00:00, 48289.62it/s]


Loss: 8435783.5293

Epoch 9/20


100%|██████████| 646630/646630 [00:12<00:00, 50261.46it/s]


Loss: 7619518.9338

Epoch 10/20


100%|██████████| 646630/646630 [00:12<00:00, 51355.63it/s]


Loss: 6867999.9956

Epoch 11/20


100%|██████████| 646630/646630 [00:12<00:00, 52084.60it/s]


Loss: 6178277.1669

Epoch 12/20


100%|██████████| 646630/646630 [00:12<00:00, 51817.20it/s]


Loss: 5549540.5504

Epoch 13/20


100%|██████████| 646630/646630 [00:12<00:00, 51879.76it/s]


Loss: 4979998.9896

Epoch 14/20


100%|██████████| 646630/646630 [00:12<00:00, 51966.68it/s]


Loss: 4466929.9624

Epoch 15/20


100%|██████████| 646630/646630 [00:12<00:00, 50434.92it/s]


Loss: 4006720.2034

Epoch 16/20


100%|██████████| 646630/646630 [00:12<00:00, 51031.11it/s]


Loss: 3595153.5368

Epoch 17/20


100%|██████████| 646630/646630 [00:13<00:00, 49533.20it/s]


Loss: 3227808.6568

Epoch 18/20


100%|██████████| 646630/646630 [00:12<00:00, 51881.84it/s]


Loss: 2900298.0064

Epoch 19/20


100%|██████████| 646630/646630 [00:13<00:00, 47279.71it/s]


Loss: 2608461.7745

Epoch 20/20


100%|██████████| 646630/646630 [00:13<00:00, 49365.84it/s]


Loss: 2348494.6439

✅✅ Training completed successfully! Model is ready for evaluation or saving.


TypeError: keys must be str, int, float, bool or None, not int64

In [None]:
# Evaluate the factorisation on the stored ratings.
#
# Predictions are computed only for the (user, item) pairs that actually have
# a rating. Building the full `U @ V.T` product and `rating_matrix.toarray()`
# would allocate two dense n_users x n_items arrays, which can exhaust memory
# for a sparse ratings matrix.
#
# NOTE: these are the same ratings the model was trained on, so RMSE/MAE here
# describe goodness of fit, not generalisation to unseen ratings.
EVAL_BATCH = 100_000  # pairs scored per step; bounds temporary memory

observed = rating_matrix.tocoo()
rated = observed.data != 0  # keep exactly the entries a dense `!= 0` mask would select
user_idx = observed.row[rated]
item_idx = observed.col[rated]
actual = observed.data[rated]

predicted = np.empty(actual.shape[0], dtype=np.float64)
for start in range(0, actual.shape[0], EVAL_BATCH):
    stop = start + EVAL_BATCH
    # Row-wise dot product of the matching user and item vectors.
    predicted[start:stop] = np.einsum(
        "ij,ij->i", U[user_idx[start:stop]], V[item_idx[start:stop]]
    )

rmse = np.sqrt(mean_squared_error(actual, predicted))
mae = mean_absolute_error(actual, predicted)
print(f"\n✅ Evaluation complete.\nRMSE: {rmse:.4f}, MAE: {mae:.4f}")


NameError: name 'U' is not defined

In [None]:
import json
import shutil
from datetime import datetime
from pathlib import Path

import numpy as np
from google.colab import files

# Export the trained model as a self-contained folder, zip it, and hand the
# archive to the browser for download.

# Target directory for all exported artifacts.
portable_dir = Path("portable_model")
portable_dir.mkdir(parents=True, exist_ok=True)

# Factor matrices: U as trained, V stored transposed under the name Vt.
np.save(portable_dir / "U.npy", U)
np.save(portable_dir / "Vt.npy", V.T)

# JSON cannot serialise numpy integer keys, so user ids are cast to plain
# ints and ISBNs to strings before dumping.
user_map_json = {int(uid): idx for uid, idx in user_map.items()}
item_map_json = {str(isbn): idx for isbn, idx in item_map.items()}

# Hyperparameters plus a creation timestamp, for traceability.
model_meta = {
    "svd_params": SVD_PARAMS,
    "created_at": datetime.now().isoformat(),
}

# Write the two id->index mappings and the metadata, one JSON file each.
json_artifacts = (
    ("user_mapping.json", user_map_json),
    ("item_mapping.json", item_map_json),
    ("model_meta.json", model_meta),
)
for json_name, payload in json_artifacts:
    with open(portable_dir / json_name, "w") as f:
        json.dump(payload, f)

# Bundle the folder into svd_portable_model.zip in the working directory.
shutil.make_archive("svd_portable_model", 'zip', portable_dir)
print("✅ Model saved as portable zip!")

# Trigger the browser download of the archive.
files.download("svd_portable_model.zip")


✅ Model saved as portable zip!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files

# `filename` is not assigned in any cell shown in this notebook, so on a
# fresh kernel this cell raised NameError. Bind it explicitly to the archive
# produced by the export cell (shutil.make_archive("svd_portable_model", 'zip', ...)).
filename = "svd_portable_model.zip"
files.download(filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>