load dataset from movielens


In [None]:
!wget -nc https://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -n ml-100k.zip

--2025-05-11 18:26:20--  https://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2025-05-11 18:26:21 (14.3 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base  

normal SGD


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

ratings_df = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Adjust user and movie IDs to start from 0
ratings_df['user_id'] -= 1
ratings_df['movie_id'] -= 1

ratings_list = list(ratings_df[['user_id', 'movie_id', 'rating']].itertuples(index=False, name=None))

n_users = ratings_df['user_id'].nunique()
n_movies = ratings_df['movie_id'].nunique()

train_ratings, test_ratings = train_test_split(ratings_list, test_size=0.2, random_state=42)

print(f"Data Loaded: {len(ratings_list)} ratings, {len(train_ratings)} train, {len(test_ratings)} test.")

def init_model(n_users, n_movies, n_factors=20):
    np.random.seed(42)
    U = np.random.normal(0, 0.1, (n_users, n_factors)) # Initialize user feature matrix U (n_users × n_factors) with small random numbers
    V = np.random.normal(0, 0.1, (n_movies, n_factors)) # Initialize item feature matrix V (n_movies × n_factors) with small random numbers
    user_bias = np.zeros(n_users)
    item_bias = np.zeros(n_movies)
    global_mean = 0
    return U, V, user_bias, item_bias, global_mean

def train_model_sgd(U, V, user_bias, item_bias, global_mean, train_data, n_factors=20, lr=0.01, reg=0.1, epochs=40):
    global_mean = np.mean([r for _, _, r in train_data])

    for ep in range(epochs):
        np.random.shuffle(train_data)
        total_loss = 0

        for user, movie, rating in train_data:
            pred = predict(U, V, user_bias, item_bias, global_mean, user, movie)
            err = rating - pred
            total_loss += err ** 2

            # Update biases
            user_bias[user] += lr * (err - reg * user_bias[user])
            item_bias[movie] += lr * (err - reg * item_bias[movie])

            # Update latent features
            U_old = U[user].copy()
            V_old = V[movie].copy()

            U[user] += lr * (err * V_old - reg * U_old)
            V[movie] += lr * (err * U_old - reg * V_old)

        mse = total_loss / len(train_data)
        print(f"Epoch {ep+1}/{epochs} - MSE: {mse:.4f}")

    return U, V, user_bias, item_bias, global_mean

def predict(U, V, user_bias, item_bias, global_mean, user, movie):
    dot = np.dot(U[user], V[movie])
    # Add biases and global mean
    return global_mean + user_bias[user] + item_bias[movie] + dot

U, V, user_bias, item_bias, global_mean = init_model(n_users, n_movies, n_factors=75)
U, V, user_bias, item_bias, global_mean = train_model_sgd(U, V, user_bias, item_bias, global_mean,
                                                      train_ratings, n_factors=75, lr=0.01, reg=0.1, epochs=75)

predictions = []
truths = []

for user, movie, real_rating in test_ratings:
    pred_rating = predict(U, V, user_bias, item_bias, global_mean, user, movie)
    predictions.append(pred_rating)
    truths.append(real_rating)

rmse = np.sqrt(mean_squared_error(truths, predictions))
print(f"\n✅ Test RMSE: {rmse:.4f}")


Data Loaded: 100000 ratings, 80000 train, 20000 test.
Epoch 1/75 - MSE: 1.0491
Epoch 2/75 - MSE: 0.9120
Epoch 3/75 - MSE: 0.8713
Epoch 4/75 - MSE: 0.8497
Epoch 5/75 - MSE: 0.8355
Epoch 6/75 - MSE: 0.8244
Epoch 7/75 - MSE: 0.8153
Epoch 8/75 - MSE: 0.8073
Epoch 9/75 - MSE: 0.7990
Epoch 10/75 - MSE: 0.7906
Epoch 11/75 - MSE: 0.7821
Epoch 12/75 - MSE: 0.7721
Epoch 13/75 - MSE: 0.7622
Epoch 14/75 - MSE: 0.7513
Epoch 15/75 - MSE: 0.7405
Epoch 16/75 - MSE: 0.7288
Epoch 17/75 - MSE: 0.7174
Epoch 18/75 - MSE: 0.7062
Epoch 19/75 - MSE: 0.6951
Epoch 20/75 - MSE: 0.6838
Epoch 21/75 - MSE: 0.6727
Epoch 22/75 - MSE: 0.6622
Epoch 23/75 - MSE: 0.6516
Epoch 24/75 - MSE: 0.6412
Epoch 25/75 - MSE: 0.6311
Epoch 26/75 - MSE: 0.6212
Epoch 27/75 - MSE: 0.6114
Epoch 28/75 - MSE: 0.6020
Epoch 29/75 - MSE: 0.5932
Epoch 30/75 - MSE: 0.5845
Epoch 31/75 - MSE: 0.5760
Epoch 32/75 - MSE: 0.5683
Epoch 33/75 - MSE: 0.5605
Epoch 34/75 - MSE: 0.5530
Epoch 35/75 - MSE: 0.5461
Epoch 36/75 - MSE: 0.5394
Epoch 37/75 - MSE: 

kernelized mean SGD

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

ratings_df = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
ratings_df['user_id'] -= 1
ratings_df['movie_id'] -= 1

ratings_list = list(ratings_df[['user_id', 'movie_id', 'rating']].itertuples(index=False, name=None))

n_users = ratings_df['user_id'].nunique()
n_movies = ratings_df['movie_id'].nunique()

train_data, test_data = train_test_split(ratings_list, test_size=0.2, random_state=42)

print(f"Data Loaded! {len(ratings_list)} ratings: {len(train_data)} train, {len(test_data)} test.")

def init_model(n_users, n_movies, n_factors=20):
    np.random.seed(0)
    U = np.random.normal(0, 0.1, (n_users, n_factors))
    V = np.random.normal(0, 0.1, (n_movies, n_factors))
    user_bias = np.zeros(n_users)
    item_bias = np.zeros(n_movies)
    global_mean = 0
    return U, V, user_bias, item_bias, global_mean

def rbf_kernel(x, y, sigma=1.0):
    """Gaussian (RBF) kernel between x and y."""
    diff = x - y
    return np.exp(-np.dot(diff, diff) / (2 * sigma ** 2))

def predict_rbf(U, V, user_bias, item_bias, global_mean, user, movie, sigma=1.0):
    kernel_val = rbf_kernel(U[user], V[movie], sigma)
    return global_mean + user_bias[user] + item_bias[movie] + kernel_val

def train_model_rbf(U, V, user_bias, item_bias, global_mean, train_data,
                      n_factors=20, lr=0.005, reg=0.1, epochs=30, sigma=1.0):
    global_mean = np.mean([r for _, _, r in train_data])

    for ep in range(epochs):
        np.random.shuffle(train_data)
        total_loss = 0

        for user, movie, rating in train_data:
            pred = predict_rbf(U, V, user_bias, item_bias, global_mean, user, movie, sigma)
            err = rating - pred
            total_loss += err ** 2

            # Gradients for kernel part
            diff = U[user] - V[movie]
            grad_kernel = (np.exp(-np.dot(diff, diff) / (2 * sigma ** 2)) * diff) / (sigma ** 2)

            # Update biases
            user_bias[user] += lr * (err - reg * user_bias[user])
            item_bias[movie] += lr * (err - reg * item_bias[movie])

            # Update latent features
            U[user] += lr * (err * grad_kernel - reg * U[user])
            V[movie] -= lr * (err * grad_kernel + reg * V[movie])

        mse = total_loss / len(train_data)
        print(f"Epoch {ep+1}/{epochs} - Train MSE: {mse:.4f}")

    return U, V, user_bias, item_bias, global_mean

U, V, user_bias, item_bias, global_mean = init_model(n_users, n_movies, n_factors=50)

U, V, user_bias, item_bias, global_mean = train_model_rbf(
    U, V, user_bias, item_bias, global_mean,
    train_data,
    n_factors=50, lr=0.01, reg=0.1,
    epochs=50, sigma=1.5
)

predictions = []
truths = []

for user, movie, real_rating in test_data:
    pred_rating = predict_rbf(U, V, user_bias, item_bias, global_mean, user, movie, sigma=1.5)
    predictions.append(pred_rating)
    truths.append(real_rating)

rmse = np.sqrt(mean_squared_error(truths, predictions))
print(f"\n✅ Test RMSE (KMSGD): {rmse:.3f}")

Data Loaded! 100000 ratings: 80000 train, 20000 test.
Epoch 1/50 - Train MSE: 1.2464
Epoch 2/50 - Train MSE: 0.9893
Epoch 3/50 - Train MSE: 0.9393
Epoch 4/50 - Train MSE: 0.9141
Epoch 5/50 - Train MSE: 0.8995
Epoch 6/50 - Train MSE: 0.8902
Epoch 7/50 - Train MSE: 0.8828
Epoch 8/50 - Train MSE: 0.8783
Epoch 9/50 - Train MSE: 0.8743
Epoch 10/50 - Train MSE: 0.8715
Epoch 11/50 - Train MSE: 0.8695
Epoch 12/50 - Train MSE: 0.8674
Epoch 13/50 - Train MSE: 0.8657
Epoch 14/50 - Train MSE: 0.8646
Epoch 15/50 - Train MSE: 0.8630
Epoch 16/50 - Train MSE: 0.8624
Epoch 17/50 - Train MSE: 0.8611
Epoch 18/50 - Train MSE: 0.8608
Epoch 19/50 - Train MSE: 0.8601
Epoch 20/50 - Train MSE: 0.8594
Epoch 21/50 - Train MSE: 0.8589
Epoch 22/50 - Train MSE: 0.8580
Epoch 23/50 - Train MSE: 0.8581
Epoch 24/50 - Train MSE: 0.8575
Epoch 25/50 - Train MSE: 0.8566
Epoch 26/50 - Train MSE: 0.8567
Epoch 27/50 - Train MSE: 0.8565
Epoch 28/50 - Train MSE: 0.8561
Epoch 29/50 - Train MSE: 0.8558
Epoch 30/50 - Train MSE: 0.

batch SGD


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

ratings_df = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
ratings_df['user_id'] -= 1
ratings_df['movie_id'] -= 1

ratings_list = list(ratings_df[['user_id', 'movie_id', 'rating']].itertuples(index=False, name=None))

n_users = ratings_df['user_id'].nunique()
n_movies = ratings_df['movie_id'].nunique()

train_data, test_data = train_test_split(ratings_list, test_size=0.2, random_state=0)

print(f"Data Loaded! {len(ratings_list)} ratings: {len(train_data)} train, {len(test_data)} test.")

def init_model(n_users, n_movies, n_factors=20):
    np.random.seed(0)
    U = np.random.normal(0, 0.1, (n_users, n_factors))
    V = np.random.normal(0, 0.1, (n_movies, n_factors))
    user_bias = np.zeros(n_users)
    item_bias = np.zeros(n_movies)
    global_mean = 0
    return U, V, user_bias, item_bias, global_mean

def predict(U, V, user_bias, item_bias, global_mean, user, movie):
    dot = np.dot(U[user], V[movie])
    return global_mean + user_bias[user] + item_bias[movie] + dot

def train_model_batch(U, V, user_bias, item_bias, global_mean, train_data,
                          n_factors=20, lr=0.005, reg=0.1, epochs=30, batch_size=32):
    global_mean = np.mean([r for _, _, r in train_data])

    for ep in range(epochs):
        np.random.shuffle(train_data)
        total_loss = 0

        for i in range(0, len(train_data), batch_size):
            batch = train_data[i:i+batch_size]

            # Initialize batch gradients
            grad_U = np.zeros_like(U)
            grad_V = np.zeros_like(V)
            grad_user_bias = np.zeros_like(user_bias)
            grad_item_bias = np.zeros_like(item_bias)

            for user, movie, rating in batch:
                pred = predict(U, V, user_bias, item_bias, global_mean, user, movie)
                err = rating - pred
                total_loss += err ** 2

                # Accumulate gradients
                grad_U[user] += -(err * V[movie]) + reg * U[user]
                grad_V[movie] += -(err * U[user]) + reg * V[movie]
                grad_user_bias[user] += -err + reg * user_bias[user]
                grad_item_bias[movie] += -err + reg * item_bias[movie]

            # Normalize gradients by batch size
            grad_U /= batch_size
            grad_V /= batch_size
            grad_user_bias /= batch_size
            grad_item_bias /= batch_size

            # Update parameters
            U += -lr * grad_U
            V += -lr * grad_V
            user_bias += -lr * grad_user_bias
            item_bias += -lr * grad_item_bias

        mse = total_loss / len(train_data)
        print(f"Epoch {ep+1}/{epochs} - Train MSE: {mse:.4f}")

    return U, V, user_bias, item_bias, global_mean


U, V, user_bias, item_bias, global_mean = init_model(n_users, n_movies, n_factors=50)

U, V, user_bias, item_bias, global_mean = train_model_batch(
    U, V, user_bias, item_bias, global_mean,
    train_data,
    n_factors=50, lr=0.01, reg=0.1,
    epochs=50, batch_size=4
)

predictions = []
truths = []

for user, movie, real_rating in test_data:
    pred_rating = predict(U, V, user_bias, item_bias, global_mean, user, movie)
    predictions.append(pred_rating)
    truths.append(real_rating)

rmse = np.sqrt(mean_squared_error(truths, predictions))
print(f"\n✅ Test RMSE (KMSGD): {rmse:.3f}")

Data Loaded! 100000 ratings: 80000 train, 20000 test.
Epoch 1/50 - Train MSE: 1.1594
Epoch 2/50 - Train MSE: 1.0340
Epoch 3/50 - Train MSE: 0.9774
Epoch 4/50 - Train MSE: 0.9444
Epoch 5/50 - Train MSE: 0.9223
Epoch 6/50 - Train MSE: 0.9061
Epoch 7/50 - Train MSE: 0.8935
Epoch 8/50 - Train MSE: 0.8834
Epoch 9/50 - Train MSE: 0.8751
Epoch 10/50 - Train MSE: 0.8682
Epoch 11/50 - Train MSE: 0.8621
Epoch 12/50 - Train MSE: 0.8571
Epoch 13/50 - Train MSE: 0.8523
Epoch 14/50 - Train MSE: 0.8483
Epoch 15/50 - Train MSE: 0.8447
Epoch 16/50 - Train MSE: 0.8413
Epoch 17/50 - Train MSE: 0.8384
Epoch 18/50 - Train MSE: 0.8356
Epoch 19/50 - Train MSE: 0.8329
Epoch 20/50 - Train MSE: 0.8306
Epoch 21/50 - Train MSE: 0.8285
Epoch 22/50 - Train MSE: 0.8263
Epoch 23/50 - Train MSE: 0.8242
Epoch 24/50 - Train MSE: 0.8222
Epoch 25/50 - Train MSE: 0.8204
Epoch 26/50 - Train MSE: 0.8186
Epoch 27/50 - Train MSE: 0.8168
Epoch 28/50 - Train MSE: 0.8150
Epoch 29/50 - Train MSE: 0.8133
Epoch 30/50 - Train MSE: 0.

batch + momentum SGD


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

ratings_df = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
ratings_df['user_id'] -= 1
ratings_df['movie_id'] -= 1

ratings_list = list(ratings_df[['user_id', 'movie_id', 'rating']].itertuples(index=False, name=None))

n_users = ratings_df['user_id'].nunique()
n_movies = ratings_df['movie_id'].nunique()

train_data, test_data = train_test_split(ratings_list, test_size=0.2, random_state=42)

print(f"Data Loaded! {len(ratings_list)} ratings: {len(train_data)} train, {len(test_data)} test.")

def init_model(n_users, n_movies, n_factors=20):
    np.random.seed(0)
    U = np.random.normal(0, 0.1, (n_users, n_factors))
    V = np.random.normal(0, 0.1, (n_movies, n_factors))
    user_bias = np.zeros(n_users)
    item_bias = np.zeros(n_movies)
    global_mean = 0
    return U, V, user_bias, item_bias, global_mean

def predict(U, V, user_bias, item_bias, global_mean, user, movie):
    dot = np.dot(U[user], V[movie])
    return global_mean + user_bias[user] + item_bias[movie] + dot

def train_model_sgd_batch_momentum(U, V, user_bias, item_bias, global_mean, train_data,
                                   n_factors=20, lr=0.005, reg=0.1, epochs=30, batch_size=32,
                                   momentum=0.9):
    global_mean = np.mean([r for _, _, r in train_data])

    # Initialize momentum terms (velocities)
    vel_U = np.zeros_like(U)
    vel_V = np.zeros_like(V)
    vel_user_bias = np.zeros_like(user_bias)
    vel_item_bias = np.zeros_like(item_bias)

    for ep in range(epochs):
        np.random.shuffle(train_data)
        total_loss = 0

        for i in range(0, len(train_data), batch_size):
            batch = train_data[i:i+batch_size]

            grad_U = np.zeros_like(U)
            grad_V = np.zeros_like(V)
            grad_user_bias = np.zeros_like(user_bias)
            grad_item_bias = np.zeros_like(item_bias)

            for user, movie, rating in batch:
                pred = predict(U, V, user_bias, item_bias, global_mean, user, movie)
                err = rating - pred
                total_loss += err ** 2

                grad_U[user] += -(err * V[movie]) + reg * U[user]
                grad_V[movie] += -(err * U[user]) + reg * V[movie]
                grad_user_bias[user] += -err + reg * user_bias[user]
                grad_item_bias[movie] += -err + reg * item_bias[movie]

            # Average gradients
            grad_U /= batch_size
            grad_V /= batch_size
            grad_user_bias /= batch_size
            grad_item_bias /= batch_size

            # Update with momentum
            vel_U = momentum * vel_U - lr * grad_U
            vel_V = momentum * vel_V - lr * grad_V
            vel_user_bias = momentum * vel_user_bias - lr * grad_user_bias
            vel_item_bias = momentum * vel_item_bias - lr * grad_item_bias

            U += vel_U
            V += vel_V
            user_bias += vel_user_bias
            item_bias += vel_item_bias

        mse = total_loss / len(train_data)
        print(f"Epoch {ep+1}/{epochs} - Train MSE: {mse:.4f}")

    return U, V, user_bias, item_bias, global_mean

U, V, user_bias, item_bias, global_mean = init_model(n_users, n_movies, n_factors=75)

U, V, user_bias, item_bias, global_mean = train_model_sgd_batch_momentum(
    U, V, user_bias, item_bias, global_mean,
    train_data,
    n_factors=75, lr=0.01, reg=0.1,
    epochs=75, batch_size=16, momentum=0.9
)

predictions = []
truths = []

for user, movie, real_rating in test_data:
    pred_rating = predict(U, V, user_bias, item_bias, global_mean, user, movie)
    predictions.append(pred_rating)
    truths.append(real_rating)

rmse = np.sqrt(mean_squared_error(truths, predictions))
print(f"\n✅ Test RMSE (KMSGD): {rmse:.4f}")

Data Loaded! 100000 ratings: 80000 train, 20000 test.
Epoch 1/75 - Train MSE: 1.0887
Epoch 2/75 - Train MSE: 0.9466
Epoch 3/75 - Train MSE: 0.8993
Epoch 4/75 - Train MSE: 0.8730
Epoch 5/75 - Train MSE: 0.8559
Epoch 6/75 - Train MSE: 0.8433
Epoch 7/75 - Train MSE: 0.8337
Epoch 8/75 - Train MSE: 0.8258
Epoch 9/75 - Train MSE: 0.8188
Epoch 10/75 - Train MSE: 0.8128
Epoch 11/75 - Train MSE: 0.8069
Epoch 12/75 - Train MSE: 0.8018
Epoch 13/75 - Train MSE: 0.7960
Epoch 14/75 - Train MSE: 0.7909
Epoch 15/75 - Train MSE: 0.7851
Epoch 16/75 - Train MSE: 0.7794
Epoch 17/75 - Train MSE: 0.7735
Epoch 18/75 - Train MSE: 0.7670
Epoch 19/75 - Train MSE: 0.7607
Epoch 20/75 - Train MSE: 0.7539
Epoch 21/75 - Train MSE: 0.7470
Epoch 22/75 - Train MSE: 0.7400
Epoch 23/75 - Train MSE: 0.7325
Epoch 24/75 - Train MSE: 0.7255
Epoch 25/75 - Train MSE: 0.7184
Epoch 26/75 - Train MSE: 0.7111
Epoch 27/75 - Train MSE: 0.7040
Epoch 28/75 - Train MSE: 0.6967
Epoch 29/75 - Train MSE: 0.6897
Epoch 30/75 - Train MSE: 0.

sliding wall SGD

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load and prepare data
ratings_df = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
ratings_df['user_id'] -= 1
ratings_df['movie_id'] -= 1

ratings_list = list(ratings_df[['user_id', 'movie_id', 'rating']].itertuples(index=False, name=None))
n_users = ratings_df['user_id'].nunique()
n_movies = ratings_df['movie_id'].nunique()

train_data, test_data = train_test_split(ratings_list, test_size=0.2, random_state=42)

print(f"Data Loaded! {len(ratings_list)} ratings: {len(train_data)} train, {len(test_data)} test.")

def init_model(n_users, n_movies, n_factors=20):
    np.random.seed(0)
    U = np.random.normal(0, 0.1, (n_users, n_factors))
    V = np.random.normal(0, 0.1, (n_movies, n_factors))
    return U, V

def train_swsgd(U, V, train_data, n_factors=20, lr=0.01, reg=0.1, epochs=30, wall_size=1000, sample_k=5):
    wall = []  # Sliding wall buffer
    total_ratings = len(train_data)

    for ep in range(epochs):
        np.random.shuffle(train_data)
        total_loss = 0
        count = 0

        for user, movie, rating in train_data:
            # Add to wall
            wall.append((user, movie, rating))
            if len(wall) > wall_size:
                wall.pop(0)  # Maintain sliding window

            # Sample from wall
            sample_size = min(sample_k, len(wall))
            sample_indices = np.random.choice(len(wall), size=sample_size, replace=False)

            for idx in sample_indices:
                u, i, r_ui = wall[idx]
                pred = np.dot(U[u], V[i])
                err = r_ui - pred

                total_loss += err ** 2
                count += 1

                # SGD update
                U[u] += lr * (err * V[i] - reg * U[u])
                V[i] += lr * (err * U[u] - reg * V[i])

        mse = total_loss / count
        print(f"Epoch {ep+1}/{epochs} - Train MSE: {mse:.4f}")

    return U, V

# Initialize and train
U, V = init_model(n_users, n_movies, n_factors=50)
U, V = train_swsgd(U, V, train_data, n_factors=50, lr=0.01, reg=0.1, epochs=75, wall_size=1000, sample_k=5)

# Evaluate
predictions = []
truths = []

for user, movie, real_rating in test_data:
    pred_rating = np.dot(U[user], V[movie])
    predictions.append(pred_rating)
    truths.append(real_rating)

rmse = np.sqrt(mean_squared_error(truths, predictions))
print(f"\n✅ Test RMSE (Sliding Wall SGD): {rmse:.4f}")


Data Loaded! 100000 ratings: 80000 train, 20000 test.
Epoch 1/75 - Train MSE: 3.0429
Epoch 2/75 - Train MSE: 0.6742
Epoch 3/75 - Train MSE: 0.6141
Epoch 4/75 - Train MSE: 0.5749
Epoch 5/75 - Train MSE: 0.5447
Epoch 6/75 - Train MSE: 0.5176
Epoch 7/75 - Train MSE: 0.4975
Epoch 8/75 - Train MSE: 0.4805
Epoch 9/75 - Train MSE: 0.4665
Epoch 10/75 - Train MSE: 0.4546
Epoch 11/75 - Train MSE: 0.4467
Epoch 12/75 - Train MSE: 0.4383
Epoch 13/75 - Train MSE: 0.4316
Epoch 14/75 - Train MSE: 0.4262
Epoch 15/75 - Train MSE: 0.4200
Epoch 16/75 - Train MSE: 0.4165
Epoch 17/75 - Train MSE: 0.4127
Epoch 18/75 - Train MSE: 0.4076
Epoch 19/75 - Train MSE: 0.4061
Epoch 20/75 - Train MSE: 0.4030
Epoch 21/75 - Train MSE: 0.4025
Epoch 22/75 - Train MSE: 0.4004
Epoch 23/75 - Train MSE: 0.3965
Epoch 24/75 - Train MSE: 0.3968
Epoch 25/75 - Train MSE: 0.3955
Epoch 26/75 - Train MSE: 0.3931
Epoch 27/75 - Train MSE: 0.3908
Epoch 28/75 - Train MSE: 0.3937
Epoch 29/75 - Train MSE: 0.3918
Epoch 30/75 - Train MSE: 0.