In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import copy, math
# scaler = StandardScaler()
import tensorflow as tf
from tensorflow import keras
# from recsys_utils import *

In [2]:
# Read the tab-separated MovieLens ratings file; it has no header row, so the
# column names are supplied here. The timestamp is dropped straight away
# because nothing below uses it.
column_names = ['User ID', 'Movie ID', 'Rating', 'Timestamp']
df = pd.read_csv(
    '../../notes and data/ml-100k/u.data',
    sep='\t',
    names=column_names,
).drop(columns=['Timestamp'])
df

Unnamed: 0,User ID,Movie ID,Rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
...,...,...,...
99995,880,476,3
99996,716,204,5
99997,276,1090,1
99998,13,225,2


In [3]:
# Build the (movies x users) rating matrix: one row per 'Movie ID', one column
# per 'User ID'. Pairs with no rating come out of the pivot as NaN and are
# replaced by 0 so the result is a dense float array.
y = np.array(
    df.pivot(index='Movie ID', columns='User ID', values='Rating').fillna(0)
)
y

array([[5., 4., 0., ..., 5., 0., 0.],
       [3., 0., 0., ..., 0., 0., 5.],
       [4., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [4]:
# Indicator matrix with the same shape as y: 1 where y holds a non-zero value,
# 0 where the entry was filled in as 0 (no rating).
M = np.not_equal(y, 0).astype(int)
print(y.shape, M.shape)

(1682, 943) (1682, 943)


In [5]:
# def costFn(x,y,w,b,lam):
#     cost = 0
#     pred = 0
#     reg = 0
#     for i in range(y.shape[0]):
#         for j in range(y.shape[1]):
#             if m[i][j] == 1:
#                 val = np.dot(w[j],x[i]) + b[j] - y[i][j]
#                 pred += (val**2)/2
                
#     for i in range(y.shape[0]):  # Regularization for X
#         reg += np.sum(x[i] ** 2)

#     for j in range(y.shape[1]):  # Regularization for W and b
#         reg += np.sum(w[j] ** 2) + b[j] ** 2

#     reg = (lam/2)*reg

#     cost = pred + reg  # Divide by 2m for proper scaling
#     return cost


def costFn(X, Y,W, b,m, lam):
    """Regularized squared-error cost for collaborative filtering.

    Computes

        J = 1/2 * sum(((X @ W^T + b - Y) * m)^2)
            + lam/2 * (sum(X^2) + sum(W^2))

    using TensorFlow ops, so the result can be differentiated with
    ``tf.GradientTape``.

    Args:
        X: item feature matrix; its rows line up with the rows of ``Y``.
        Y: rating matrix.
        W: user weight matrix; its rows line up with the columns of ``Y``.
        b: bias term added to ``X @ W^T`` (broadcast against it).
        m: mask multiplied element-wise into the residual, so entries where
            ``m`` is 0 contribute nothing to the data term.
        lam: regularization strength applied to ``X`` and ``W``; ``b`` is not
            regularized.

    Returns:
        Scalar tensor holding the total cost.
    """
    # Residual of the prediction, zeroed wherever the mask is 0.
    masked_residual = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y)*m

    data_term = 0.5 * tf.reduce_sum(masked_residual**2)
    penalty = (lam/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return data_term + penalty


In [6]:
def gradientfn(x,y,w,b,m,lam):
    """Analytic gradients of the regularized collaborative-filtering cost.

    The cost being differentiated is

        J = 1/2 * sum over (i, j) with m[i, j] == 1 of
                (w[j] . x[i] + b[0, j] - y[i, j])^2
            + lam/2 * (sum(x^2) + sum(w^2))

    The residual matrix is computed once and reused for all three gradients,
    instead of re-evaluating the same dot product inside nested Python loops
    for every feature index.

    Args:
        x: (n_items, n_features) item feature matrix.
        y: (n_items, n_users) rating matrix.
        w: (n_users, n_features) user weight matrix.
        b: 2-D bias array whose first row holds one bias per user
            (indexed as ``b[0, j]``).
        m: (n_items, n_users) mask; only entries equal to 1 contribute.
        lam: regularization strength for ``x`` and ``w`` (``b`` is not
            regularized).

    Returns:
        Tuple ``(djw, djb, djx)`` with the same shapes as ``w``, ``b`` and
        ``x`` respectively.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    w = np.asarray(w)
    b = np.asarray(b)

    # Residual for every (item, user) pair; forced to 0 where the pair is not
    # flagged as rated so it drops out of every sum below.
    residual = np.where(np.asarray(m) == 1, x @ w.T + b[0] - y, 0.0)

    djx = residual @ w + lam * x
    djw = residual.T @ x + lam * w

    # Only the first row of b is ever used as a bias, so only that row
    # receives a gradient; any further rows stay at 0.
    djb = np.zeros_like(b, dtype=residual.dtype)
    djb[0] = residual.sum(axis=0)

    return djw, djb, djx

In [7]:
def gradientDecent(x,y,w_in,b_in,alpha,iters,lam,m):
    """Fit the collaborative-filtering parameters with a TensorFlow optimizer.

    On every iteration the cost from ``costFn`` is recorded under a
    ``tf.GradientTape``, its gradients with respect to ``x``, ``w_in`` and
    ``b_in`` are taken, and the module-level ``optimizer`` applies them. The
    three variables are therefore updated in place.

    Args:
        x: trainable item feature variable.
        y: rating matrix passed through to ``costFn``.
        w_in: trainable user weight variable.
        b_in: trainable user bias variable.
        alpha: kept for interface compatibility; it is NOT read here. The step
            size is whatever the module-level ``optimizer`` was built with.
        iters: number of optimization steps.
        lam: regularization strength passed through to ``costFn``.
        m: rated-entry mask passed through to ``costFn``.

    Returns:
        Tuple ``(w_in, b_in, x)`` - the same variable objects that were passed
        in, now holding the trained values.
    """
    trainable = [x, w_in, b_in]

    for step in range(iters):
        with tf.GradientTape() as tape:
            # The forward pass happens inside costFn so the tape records it.
            cost_value = costFn(x, y, w_in, b_in, m, lam)

        grads = tape.gradient(cost_value, trainable)

        # `optimizer` is resolved from module scope at call time.
        optimizer.apply_gradients(zip(grads, trainable))

        # Log periodically
        if step % 20 == 0:
            print(f"Training loss at iteration {step}: {cost_value.numpy():0.1f}")

    # Return the parameters that were actually optimized. Previously this
    # returned the globals `w` and `b`, which only matched when the caller
    # happened to pass those same globals in.
    return w_in, b_in, x

In [8]:
# Model size, optimizer and training hyperparameters, followed by the random
# initialisation of the trainable variables.
num_features = 100
num_movies, num_users = y.shape  # y is (movies, users); printed earlier as (1682, 943)
subset_users = 50  # Number of users you want to use for faster processing (not referenced by the cells shown here)
# Module-level optimizer; gradientDecent looks this name up from global scope.
optimizer = keras.optimizers.Adam(learning_rate=0.5)

alpha = 0.01  # NOTE(review): passed to gradientDecent but never read there; the effective step size is Adam's learning_rate=0.5 above
iterations = 200
lam = 1

# Initialize features and weights with small random values instead of zeros
# initial_x = np.random.randn(y_train.shape[0], 10) * 0.1  # Small random values
# initial_w = np.random.randn(y_train.shape[1], 10) * 0 # Small random values
# initial_b = np.random.randn(y_train.shape[1]) * 0  # Small random values
# print(initial_x.shape,initial_w.shape,initial_b.shape)

tf.random.set_seed(1234) # for consistent results
# Shapes: w is (users, features), x is (movies, features), b is (1, users).
# Keep the w, x, b creation order: the random draws follow the seed set above.
w = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='w')
x = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='x')
b = tf.Variable(tf.random.normal((1,          num_users),   dtype=tf.float64),  name='b')


In [9]:
# Run the optimisation; the returned variables are rebound to the same global
# names that were passed in.
w, b, x = gradientDecent(
    x=x,
    y=y,
    w_in=w,
    b_in=b,
    alpha=alpha,
    iters=iterations,
    lam=lam,
    m=M,
)

Training loss at iteration 0: 5868396.1
Training loss at iteration 20: 177330.5
Training loss at iteration 40: 43508.2
Training loss at iteration 60: 14480.3
Training loss at iteration 80: 8370.1
Training loss at iteration 100: 6802.1
Training loss at iteration 120: 6247.4
Training loss at iteration 140: 5985.0
Training loss at iteration 160: 5834.4
Training loss at iteration 180: 5736.9


In [10]:
def predict(xt, wt, bt):
    """Predicted rating for every (item, user) pair.

    Each argument must expose ``.shape`` and ``.numpy()`` (e.g. a
    ``tf.Variable``). The shapes of the three inputs are printed before the
    prediction is computed.

    Args:
        xt: item feature matrix, one row per item.
        wt: user weight matrix, one row per user.
        bt: bias term broadcast onto the (items x users) product.

    Returns:
        NumPy array equal to ``xt @ wt.T + bt``.
    """
    print(xt.shape,wt.shape,bt.shape)
    item_features = xt.numpy()
    user_weights = wt.numpy()
    user_bias = bt.numpy()
    return item_features @ user_weights.T + user_bias

In [11]:
# Evaluate the fitted model on the rated entries. These are the same entries
# the model was trained on, so the numbers below are training-set metrics,
# not an estimate of generalisation.
y_pred = predict(x, w, b)
rated = M > 0

# Zero out predictions/targets for unrated pairs so both matrices line up.
y_pred_masked = y_pred * M
y_masked = y * M

# 1-D vectors holding only the rated entries.
y_flat = y_masked[rated]
y_pred_flat = y_pred_masked[rated]

mse = mean_squared_error(y_flat, y_pred_flat)
rmse = np.sqrt(mse)

print(y_flat)
print(y_pred_flat)
# RMSE was previously computed but never reported.
print(f"RMSE on rated entries: {rmse:.4f}")
# R^2 restricted to rated entries - this is the meaningful score.
print(r2_score(y_flat, y_pred_flat))
# NOTE(review): this second score is taken over the full matrices, where every
# unrated pair is an exact 0 == 0 match, and is averaged per column; it is
# inflated by those trivial matches and should not be read as model quality.
print(r2_score(y_masked, y_pred_masked))

(1682, 100) (943, 100) (1, 943)
[5. 4. 4. ... 2. 3. 3.]
[5.0563958  3.98092921 4.07081698 ... 2.09407768 2.98150608 3.01820845]
0.9928158230654482
0.9992053355632778
