# Given

dataset of ratings $Y$ in (4778, 443). 
$R=1$ where rating is present
 
# Find

Calibrate system using collaborating filtering approach.

# Solution

In [198]:
import numpy as np
import tensorflow as tf

Load data

In [199]:
y = np.loadtxt("./Lab3_data_small_Y.csv", delimiter=",")
r = np.loadtxt("./Lab3_data_small_R.csv", delimiter=",")

y.shape, r.shape

((4778, 443), (4778, 443))

Normalize $Y$

In [200]:
y_mean = np.mean(y,axis=1)
y_norm = y - y_mean.reshape(-1,1)

Define cost function $J$

In [201]:
def cost_function(w, b, x, y_norm, r, _lambda): # re-written in tf language to be compatible with gradient calculation

    sm = tf.linalg.matmul(x,w) + b - y_norm
    rated_only = sm * r

    J = tf.reduce_sum(rated_only**2) / 2 + (tf.reduce_sum(w**2) + tf.reduce_sum(x**2)) * _lambda / 2

    return J

Initialize $X$, $W$, and $B$ in TensorFlow

In [202]:
films_qty, users_qty = y.shape
features_qty = 100

optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

X = tf.Variable(tf.random.uniform((films_qty, features_qty), dtype=tf.float64),name='X')
W = tf.Variable(tf.random.uniform((features_qty, users_qty), dtype=tf.float64),name='W')
B = tf.Variable(tf.random.uniform((1, users_qty), dtype=tf.float64),name='B')

X.shape, W.shape, B.shape

(TensorShape([4778, 100]), TensorShape([100, 443]), TensorShape([1, 443]))

Run GD

In [203]:
iterations = 200

for iter in range(iterations):

    with tf.GradientTape() as tape:
        cost = cost_function(W,B,X,y_norm,r,_lambda=1)

    grads = tape.gradient(cost, [W,B,X])

    optimizer.apply_gradients(zip(grads, [W,B,X]))

    if iter%20==19:
        print(iter, "->", cost)

19 -> tf.Tensor(251015.52131505072, shape=(), dtype=float64)
39 -> tf.Tensor(50944.94356940118, shape=(), dtype=float64)
59 -> tf.Tensor(20623.739413997457, shape=(), dtype=float64)
79 -> tf.Tensor(15323.413397671298, shape=(), dtype=float64)
99 -> tf.Tensor(12655.42373644491, shape=(), dtype=float64)
119 -> tf.Tensor(10997.14178079836, shape=(), dtype=float64)
139 -> tf.Tensor(9799.395493704491, shape=(), dtype=float64)
159 -> tf.Tensor(8893.341498032245, shape=(), dtype=float64)
179 -> tf.Tensor(8187.478069256026, shape=(), dtype=float64)
199 -> tf.Tensor(7624.894149167421, shape=(), dtype=float64)


Predict

In [204]:
def predict(W,B,X):

    x,w,b = X.numpy(), W.numpy(), B.numpy()

    prediction = np.matmul(x, w) + b + y_mean.reshape(-1,1)

    return prediction

Accuracy

In [205]:
prediction_r1 = predict(W,B,X)[r==1].round(2)
r1 = y[r==1]

np.mean(prediction_r1 - r1)

-0.0394869574077848