In [1]:
import numpy as np

<figure>
   <img src="./images/ColabFilterLearn.PNG"  style="width:740px;height:250px;" >
</figure>

$Y$ is the matrix of actual user ratings. It serves as the "ground truth" dataset containing the scores that users have given to specific movies. Here is a breakdown of its structure:

The Matrix: It is a grid where values represent ratings (e.g., on a scale of 1 to 5).

The Columns (Users): Each column represents a unique user (User 0, User 1, etc.).

The Rows (Movies): Each row represents a specific movie or item.

The Values (Cells): The number inside a specific cell $Y(i,j)$ is the rating given by User $j$ to Movie $i$.

In [7]:
# Ratings matrix Y and its companion matrix R, read straight from CSV.
Y = np.loadtxt('./data/small_movies_Y.csv', delimiter=',')
R = np.loadtxt('./data/small_movies_R.csv', delimiter=',')

# Saved parameters: X has one row per movie and W one row per user;
# b is reshaped into a single-row 2-D array.
X = np.loadtxt('./data/small_movies_X.csv', delimiter=',')
W = np.loadtxt('./data/small_movies_W.csv', delimiter=',')
b = np.reshape(np.loadtxt('./data/small_movies_b.csv', delimiter=','), (1, -1))

# Problem sizes, read off the parameter matrices.
num_users = W.shape[0]
num_movies, num_features = X.shape

In [8]:
# Peek at row 0 of X, i.e. the feature vector stored for the first movie.
X[0]

array([-0.03328682,  1.1667464 , -0.5064895 ,  0.1250965 ,  1.5347596 ,
       -0.21013564,  0.10417578, -0.349303  ,  0.75385165, -0.20810875])

In [9]:
# Sanity check: report the shape of every loaded array and the derived sizes.
print("Y", Y.shape, "R", R.shape)
for label, array in (("X", X), ("W", W), ("b", b)):
    print(label, array.shape)
for label, size in (
    ("num_features", num_features),
    ("num_movies", num_movies),
    ("num_users", num_users),
):
    print(label, size)

Y (4778, 443) R (4778, 443)
X (4778, 10)
W (443, 10)
b (1, 443)
num_features 10
num_movies 4778
num_users 443



<a name="4.1"></a>
### Collaborative filtering cost function

The collaborative filtering cost function is given by
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$
The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$$



In [14]:
def collaborative_filtering_cost_function(X, W, Y, R, b, n_f, n_m, n_u, lambda_):
    """Compute the regularized collaborative filtering cost J.

    The data term is half the sum of squared errors
    ``(W[j] . X[i] + b[0, j] - Y[i, j]) ** 2`` taken over the entries with
    ``R[i, j] == 1`` for ``i < n_m`` and ``j < n_u``. The regularization term
    is ``lambda_ / 2`` times the sum of squares of every entry of ``W`` and
    ``X``.

    Args:
        X: 2-D array; row ``i`` is the feature vector of movie ``i``.
        W: 2-D array; row ``j`` is the parameter vector of user ``j``.
        Y: 2-D array of ratings indexed ``[movie, user]``.
        R: 2-D array indexed like ``Y``; an entry equal to 1 means the
            corresponding rating is included in the data term.
        b: 2-D array whose first row holds one bias per user (``b[0, j]``).
        n_f: Number of features. Unused; kept so existing callers still work.
        n_m: Number of movies (leading rows) included in the data term.
        n_u: Number of users (leading columns) included in the data term.
        lambda_: Regularization strength.

    Returns:
        The scalar cost J.
    """
    # All (movie, user) predictions in one matrix product: shape (n_m, n_u).
    # This replaces the per-pair Python loop, which dominated the run time.
    predictions = X[:n_m] @ W[:n_u].T + b[0, :n_u]
    squared_error = np.square(predictions - Y[:n_m, :n_u])

    # Pick the rated entries with a boolean mask instead of multiplying by R,
    # so values stored in unrated cells of Y (even NaN) never reach the sum.
    rated = R[:n_m, :n_u] == 1
    data_cost = np.sum(squared_error[rated]) / 2

    # Regularization covers every entry of W and X, independent of n_m / n_u.
    regularization = (lambda_ / 2) * (np.sum(np.square(W)) + np.sum(np.square(X)))
    return data_cost + regularization

In [15]:
# Cost of the loaded parameters with the regularization weight set to 1.5.
J = collaborative_filtering_cost_function(
    X, W, Y, R, b,
    num_features, num_movies, num_users,
    1.5,
)
print(f"Cost (with regularization): {J:0.2f}")

Cost (with regularization): 306504.87
