In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras import layers
import pymc as pm

In [2]:
# Seed NumPy's global RNG so that the random initialisations below (and any
# later np.random draws, e.g. the uniform baseline) are reproducible when the
# notebook is executed top to bottom on a fresh kernel.
np.random.seed(42)

# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs on other machines.
df = pd.read_csv("C:\\Code\\Data\\MovieRecommenderSystem\\ratings.csv")

# Dense user x movie rating matrix. Missing ratings are filled with 0 and only
# the first 900 movie columns are kept.
R = df.pivot(index="userId", columns="movieId", values="rating").fillna(0).values[:,:900] # N x M matrix

n = R.shape[0] # number of users
m = R.shape[1] # number of items
d = 10 # dimensionality of the latent vector

U = np.random.normal(0,1,size=(n,d)) # user matrix
V = np.random.normal(0,1,size=(m,d)) # item matrix

I = np.where(R == 0,0,1) # Indicator matrix -> 0 if R_ij is missing, 1 otherwise.
I_mask = R == 0 # boolean mask: True where the rating is missing

In [2]:
# Sanity check: dimensions of the rating matrix as (users, movies).
# NOTE(review): the NameError recorded for this cell suggests it was last run
# before the loading cell had defined R -- re-run the notebook in order.
R.shape

NameError: name 'R' is not defined

## Baselines

In [3]:
def abs_error(pred, ratings=None, indicator=None):
    """Mean absolute error of ``pred`` over the observed ratings only.

    Parameters
    ----------
    pred : array-like
        Predicted ratings, broadcastable to the shape of ``ratings``.
    ratings : array-like, optional
        True rating matrix. Defaults to the notebook-level ``R``.
    indicator : array-like, optional
        0/1 matrix that is 1 where a rating was observed. Defaults to the
        notebook-level ``I``.

    Returns
    -------
    float
        Sum of absolute errors on observed entries divided by the number of
        observed entries; ``nan`` when there are no observed entries.
    """
    ratings = np.asarray(R if ratings is None else ratings)
    indicator = np.asarray(I if indicator is None else indicator)

    n_observed = np.sum(indicator)
    if n_observed == 0:
        return np.nan

    # Mask the *difference* rather than only the prediction: the result then
    # does not depend on what value the rating matrix holds at unobserved
    # positions (e.g. 0-filled or mean-imputed).
    masked_error = np.abs((np.asarray(pred) - ratings) * indicator)
    return np.sum(masked_error) / n_observed

In [4]:
# Random baseline: for every user/item pair draw an integer rating uniformly
# from {1, ..., 5} (the upper bound of randint is exclusive).
R_uniform = np.random.randint(low=1, high=6, size=(n, m))
abs_error(R_uniform)

1.4797889642587965

In [5]:
# Constant baseline: predict the mean of all observed (non-zero) ratings
# for every user/item pair.
global_mean = np.mean(R, where=R != 0)
R_global_mean = np.full((n, m), global_mean)
abs_error(R_global_mean)

0.8271026565093422

In [6]:
# Blended baseline: average of the global mean, the per-movie mean and the
# per-user mean, each computed over observed (non-zero) ratings only.
rated = R != 0

# Per-movie mean as a (1, m) row vector; a column without ratings yields NaN,
# which is replaced by 0.
movie_mean = R.mean(axis=0, where=rated)[np.newaxis, :]
movie_mean = np.where(np.isnan(movie_mean), 0, movie_mean)

# Per-user mean as an (n, 1) column vector, with the same NaN -> 0 fallback.
user_mean = R.mean(axis=1, where=rated)[:, np.newaxis]
user_mean = np.where(np.isnan(user_mean), 0, user_mean)

# Broadcasting expands the row/column vectors to the full (n, m) grid.
R_means_of_means = 1/3 * (R_global_mean + movie_mean + user_mean)
abs_error(R_means_of_means)


0.705679751309807

## PMF

We model the ratings as draws from Gaussian distributions, with a separate distribution $R_{ij}$ for each user–item combination. The mean of each distribution is $U_i V_j^{T}$.

In [None]:
n_rated = (~I_mask).sum() # number of movies that have been rated
n_not_rated = I_mask.sum() # number of movies that have not been rated

R_mean_imputed = R
R_mean_imputed[I_mask] = R[~I_mask].mean()

In [None]:
alpha_u = 1 / R.var(axis=1).mean()
alpha_v = 1/ R.var(axis=0).mean()

In [None]:
coords={
        "users":np.arange(n),
        "movies":np.arange(m),
        "latent":np.arange(d),
        "obs_id":np.arange(n_rated)
        }

In [None]:
with pm.Model(coords=coords) as pmf:
    # Isotropic precision matrices for the zero-mean Gaussian priors on the
    # latent user and movie vectors.
    user_precision = alpha_u * np.eye(d)
    movie_precision = alpha_v * np.eye(d)

    # Boolean selector for the user/movie pairs that actually have a rating.
    observed_entries = ~I_mask

    # Latent factors: one d-dimensional vector per user and per movie.
    U = pm.MvNormal("U", mu=0, tau=user_precision, dims=("users", "latent"))
    V = pm.MvNormal("V", mu=0, tau=movie_precision, dims=("movies", "latent"))

    # Likelihood: each observed rating is Gaussian around the dot product of
    # its user and movie vectors, with fixed precision 2.
    R_ = pm.Normal(
        "R",
        mu=(U @ V.T)[observed_entries],
        tau=2,
        dims="obs_id",
        observed=R[observed_entries],
    )

    trace = pm.sample(draws=100)