In [714]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import time

In [715]:
ratings = pd.read_csv('data/ratings_small.csv')

In [716]:
movie_details = pd.read_csv('data/movies_metadata.csv')

In [717]:
movie_details.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [718]:
movie_details = movie_details[['id','title','budget','popularity','vote_average', 'vote_count']]

In [719]:
movie_details.head(10)

Unnamed: 0,id,title,budget,popularity,vote_average,vote_count
0,862,Toy Story,30000000,21.9469,7.7,5415.0
1,8844,Jumanji,65000000,17.0155,6.9,2413.0
2,15602,Grumpier Old Men,0,11.7129,6.5,92.0
3,31357,Waiting to Exhale,16000000,3.85949,6.1,34.0
4,11862,Father of the Bride Part II,0,8.38752,5.7,173.0
5,949,Heat,60000000,17.9249,7.7,1886.0
6,11860,Sabrina,58000000,6.67728,6.2,141.0
7,45325,Tom and Huck,0,2.56116,5.4,45.0
8,9091,Sudden Death,35000000,5.23158,5.5,174.0
9,710,GoldenEye,58000000,14.686,6.6,1194.0


In [720]:
ratings.columns

Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')

In [721]:
ratings = ratings[['userId', 'movieId', 'rating']]

In [722]:
ratings_ij = np.array(ratings['rating'])

In [723]:
users = np.array(ratings['userId'])
users = np.unique(users)
movies = np.array(ratings['movieId'])
movies = np.unique(movies)
ratings_ij = np.array(ratings['rating'])

In [724]:
m = len(users)
print("Users:", m)

n = len(movies)
print("Movies:", n)

Users: 671
Movies: 9066


In [725]:
movie_map = {}

for i in range(n):
    movie_map[movies[i]] = i+1

In [726]:
ratings['movieId'] = ratings.apply(lambda x: movie_map[x['movieId']], axis = 1)

In [778]:
#Hyperparmeters
a = 2
b = 1
au = 2
bu = 1
av = 2
bv = 1

#Dimensions
k = 10

In [728]:
pairs_ij = ratings.apply(lambda x: (int(x['userId']-1),int(x['movieId']-1)), axis = 1)
pairs_ij = np.array(pairs_ij)

In [729]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,834,3.0
2,1,860,3.0
3,1,907,2.0
4,1,932,4.0


In [730]:
U0 = np.random.multivariate_normal([0.5]*k, 1*np.eye(k),m)
V0 = np.random.multivariate_normal([0.5]*k, 1*np.eye(k),n)
U = U0
V = V0

In [731]:
UV = np.dot(U0, np.transpose(V0))
U2 = np.diagonal(np.dot(U0, np.transpose(U0)))
V2 = np.diagonal(np.dot(V0, np.transpose(V0)))

In [732]:
uv_ij = np.array([UV[pairs_ij[i]] for i in range(len(pairs_ij))])
uv_vals = (ratings_ij - uv_ij)**2

In [733]:
lambda_ij = np.random.gamma(a + 0.5, (1/b) + uv_vals/2)

In [734]:
lambda_ui = np.random.gamma(au + 0.5, (1/bu) + U2)
lambda_vj = np.random.gamma(av + 0.5, (1/bv) + V2)

In [735]:
c = np.reshape(U0,(m,k,1))
d = np.reshape(U0,(m,1,k))
U_covar = np.multiply(c,d)

In [736]:
c = np.reshape(V0,(n,k,1))
d = np.reshape(V0,(n,1,k))
V_covar = np.multiply(c,d)

In [737]:
mat = csr_matrix((lambda_ij, (np.array(ratings['userId']-1), np.array(ratings['movieId']-1))), shape=(m, n)) 

In [738]:
Ui1 = mat.dot(np.reshape(V_covar,(n,k*k)))
Ui1 = Ui1.reshape(m,k,k)

Vj1 = mat.transpose().dot(np.reshape(U_covar,(m,k*k)))
Vj1 = Vj1.reshape(n,k,k)

In [739]:
Ui = Ui1 + np.array([i*np.eye(k) for i in lambda_ui])
Vj = Vj1 + np.array([i*np.eye(k) for i in lambda_vj])

In [740]:
Sigma_Ui = np.array([np.linalg.inv(i) for i in Ui])
Sigma_Vj = np.array([np.linalg.inv(j) for j in Vj])

In [741]:
val_ij = np.multiply(ratings_ij, lambda_ij)
new_mat = csr_matrix((val_ij, (np.array(ratings['userId']-1), np.array(ratings['movieId']-1))), shape=(m, n)) 

In [742]:
ui2 = new_mat.dot(V0)
vj2 = new_mat.transpose().dot(U0)

In [743]:
mu_u = np.array([np.dot(Sigma_Ui[i],ui2[i]) for i in range(m)])
mu_v = np.array([np.dot(Sigma_Vj[i],vj2[i]) for i in range(n)])

In [744]:
U = np.array([np.random.multivariate_normal(mu_u[i],Sigma_Ui[i]) for i in range(m)])
V = np.array([np.random.multivariate_normal(mu_v[j],Sigma_Vj[j]) for j in range(n)])

  


In [745]:
U_list = []
V_list = []
iterations = 100

In [746]:
start = time.time()
for i in range(iterations):
    UV = np.dot(U, np.transpose(V))
    U2 = np.diagonal(np.dot(U, np.transpose(U)))
    V2 = np.diagonal(np.dot(V, np.transpose(V)))
    uv_ij = np.array([UV[pairs_ij[i]] for i in range(len(pairs_ij))])
    uv_vals = (ratings_ij - uv_ij)**2
    lambda_ij = np.random.gamma(a + 0.5, (1/b) + uv_vals/2)
    lambda_ui = np.random.gamma(au + 0.5, (1/bu) + U2)
    lambda_vj = np.random.gamma(av + 0.5, (1/bv) + V2)
    c = np.reshape(U,(m,k,1))
    d = np.reshape(U,(m,1,k))
    U_covar = np.multiply(c,d)
    c = np.reshape(V,(n,k,1))
    d = np.reshape(V,(n,1,k))
    V_covar = np.multiply(c,d)
    mat = csr_matrix((lambda_ij, (np.array(ratings['userId']-1), np.array(ratings['movieId']-1))), shape=(m, n)) 
    Ui1 = mat.dot(np.reshape(V_covar,(n,k*k)))
    Ui1 = Ui1.reshape(m,k,k)
    Vj1 = mat.transpose().dot(np.reshape(U_covar,(m,k*k)))
    Vj1 = Vj1.reshape(n,k,k)
    Ui = Ui1 + np.array([i*np.eye(k) for i in lambda_ui])
    Vj = Vj1 + np.array([i*np.eye(k) for i in lambda_vj])
    Sigma_Ui = np.array([np.linalg.inv(i) for i in Ui])
    Sigma_Vj = np.array([np.linalg.inv(j) for j in Vj])
    val_ij = np.multiply(ratings_ij, lambda_ij)
    new_mat = csr_matrix((val_ij, (np.array(ratings['userId']-1), np.array(ratings['movieId']-1))), shape=(m, n)) 
    ui2 = new_mat.dot(V)
    vj2 = new_mat.transpose().dot(U)
    mu_u = np.array([np.dot(Sigma_Ui[i],ui2[i]) for i in range(m)])
    mu_v = np.array([np.dot(Sigma_Vj[i],vj2[i]) for i in range(n)])
    U = np.array([np.random.multivariate_normal(mu_u[i],Sigma_Ui[i]) for i in range(m)])
    V = np.array([np.random.multivariate_normal(mu_v[j],Sigma_Vj[j]) for j in range(n)])
    U_list.append(U)
    V_list.append(V)
print(time.time() - start)



182.37178373336792


In [747]:
drop = 10
final_Us = np.array(U_list[drop:])
final_Vs = np.array(V_list[drop:])

In [748]:
mean_U = np.mean(final_Us, axis = 0)
mean_V = np.mean(final_Vs, axis = 0)

In [749]:
rating_mat = np.array([np.dot(final_Us[i],np.transpose(final_Vs[i])) for i in range(iterations-drop)])

In [750]:
predicted_ratings = np.mean(rating_mat, axis = 0)

In [751]:
ratings_pred = np.exp(predicted_ratings)/(1+np.exp(predicted_ratings))

  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


In [776]:
5*ratings_pred[0, 2380]

4.9051933258067795

In [777]:
predicted_ratings[0,2380]

3.9462099755857536

In [754]:
pairs_ij[:30]

array([(0, 30), (0, 833), (0, 859), (0, 906), (0, 931), (0, 1017),
       (0, 1041), (0, 1047), (0, 1083), (0, 1087), (0, 1111), (0, 1140),
       (0, 1515), (0, 1665), (0, 1708), (0, 1743), (0, 1815), (0, 1962),
       (0, 2380), (0, 2925), (1, 9), (1, 16), (1, 37), (1, 45), (1, 48),
       (1, 49), (1, 58), (1, 100), (1, 123), (1, 129)], dtype=object)

In [755]:
ratings.head(20)

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,834,3.0
2,1,860,3.0
3,1,907,2.0
4,1,932,4.0
5,1,1018,2.0
6,1,1042,2.0
7,1,1048,2.0
8,1,1084,3.5
9,1,1088,2.0


In [756]:
mean_U[0]

array([0.41360551, 0.57787917, 0.55001896, 0.49530804, 0.38735914,
       0.60481244, 0.53832553, 0.51358016, 0.31017364, 0.51025099])

In [757]:
mu_u[0]

array([ 0.73173986, -0.2401908 ,  0.39285737,  0.22228345,  0.02319101,
        0.1728647 , -0.05100056,  0.46161911, -0.74513961, -0.88751965])

In [758]:
Sigma_Ui[0]

array([[ 0.1292905 , -0.00376168, -0.01445312,  0.003301  ,  0.01515981,
         0.03854528, -0.04663019, -0.04932194,  0.00402183, -0.05432506],
       [-0.00376168,  0.01988001, -0.01128948, -0.00989443,  0.00475098,
        -0.00714948,  0.02405011, -0.00804302,  0.010453  , -0.00861924],
       [-0.01445312, -0.01128948,  0.09481114, -0.0331151 , -0.07008793,
         0.02782985, -0.018002  ,  0.02089072,  0.00785609,  0.01396839],
       [ 0.003301  , -0.00989443, -0.0331151 ,  0.04222118,  0.01164023,
        -0.00021192, -0.01130019, -0.00596664, -0.00991022,  0.01286585],
       [ 0.01515981,  0.00475098, -0.07008793,  0.01164023,  0.08731077,
        -0.02586813, -0.01894594, -0.00025543, -0.0078015 , -0.01557726],
       [ 0.03854528, -0.00714948,  0.02782985, -0.00021192, -0.02586813,
         0.0431385 , -0.02572308, -0.01022886, -0.0024394 , -0.0130256 ],
       [-0.04663019,  0.02405011, -0.018002  , -0.01130019, -0.01894594,
        -0.02572308,  0.12416861, -0.0544368 

In [759]:
lambda_ui[0]

1.8569431024423266

In [760]:
np.random.multivariate_normal(mu_u[0],Sigma_Ui[0])

array([ 0.84321633, -0.36318297,  0.82101661,  0.2170112 , -0.43620285,
        0.31039505, -0.35107132,  0.70800541, -0.69919334, -0.92932654])