In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import stan 

import multiprocessing
multiprocessing.set_start_method("fork")

from concurrent.futures import ThreadPoolExecutor as _ThreadPoolExecutor

def _exec_async(func, *args, **kwargs):
    with _ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(func, *args, **kwargs)
    return future.result()

def stan_build(*args, **kwargs): return _exec_async(stan.build, *args, **kwargs)

ModuleNotFoundError: No module named 'stan'

In [3]:
Xte = np.loadtxt('top-ratings-imputed.txt', delimiter=' ')
Xtr = np.loadtxt('top-ratings-missing.txt', delimiter=' ')
Xte = Xte[0:50,0:100] 
Xtr = Xtr[0:50,0:100]
names = {}
i = 0
with open("top-names.txt", "r") as f:  
    for lines in f.readlines():
        names[i] = lines
        i+=1
#print(names)

In [4]:
recsys = """
data {
 int<lower=1> M; // Total number of movies
 int<lower=1> N; // Total number of users
 int<lower=1> R; // Total number of ratings
 int<lower=0> K; // Number of latent dimensions
 int usr[R]; // user id for r'th rating
 int movie[R]; // movie id for r'th rating
 vector<lower=1,upper=10> [R] rating; // vector of rating values (1..10)
}
transformed data { // transform interval 1..10 to [-3,3]
 vector [R] pred = log( (rating+.5)./(10.5-rating) );
} // inverse is: 0.5 + 10./(1+exp(-pred))
parameters {
 vector [N] u;
 vector [M] v;
 matrix [N,K] U; // latent dimensions
 matrix [M,K] V; 
// vector [R] pred;
}
//transformed parameters {
// vector [R] rating = .5 + 10.0./(1+exp(-pred));
//}
model{
 u ~ normal(0,1);
 v ~ normal(0,1);
 to_vector(U) ~ normal(0,1);
 to_vector(V) ~ normal(0,1);
 for (r in 1:R) { 
 pred[r] ~ normal(u[usr[r]]+v[movie[r]]+U[usr[r],]*V[movie[r],]', 0.1); 
 //rating[r] ~ normal(predB[r], 0.01);
 }
}
"""

In [5]:
N,M = Xtr.shape
usr,movie = np.where(Xtr!=-1)  # find non-NaN's
rating = Xtr[usr,movie]
print(rating.shape)
## NOTE: Stan uses 1-based indexing, compared to python's zero-based, so adjust:
recdata = {"M": M, "N": N, "R":len(rating), "K":2, 
           "usr":   usr+1,
           "movie": movie+1,
           "rating":rating+1}
recdata3 = {"M": M, "N": N, "R":len(rating), "K":3, 
           "usr":   usr+1,
           "movie": movie+1,
           "rating":rating+1}
recdata4 = {"M": M, "N": N, "R":len(rating), "K":4, 
           "usr":   usr+1,
           "movie": movie+1,
           "rating":rating+1}           

(4952,)


In [6]:
#stan.build(schools_code, data=schools_data, ...)
posterior = stan_build(recsys, data=recdata, random_seed=0)
posterior3 = stan_build(recsys, data=recdata3, random_seed=0)
posterior4 = stan_build(recsys, data=recdata4, random_seed=0)

Building...



Building: found in cache, done.

Building...



Building: found in cache, done.

Building...



Building: found in cache, done.

In [7]:
import pickle
with open('stan_recsys.pkl', 'wb') as f: pickle.dump(posterior, f)
with open('stan_recsys3.pkl', 'wb') as f: pickle.dump(posterior3, f)
with open('stan_recsys4.pkl', 'wb') as f: pickle.dump(posterior4, f)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import stan
import multiprocessing
multiprocessing.set_start_method("fork")
from concurrent.futures import ThreadPoolExecutor as _ThreadPoolExecutor
def _exec_async(func, *args, **kwargs):
 with _ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(func, *args, **kwargs)
 return future.result()

In [None]:
def stan_sample(model, *args, **kwargs): return _exec_async(model.sample, *args, **kwargs)

In [None]:
Xtr = np.loadtxt('top-ratings-missing.txt', delimiter=' ')
Xtr = Xtr[0:50,0:100]
N,M = Xtr.shape
usr,movie = np.where(Xtr != -1) # find non-NaN's
rating = Xtr[usr,movie]


In [None]:
import pickle
posterior = pickle.load(open('stan_recsys.pkl', 'rb'))

In [None]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
fit = stan_sample(posterior, num_chains=1, num_samples=500)

In [None]:
with open('stan_fit.pkl', 'wb') as f: pickle.dump(fit, f)

In [None]:
def xform(s): return .5 + 10.0/(1+np.exp(-s))-1

In [None]:
rTr = np.zeros((len(usr),fit.num_samples))
for i,n,m in zip(range(len(usr)),usr,movie):
 rTr[i,:] = xform(fit["u"][n,:]+fit["v"][m,:]+(fit["U"][n,:,:]*fit["V"][m,:,:]).sum(0))
print(rTr)


In [None]:
usr2,movie2 = np.where(Xtr == -1) # find all NaN's
rhat = np.zeros((len(usr2),fit.num_samples))
def xform(s): return .5 + 10.0/(1+np.exp(-s))-1
for i,n,m in zip(range(len(usr2)),usr2,movie2):
 rhat[i,:] = xform(fit["u"][n,:]+fit["v"][m,:]+(fit["U"][n,:,:]*fit["V"][m,:,:]).sum(0))

In [None]:

Xte = np.loadtxt('top-ratings-imputed.txt', delimiter=' ')
Xte = Xte[0:50,0:100]

print("Training RMSE: ", np.sqrt(((rTr.mean(1)-rating)**2).mean()))
print("Test RMSE: ", np.sqrt(((rhat.mean(1)-Xte[usr2,movie2])**2).mean()))

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import stan
import multiprocessing
multiprocessing.set_start_method("fork")
from concurrent.futures import ThreadPoolExecutor as _ThreadPoolExecutor
def _exec_async(func, *args, **kwargs):
 with _ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(func, *args, **kwargs)
 return future.result()

In [2]:
def stan_sample(model, *args, **kwargs): return _exec_async(model.sample, *args, **kwargs)

In [3]:
Xtr = np.loadtxt('top-ratings-missing.txt', delimiter=' ')
Xtr = Xtr[0:50,0:100]
N,M = Xtr.shape
usr,movie = np.where(Xtr != -1) # find non-NaN's
rating = Xtr[usr,movie]


In [4]:
import pickle
posterior3 = pickle.load(open('stan_recsys3.pkl', 'rb'))
posterior4 = pickle.load(open('stan_recsys4.pkl', 'rb'))

In [None]:
fit3 = stan_sample(posterior3, num_chains=1, num_samples=500)
with open('stan_fit3.pkl', 'wb') as f: pickle.dump(fit3, f)

In [None]:
def stan_sample(model, *args, **kwargs): return _exec_async(model.sample, *args, **kwargs)

In [5]:
fit4 = stan_sample(posterior4, num_chains=1, num_samples=500)
with open('stan_fit5.pkl', 'wb') as f: pickle.dump(fit4, f)

Sampling:   0%
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
Sampling:   0% (1/1500)
S

In [6]:
fit4

<stan.Fit>
Parameters:
    u: (50,)
    v: (100,)
    U: (50, 4)
    V: (100, 4)
Draws: 500

In [None]:
def xform(s): return .5 + 10.0/(1+np.exp(-s))-1

rTr = np.zeros((len(usr),fit3.num_samples))
for i,n,m in zip(range(len(usr)),usr,movie):
 rTr[i,:] = xform(fit3["u"][n,:]+fit3["v"][m,:]+(fit3["U"][n,:,:]*fit3["V"][m,:,:]).sum(0))
print(rTr)
usr2,movie2 = np.where(Xtr == -1) # find all NaN's
rhat = np.zeros((len(usr2),fit3.num_samples))



for i,n,m in zip(range(len(usr2)),usr2,movie2):
 rhat[i,:] = xform(fit3["u"][n,:]+fit3["v"][m,:]+(fit3["U"][n,:,:]*fit3["V"][m,:,:]).sum(0))

Xte = np.loadtxt('top-ratings-imputed.txt', delimiter=' ')
Xte = Xte[0:50,0:100]

print("Training RMSE: ", np.sqrt(((rTr.mean(1)-rating)**2).mean()))
print("Test RMSE: ", np.sqrt(((rhat.mean(1)-Xte[usr2,movie2])**2).mean()))

In [None]:
def xform(s): return .5 + 10.0/(1+np.exp(-s))-1
import pickle
fit = pickle.load(open('stan_fit.pkl', 'rb'))
rTr = np.zeros((len(usr),fit.num_samples))
for i,n,m in zip(range(len(usr)),usr,movie):
 rTr[i,:] = xform(fit["u"][n,:]+fit["v"][m,:]+(fit["U"][n,:,:]*fit["V"][m,:,:]).sum(0))
print(rTr)
usr2,movie2 = np.where(Xtr == -1) # find all NaN's
rhat = np.zeros((len(usr2),fit.num_samples))



for i,n,m in zip(range(len(usr2)),usr2,movie2):
 rhat[i,:] = xform(fit["u"][n,:]+fit["v"][m,:]+(fit["U"][n,:,:]*fit["V"][m,:,:]).sum(0))

Xte = np.loadtxt('top-ratings-imputed.txt', delimiter=' ')
Xte = Xte[0:50,0:100]

print("Training RMSE: ", np.sqrt(((rTr.mean(1)-rating)**2).mean()))
print("Test RMSE: ", np.sqrt(((rhat.mean(1)-Xte[usr2,movie2])**2).mean()))

In [7]:
def xform(s): return .5 + 10.0/(1+np.exp(-s))-1
import pickle
fit = pickle.load(open('stan_fit5.pkl', 'rb'))
rTr = np.zeros((len(usr),fit.num_samples))
for i,n,m in zip(range(len(usr)),usr,movie):
 rTr[i,:] = xform(fit["u"][n,:]+fit["v"][m,:]+(fit["U"][n,:,:]*fit["V"][m,:,:]).sum(0))
print(rTr)
usr2,movie2 = np.where(Xtr == -1) # find all NaN's
rhat = np.zeros((len(usr2),fit.num_samples))



for i,n,m in zip(range(len(usr2)),usr2,movie2):
 rhat[i,:] = xform(fit["u"][n,:]+fit["v"][m,:]+(fit["U"][n,:,:]*fit["V"][m,:,:]).sum(0))

Xte = np.loadtxt('top-ratings-imputed.txt', delimiter=' ')
Xte = Xte[0:50,0:100]

print("Training RMSE: ", np.sqrt(((rTr.mean(1)-rating)**2).mean()))
print("Test RMSE: ", np.sqrt(((rhat.mean(1)-Xte[usr2,movie2])**2).mean()))

[[9.08212844 9.07358268 9.03689064 ... 9.05253612 9.06558973 9.02591697]
 [7.71539498 7.64583405 7.66572838 ... 7.67271751 7.6945051  7.67259597]
 [8.6740714  8.65389381 8.72832183 ... 8.65899839 8.6504275  8.69028524]
 ...
 [5.61883201 5.68152621 5.64985238 ... 5.62542996 5.67612826 5.67901466]
 [6.10479877 6.06781568 6.11632687 ... 6.06009521 6.02128775 6.07486345]
 [6.30465889 6.2691421  6.19254726 ... 6.20129773 6.15424411 6.21907161]]
Training RMSE:  1.258085557101938
Test RMSE:  2.6493284173115152
