In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import import_ipynb

In [2]:
# Load the ratings table. The code below uses the 'userId', 'movieId' and
# 'rating' columns by name and drops the last column when building arrays.
data = pd.read_csv('ratings.csv')

# Seed shared by both splits so a fresh "Restart & Run All" reproduces the same
# partitions (the PMF model below is constructed with a fixed seed as well).
split_seed = 1

# train_test_split: hold out 20% of the rows, stratified on userId.
train, test = train_test_split(
    data, test_size=0.2, stratify=data['userId'], random_state=split_seed
)

# Comment out the line below not to include validation set
train, validation = train_test_split(
    train, test_size=0.1, stratify=train['userId'], random_state=split_seed
)

# Drop the last column of each frame and hand plain arrays to the model.
# Column 2 of these arrays is used as the rating further down.
train_data = np.array(train.iloc[:, :-1])
test_data = np.array(test.iloc[:, :-1])

# Comment out the line below not to include validation set
validation_data = np.array(validation.iloc[:, :-1])

# The largest id is used as the matrix dimension; this equals the number of
# distinct users/items only if ids are 1-based without gaps -- TODO confirm.
num_users = int(data['userId'].max())
num_items = int(data['movieId'].max())

# construct model
# PMF_model is a notebook; importing it relies on the import_ipynb hook that is
# imported in the first cell.
import PMF_model

lambda_u = 0.1
lambda_v = 0.1
latent_size = 20
lr = 0.001
num_iter = 2000
stopping_deriv = 0.0
model = PMF_model.PMF(
    m=num_items,
    n=num_users,
    lambda_u=lambda_u,
    lambda_v=lambda_v,
    latent_size=latent_size,
    lr=lr,
    num_iter=num_iter,
    stopping_deriv=stopping_deriv,
    seed=1,
)
print('PMF(num_items={:d}, num_users={:d}, reg_u={:f}, reg_v={:f}, latent_size={:d}, learning rate={:f}, itertion number={:d})'.format(num_items, num_users, lambda_u, lambda_v, latent_size, lr, num_iter))
print('training model...')

# Run one of the two lines below based on whether you want to include validation set or not
U, V, train_rmse, validation_rmse = model.fit(train_data=train_data, validation_data=validation_data)
# U, V, train_rmse = model.fit(train_data=train_data)

print('train rmse:{:f}'.format(train_rmse))

print('testing model...')
preds = model.predict(data=test_data)
# mean_squared_error takes (y_true, y_pred); the held-out ratings are the truth.
test_rmse = sqrt(mean_squared_error(test_data[:, 2], preds))

print('test rmse:{:f}'.format(test_rmse))

importing Jupyter notebook from PMF_model.ipynb
PMF(num_items=193609, num_users=610, reg_u=0.010000, reg_v=0.010000, latent_size=20, learning rate=0.000030, itertion number=1000)
training model...


In [None]:
from sklearn.kernel_ridge import KernelRidge

# For user specific
# NOTE(review): rating_preds is allocated with shape (num_users, num_items) but
# nothing in this cell writes to it; it is kept only so that any later cell
# referencing the name still works. Consider removing it if it is unused.
rating_preds = np.zeros((num_users, num_items))

kernel = 'rbf'
alpha = 0.05
gamma = None

# One MSE value per user that has ratings in both the train and the test split.
test_mse = []
train_mse = []

# Rows of V.T are indexed by (movieId - 1) below.
# presumably V is (latent_size, num_items) as returned by the PMF fit -- TODO confirm
item_factors = V.T

# Group each split by user once instead of re-filtering the frames per user.
# groupby yields users in ascending id order, matching a 1..num_users loop.
test_by_user = {user_id: rows for user_id, rows in test.groupby('userId')}

for userId, user_spec in train.groupby('userId'):
    user_spec_test = test_by_user.get(userId)
    if user_spec_test is None:
        # A user without test ratings cannot be evaluated; skip.
        continue

    # Features are the latent vectors of the movies this user rated.
    X_train = item_factors[user_spec['movieId'].to_numpy() - 1, :]
    y_train = user_spec['rating']
    X_test = item_factors[user_spec_test['movieId'].to_numpy() - 1, :]

    # Fit an independent kernel ridge regressor for this user.
    model_kr = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
    model_kr.fit(X_train, y_train)
    y_preds = model_kr.predict(X_test)
    train_preds = model_kr.predict(X_train)

    test_mse.append(mean_squared_error(user_spec_test['rating'], y_preds))
    train_mse.append(mean_squared_error(user_spec['rating'], train_preds))

In [None]:
# Report the kernel ridge errors. Each list entry is one user's MSE, so the
# values printed are the square root of the unweighted mean over users, not an
# RMSE pooled over all individual ratings.
if test_mse and train_mse:
    print('kernel ridge test rmse:{:f}'.format(sqrt(np.mean(test_mse))))
    print('kernel ridge train rmse:{:f}'.format(sqrt(np.mean(train_mse))))
else:
    # Averaging an empty list would only produce nan and a RuntimeWarning.
    print('no per-user kernel ridge results to report')