In [39]:
import random

from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise import Reader

from surprise.model_selection import GridSearchCV

In [40]:
# Load the Python libraries
import os
import pandas as pd
import numpy as np
from surprise.model_selection import train_test_split

In [41]:
# Describe the file layout: tab-separated "user item rating timestamp" rows,
# with ratings on a 1-5 scale.
reader = Reader(
    line_format='user item rating timestamp',
    sep='\t',
    rating_scale=(1, 5),
)

# Location of the u.data ratings file, resolved relative to the user's home directory.
file_path = os.path.expanduser('~/PycharmProjects/Tesis1/ml-100k/u.data')

# Parse the ratings file into a Surprise Dataset using the reader above.
data = Dataset.load_from_file(file_path, reader=reader)

In [42]:

# Take the dataset's ratings as a plain list (raw_ratings), without indices.
# NOTE(review): no copy is made here, so in-place operations on `raw_ratings`
# (e.g. the shuffle in the next cell) presumably act on `data.raw_ratings` too.
raw_ratings = data.raw_ratings



In [43]:
# Sanity check: how many rating records were loaded.
len(raw_ratings)

100000

In [44]:
# Seed the RNGs before shuffling so the shuffle -- and therefore the train/test
# split derived from it in the next cell -- is repeatable on a fresh kernel.
# numpy is seeded as well because, per the Surprise documentation, algorithms
# left with random_state=None draw from numpy's global RNG.
# NOTE(review): work dispatched to other processes (n_jobs=-1 in the grid
# search) may not inherit this seed -- confirm if exact CV scores must match.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# In-place shuffle of the ratings list.
random.shuffle(raw_ratings)

In [45]:
# Split into train and test: the first 90% of the ratings list is used for
# training, the remaining 10% is held out for the final evaluation.
threshold = int(0.9 * len(raw_ratings))
train_raw_ratings, test_raw_ratings = raw_ratings[:threshold], raw_ratings[threshold:]

# Overwrite the dataset's ratings with the training portion only, so that
# everything built from `data` afterwards never sees the held-out ratings.
data.raw_ratings = train_raw_ratings

In [49]:
# Use a grid search to obtain the best parameters for the selected algorithm.
print('Grid Search...')

# Fixing "biased" to False makes the SVD algorithm resemble PMF.
param_grid = dict(
    n_factors=[50, 100, 150],
    n_epochs=[25, 50, 60],
    biased=[False],
    lr_all=[0.01, 0.1],
    reg_all=[0.02, 0.1, 0.5],
)

# 3-fold cross-validation scored by RMSE; n_jobs=-1 is passed through to the
# search unchanged.
grid_search = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse'],
    cv=3,
    n_jobs=-1,
)
grid_search.fit(data)

Grid Search...


In [50]:
# Report the best RMSE found by the grid search, then the parameter
# combination that produced it (one per line).
print(grid_search.best_score['rmse'], grid_search.best_params['rmse'], sep='\n')

# Keep the estimator the grid search associates with the best RMSE; it is
# fitted on the training data in a later cell.
algo = grid_search.best_estimator['rmse']

0.9417157459429343
{'n_factors': 50, 'n_epochs': 50, 'biased': False, 'lr_all': 0.01, 'reg_all': 0.1}


In [17]:
# Show the default repr of the selected estimator object.
print(algo)

<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7fbbdcd91280>


In [51]:

# Retrain on the whole training set using the best parameter set found.
# `data.raw_ratings` was replaced with the training portion earlier, so the
# trainset built here is made from those ratings only.
trainset = data.build_full_trainset()
# Left as the last expression so the notebook echoes the returned estimator.
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f0ec8ddfdc0>

In [56]:

# The first call builds a test set out of the training set itself, so the RMSE
# below measures how well the model fits data it was trained on.
predictions = algo.test(trainset.build_testset())
# end='   ' keeps the label on the same line as the text printed by accuracy.rmse.
print('Accuracy on Trainset,', end='   ')
# Last expression: the returned RMSE value is also echoed as the cell output.
accuracy.rmse(predictions)

Accuracy on Trainset,   RMSE: 0.7130


0.7130467751376226

In [57]:
# Evaluate the model on the test set: the ratings held out in `test_raw_ratings`
# when the train/test split was made.
testset = data.construct_testset(test_raw_ratings) 
# NOTE: this rebinds `predictions`, replacing the trainset predictions from the previous cell.
predictions = algo.test(testset)
# end=' ' keeps the label on the same line as the text printed by accuracy.rmse.
print('Accuracy on Testset,', end=' ')
# Last expression: the returned RMSE value is also echoed as the cell output.
accuracy.rmse(predictions)

Accuracy on Testset, RMSE: 0.9158


0.9157965094494755

In [58]:
# Dimension check, one value per line: number of users and items in the
# trainset, followed by the shapes of the fitted model's `qi` and `pu` arrays.
print(
    trainset.n_users,
    trainset.n_items,
    algo.qi.shape,
    algo.pu.shape,
    sep='\n',
)


943
1669
(1669, 50)
(943, 50)
