In [29]:
%matplotlib inline

import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import matplotlib.pyplot as plt
import surprise
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Ratings that have to be predicted (the items and users are specified in the sample_submission file)

In [30]:
from data_helpers import read_csv_sample

# Sample-submission file: it specifies the entries whose ratings must be predicted.
INPUT_PATH = "../data/sample_submission.csv"
# `ids` is indexed further down as ids[0][i] / ids[1][i], i.e. two parallel
# sequences that are passed (in that order) to algo.predict.
# NOTE(review): presumably ids[0] are user ids and ids[1] item ids — confirm
# against read_csv_sample, which is defined outside this notebook.
ids = read_csv_sample(INPUT_PATH)

## NMF

In [31]:
from surprise.model_selection import GridSearchCV
from surprise.model_selection import KFold
from surprise import NMF
from surprise import Dataset
from surprise import Reader

# One seed shared by the NMF initialisation and the cross-validation folds, so
# the scores printed below are identical on every Restart & Run All.
SEED = 15

# path to dataset file
file_path = "../data/data_surprise.csv"

# Each line is "user,item,rating" separated by commas; the first line is skipped.
reader = Reader(line_format='user item rating', sep=',', skip_lines=1)

data = Dataset.load_from_file(file_path, reader=reader)

# Single-point grid: with one value per key, GridSearchCV only cross-validates
# this one NMF configuration. Add more values per key to actually search.
param_grid = {
    'n_epochs': [20],
    'n_factors': [20],
    'random_state': [SEED],
    'reg_pu': [0.5],
    'reg_qi': [0.05],
}

# `random_state` inside param_grid only seeds the algorithm. An integer `cv`
# makes Surprise build a shuffling KFold with no seed, so the fold assignment
# (and therefore the reported RMSE/MAE) would change from run to run. Passing a
# seeded 2-fold iterator keeps the same number of splits but makes it repeatable.
folds = KFold(n_splits=2, random_state=SEED, shuffle=True)

gs = GridSearchCV(NMF, param_grid, measures=['rmse', 'mae'], cv=folds)
gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

1.010768800126342
{'n_epochs': 20, 'n_factors': 20, 'random_state': 15, 'reg_pu': 0.5, 'reg_qi': 0.05}


In [32]:
from surprise.model_selection import train_test_split
from surprise import accuracy

# Hold out 10% of the ratings for evaluation. The split shuffles the data, so it
# is seeded here: without random_state the partition — and with it the fitted
# model, the RMSE reported below and the submission file — differs on every run.
trainset, testset = train_test_split(data, test_size=.10, random_state=15)

# Algorithm instance configured with the parameters that scored the best RMSE
# in the grid search above.
algo = gs.best_estimator['rmse']

# Train the algorithm on the trainset, and predict ratings for the testset
# NOTE(review): this same fitted model is used for the submission predictions,
# so the submitted model never sees the 10% held out here.
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x577fe248>

In [33]:
# Estimate a rating for every (ids[0][k], ids[1][k]) pair of the sample
# submission and store it rounded to a whole rating.
# NOTE(review): Surprise treats raw ids as strings when a dataset is loaded from
# a file. Presumably read_csv_sample yields ids in that same form — confirm,
# because ids the model does not recognise get a default estimate instead of
# a learned one.
first_ids = ids[0]
second_ids = ids[1]

predictions = []
for k in range(len(first_ids)):
    estimate = algo.predict(first_ids[k], second_ids[k]).est
    predictions.append(round(estimate))

# Sanity check: one prediction per requested entry.
print(len(predictions))

1176952


In [34]:
# Score the fitted model on the held-out testset produced by train_test_split.
# Note: `pred` was a single prediction inside the loop of the previous cell;
# from here on it holds the result of algo.test for the whole testset.
pred = algo.test(testset)

# Then compute RMSE
# accuracy.rmse prints the "RMSE: ..." line and its return value is displayed
# as the cell output.
accuracy.rmse(pred)

RMSE: 1.0044


1.0043827035760193

### Save output for submission

In [35]:
from data_helpers import create_csv_submission

# Destination path handed to the submission helper.
OUTPUT_PATH = "../data/submission.csv"
# `predictions` was built by iterating over `ids` in order, so the two line up
# entry by entry.
# NOTE(review): presumably the helper writes them as a CSV at OUTPUT_PATH —
# confirm in data_helpers.
create_csv_submission(ids, predictions, OUTPUT_PATH)
# The message hard-codes the file name; keep it in sync with OUTPUT_PATH.
print("File submission.csv ready to be submitted !")

File submission.csv ready to be submitted !
