<a href="https://colab.research.google.com/github/Ruruthia/Boardgames-recommending-system/blob/master/Tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
!pip install surprise



In [2]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import pickle
import os

from google.colab import drive

import pandas as pd
import numpy as np

from surprise import SVD
from surprise import NMF
from surprise.dataset import DatasetAutoFolds
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import dump
from surprise.accuracy import rmse
from surprise.accuracy import mae

# Preparing dataset

In [3]:
# Mount Google Drive under /content/drive; the ratings file is read from
# '/content/drive/My Drive/' in the next cell.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Only three columns are used, so let the parser skip the rest (usecols)
# instead of loading the whole file and discarding columns afterwards.
# usecols keeps the file's column order, so re-select to pin the order
# user, item, rating that the rest of the notebook relies on.
rating_columns = ['bgg_user_name', 'bgg_id', 'bgg_user_rating']
ratings_df = pd.read_csv(
    '/content/drive/My Drive/ratings.csv.gz',
    compression='gzip',
    usecols=rating_columns,
)[rating_columns]

In [5]:
# Keep only rows whose rating is at least 1 (rows with a missing rating
# compare False and are dropped as well).
has_valid_rating = ratings_df['bgg_user_rating'].ge(1)
ratings_df = ratings_df.loc[has_valid_rating]

In [6]:
# Split by *user*: 70% of the users go to train_df with all their ratings,
# the remaining 30% go to test_df.
# A dedicated, seeded generator makes the split identical on every
# Restart & Run All (the global np.random state is left untouched).
split_rng = np.random.RandomState(42)
users = ratings_df['bgg_user_name'].unique()
split_rng.shuffle(users)
train_size = int(0.7*users.shape[0])

# Every user is in exactly one of the two slices of `users`, so the test rows
# are precisely the complement of the train rows; compute the mask once.
is_train_row = ratings_df['bgg_user_name'].isin(users[:train_size])
train_df = ratings_df[is_train_row]
test_df = ratings_df[~is_train_row]

In [7]:
def split_test(test_df, seed=42, frac=0.8):
    """Split every user's ratings into a "known" and an "unknown" part.

    For each user (rows grouped by 'bgg_user_name'), ``round(frac * n)`` of
    the user's ``n`` ratings are sampled without replacement into the known
    part; the remaining ratings form the unknown part.

    Args:
        test_df: DataFrame with a 'bgg_user_name' column.
        seed: Seed for the sampling. The same seed and input always give
            the same split. Pass None for a non-deterministic split.
        frac: Fraction of each user's ratings placed in the known part.

    Returns:
        Tuple ``(test_known, test_unknown)`` of DataFrames with the same
        columns as ``test_df``. Both are empty when ``test_df`` has no
        groups.
    """
    # Local generator: honours `seed` and leaves the global np.random state alone.
    rng = np.random.RandomState(seed)

    test_known = []
    test_unknown = []
    for _, user_df in test_df.groupby(by='bgg_user_name'):
        n_ratings = user_df.shape[0]

        known_size = int(round(frac * n_ratings))
        known_positions = rng.choice(n_ratings, known_size, replace=False)
        test_known.append(user_df.iloc[known_positions])

        # Everything that was not sampled as "known" is "unknown".
        unknown_positions = np.setdiff1d(np.arange(n_ratings), known_positions)
        test_unknown.append(user_df.iloc[unknown_positions])

    if not test_known:
        # pd.concat([]) raises; return two empty frames with the input's columns.
        return test_df.iloc[0:0].copy(), test_df.iloc[0:0].copy()

    return pd.concat(test_known), pd.concat(test_unknown)

In [None]:
def get_errors(model, df):
    """Return the signed prediction error for every rating in ``df``.

    Args:
        model: Object with a ``predict(uid=..., iid=...)`` method whose
            result holds the estimated rating at index 3 (for Surprise
            algorithms this is the ``est`` field of the prediction).
        df: DataFrame with 'bgg_user_name', 'bgg_id' and 'bgg_user_rating'
            columns.

    Returns:
        1-D numpy array of ``estimate - true_rating``, one entry per row of
        ``df`` in row order (empty when ``df`` has no rows).
    """
    # Walk the three columns in lockstep rather than using iterrows(): no
    # per-row Series is built and each column keeps its own dtype.
    rows = zip(df['bgg_user_name'], df['bgg_id'], df['bgg_user_rating'])
    errors = [
        model.predict(uid=user, iid=item)[3] - true_rating
        for user, item, true_rating in rows
    ]
    return np.array(errors)

In [48]:
def MSE(errors):
    """Mean squared error.

    Args:
        errors: Array-like of signed errors (numpy array, list, tuple, ...).

    Returns:
        The mean of the squared errors.
    """
    # Coerce first so plain Python sequences work too; `list ** 2` would raise.
    errors = np.asarray(errors)
    return np.mean(errors**2)

def RMSE(errors):
    """Root mean squared error: the square root of ``MSE(errors)``.

    ``errors`` is handed to ``MSE`` unchanged.
    """
    mean_squared = MSE(errors)
    return np.sqrt(mean_squared)

In [8]:
# Split the held-out users' ratings using split_test's default arguments.
# test_known is later added to the training data; test_unknown is what the
# fitted model is scored on.
test_known, test_unknown = split_test(test_df)

In [10]:
# Ratings are on a 1-10 scale.
reader = Reader(rating_scale=(1,10))
# Train on all ratings of the train users plus the "known" ratings of the test
# users. pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
data = DatasetAutoFolds.load_from_df(
    pd.concat([train_df, test_known]), reader
).build_full_trainset()

# Tests

## SVD

In [11]:
# Fix the random state so repeated runs train the same model and the reported
# errors are reproducible.
algo = SVD(random_state=42)

In [12]:
# Train on the full trainset built above (train users' ratings plus the
# "known" ratings of the test users).
algo.fit(data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f17454eef50>

In [41]:
# Signed errors (estimate minus true rating) on the test users' ratings that
# were withheld from training.
errors = get_errors(algo, test_unknown)

In [44]:
# NOTE(review): get_errors already returns np.array(errors), so this
# conversion only makes a copy and looks redundant.
errors = np.array(errors)

In [49]:
# Report MSE, then RMSE, one value per line.
print(MSE(errors), RMSE(errors), sep='\n')

1.4889285583316574
1.2202166030388446


## NMF

In [None]:
# Fix the random state so the factorization is initialised the same way on
# every run.
algo = NMF(random_state=42)

In [None]:
# cross_validate has to split its input into folds, which needs a dataset
# object; `data` is the trainset returned by build_full_trainset() and cannot
# be split. Build a foldable dataset from the same ratings for this step.
cv_data = DatasetAutoFolds.load_from_df(pd.concat([train_df, test_known]), reader)
cross_validate(algo, cv_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.8688  1.8569  1.8721  1.8747  1.8691  1.8683  0.0061  
MAE (testset)     1.5952  1.5826  1.5988  1.6015  1.5957  1.5948  0.0065  
Fit time          1062.20 1097.89 1107.79 1087.02 1078.93 1086.76 15.69   
Test time         74.65   73.51   62.34   70.71   74.31   71.10   4.60    


{'fit_time': (1062.197312116623,
  1097.8854765892029,
  1107.786484003067,
  1087.024235010147,
  1078.9255058765411),
 'test_mae': array([1.59524015, 1.58262809, 1.59881711, 1.60151788, 1.59570603]),
 'test_rmse': array([1.86880646, 1.85686863, 1.87213347, 1.87465722, 1.8691067 ]),
 'test_time': (74.64638924598694,
  73.50714683532715,
  62.339067697525024,
  70.71038246154785,
  74.31465744972229)}