In [4]:
import torch 
from torch.utils import data
from torch import nn

from typing import List, Tuple, Dict, Union, Optional
from typing import Iterable

import pandas as pd
import numpy as np
import surprise

from tqdm.notebook import tqdm

In [5]:
import sys
import os 

# Make the repository root (two levels up) importable so `modules` resolves.
# os.path.join with a single argument is a no-op, so the path is passed directly.
module_path = os.path.abspath('../..')
if module_path not in sys.path:
    sys.path.append(module_path)

# Keep './modules/' as the first sys.path entry, as before, but drop any copy
# left by a previous run of this cell so re-running does not pile up duplicates.
modules_dir = r'./modules/'
if modules_dir in sys.path:
    sys.path.remove(modules_dir)
sys.path.insert(0, modules_dir)

In [6]:
from modules import AutoRec, Model, CollobarativeModel
from modules import get_validation_data, Dataset
from modules import nmf, svd, knn, autorec

In [7]:
def to_dataset(x):
    """Wrap a long-format ratings frame in the project's ``Dataset``.

    The user and item counts are taken from the global ``validation_data``
    at call time (it is loaded in a later cell), so every dataset built here
    shares the same dimensions regardless of which rows ``x`` contains.

    Parameters
    ----------
    x : ratings in long format with ``user_id``, ``item_id`` and ``rating``
        columns (passed through to ``Dataset`` with ``long_matrix=True``).

    Returns
    -------
    Whatever ``Dataset(...)`` returns for these arguments.
    """
    n_users = validation_data.user_id.unique().shape[0]
    n_items = validation_data.item_id.unique().shape[0]
    return Dataset(x,
                   n_users,
                   n_items,
                   user_based=True,
                   long_matrix=True,
                   user_column="user_id", item_column="item_id", rating_column="rating")

In [8]:
# Both CSVs sit next to each other and share the "dense_" file-name prefix;
# build that prefix once and append the variant-specific suffix.
_dense_prefix = os.path.join('../..', r"./synthetic_data/validation_data/dense_")

# Ratings frame, data set number 13.
validation_data = pd.read_csv(_dense_prefix + str(13) + '.csv')
# Companion "hue" frame for the same data set number; it is indexed with the
# same row labels as validation_data further down.
heristic = pd.read_csv(_dense_prefix + "hue_" + str(13) + '.csv')

## Sparse case

In [7]:
# Seed shared by the torch and numpy RNGs.
SEED = 2022
# Resolution of the weight grid: a1, a2, a3 take values i/STEPS and sum to 1.
STEPS = 20
# Fractions passed to DataFrame.sample(frac=...) when thinning the data.
SAMPLE_SIZE = [
    0.1,
    0.5,
    0.9,
]

In [8]:
# Accumulator for the per-model metric rows produced by the experiment loop.
result = pd.DataFrame()

# Seed every RNG this notebook draws from so that a fresh
# "Restart & Run All" reproduces the same splits and random ratings.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
# benchmark=True lets cuDNN auto-tune and pick a possibly different algorithm
# on each run, which defeats the deterministic flag above; it has to be off
# for reproducible results.
torch.backends.cudnn.benchmark = False

# pandas' DataFrame.sample falls back to numpy's global RNG when no
# random_state is given, so this also fixes the train/test splits.
np.random.seed(SEED)

In [None]:
# Grid search over mixtures of true ratings (a1), heuristic ratings (a2) and
# random ratings (a3), with a1 + a2 + a3 == 1, for each sampling fraction.
# SAMPLE_SIZE and STEPS come from the configuration cell above.
BASE_FRACTION = 0.05      # share of validation_data kept before the per-run thinning
TRAIN_FRACTION = 0.8      # share of the thinned rows used for training; the rest is test
RANDOM_RATING_HIGH = 11   # exclusive upper bound for torch.randint (random values 0..10)

# Loop-invariant matrix dimensions, computed once instead of at every grid point.
n_users = validation_data.user_id.unique().shape[0]
n_items = validation_data.item_id.unique().shape[0]


def _as_matrix_dataset(ratings):
    """Wrap a mixed rating tensor in a Dataset built with long_matrix=False."""
    return Dataset(ratings,
                   n_users,
                   n_items,
                   user_based=True,
                   long_matrix=False,
                   user_column="user_id", item_column="item_id", rating_column="rating")


# DataFrame.append is deprecated (removed in pandas 2.0) and copies the whole
# accumulator on every call; collect the pieces and concatenate once instead.
# NOTE(review): assumes each model helper returns a DataFrame, which the
# original `result.append(...)` without ignore_index also required.
new_frames = []
try:
    for sample in tqdm(SAMPLE_SIZE):
        data_sparse = validation_data.sample(frac=BASE_FRACTION)
        data_sparse = data_sparse.sample(frac=sample)

        train_rows = data_sparse.sample(frac=TRAIN_FRACTION)
        test_rows = data_sparse.drop(train_rows.index)

        data_train = to_dataset(train_rows)
        data_test = to_dataset(test_rows)
        # The heuristic frame is indexed with the same row labels as validation_data.
        heu_train = to_dataset(heristic.loc[train_rows.index])
        heu_test = to_dataset(heristic.loc[test_rows.index])

        # Random ratings, masked by sign() so only cells that hold a rating get one.
        random_train = torch.randint(low=0, high=RANDOM_RATING_HIGH, size=data_train[:].shape)*data_train[:].sign()
        random_test = torch.randint(low=0, high=RANDOM_RATING_HIGH, size=data_test[:].shape)*data_test[:].sign()

        for i in range(0, STEPS+1):
            for j in range(0, STEPS-i+1):
                k = STEPS - i - j
                a1, a2, a3 = i/STEPS, j/STEPS, k/STEPS

                print()
                print(a1, a2, a3)
                print("----------------------------------------------")
                print()

                mtp_train = _as_matrix_dataset(
                    a1*data_train[:]+a2*heu_train[:]+a3*random_train[:])
                mtp_test = _as_matrix_dataset(
                    a1*data_test[:]+a2*heu_test[:]+a3*random_test[:])

                # Same call order as before (svd, nmf, knn, autorec) so the RNG
                # stream consumed by each model is unchanged.
                model_results = [
                    svd([mtp_train], [mtp_test], sample, cnt=1, with_null=False),
                    nmf([mtp_train], [mtp_test], sample, cnt=1, with_null=False),
                    knn([mtp_train], [mtp_test], sample, cnt=1, with_null=False),
                    autorec([mtp_train], [mtp_test], validation_data, sample, cnt=1, with_null=True),
                ]
                for model_result in model_results:
                    model_result['a1'] = a1
                    model_result['a2'] = a2
                    model_result['a3'] = a3
                    new_frames.append(model_result)
finally:
    # Runs on KeyboardInterrupt too, so a partial run still leaves its rows in
    # `result`, as the incremental append did.
    if new_frames:
        previous = [] if result.empty else [result]
        result = pd.concat(previous + new_frames)

  0%|          | 0/3 [00:00<?, ?it/s]


0.0 0.0 1.0
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69d30>
RMSE: 2.9625
MAE:  2.6836
<surprise.trainset.Trainset object at 0x7fc7f18fe4e0>
RMSE: 3.1565
MAE:  2.8366
<surprise.trainset.Trainset object at 0x7fc7f18fe860>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.9143
MAE:  2.6167

0.0 0.05 0.95
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69470>
RMSE: 3.0833
MAE:  2.8465
<surprise.trainset.Trainset object at 0x7fc7f18fe908>
RMSE: 3.3633
MAE:  3.1257
<surprise.trainset.Trainset object at 0x7fc7f18fe978>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 3.0494
MAE:  2.7920

0.0 0.1 0.9
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe3c8>
RMSE: 2.9528
MAE:  2.7222
<surprise.trainset.Trainset object at 0x7fc7f18fe7b8>
RMSE: 3.2863
MAE:  3.0738
<surprise.trainset.Trainset o

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.0 0.45 0.55
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69160>
RMSE: 2.1535
MAE:  1.8750
<surprise.trainset.Trainset object at 0x7fc7f18fe588>
RMSE: 2.5503
MAE:  2.2656
<surprise.trainset.Trainset object at 0x7fc7f0a69588>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1611
MAE:  1.8688


  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.0 0.5 0.5
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69828>
RMSE: 2.0725
MAE:  1.7853
<surprise.trainset.Trainset object at 0x7fc7f0a69320>
RMSE: 2.4612
MAE:  2.1583
<surprise.trainset.Trainset object at 0x7fc7f0a697b8>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.0787
MAE:  1.7644

0.0 0.55 0.45
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69dd8>
RMSE: 1.9871
MAE:  1.6623
<surprise.trainset.Trainset object at 0x7fc7f1c6aeb8>
RMSE: 2.2759
MAE:  1.9605
<surprise.trainset.Trainset object at 0x7fc7f0a69320>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.0056
MAE:  1.6640

0.0 0.6 0.4
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69550>
RMSE: 1.9189
MAE:  1.5934
<surprise.trainset.Trainset object at 0x7fc7f0a69358>
RMSE: 2.2644
MAE:  1.9429
<surprise.trainset.Trainset o

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.0 0.9 0.1
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69dd8>
RMSE: 1.7445
MAE:  1.3103
<surprise.trainset.Trainset object at 0x7fc7f0a697b8>
RMSE: 1.9908
MAE:  1.5482
<surprise.trainset.Trainset object at 0x7fc7f0a69518>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.8327
MAE:  1.3828

0.0 0.95 0.05
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0af5b00>
RMSE: 1.7448
MAE:  1.2762
<surprise.trainset.Trainset object at 0x7fc7f18fe4e0>
RMSE: 2.0202
MAE:  1.5133
<surprise.trainset.Trainset object at 0x7fc7f0af5b00>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.8621
MAE:  1.3688


  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.0 1.0 0.0
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69550>
RMSE: 1.7951
MAE:  1.3007
<surprise.trainset.Trainset object at 0x7fc7f18fe080>
RMSE: 2.0394
MAE:  1.5326
<surprise.trainset.Trainset object at 0x7fc7f18fe400>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.9045
MAE:  1.3868

0.05 0.0 0.95
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a697b8>
RMSE: 3.0400
MAE:  2.8011
<surprise.trainset.Trainset object at 0x7fc7f18fe710>
RMSE: 3.4100
MAE:  3.1665
<surprise.trainset.Trainset object at 0x7fc7f18fe0b8>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 3.0202
MAE:  2.7605

0.05 0.05 0.9
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe0b8>
RMSE: 2.9191
MAE:  2.6930
<surprise.trainset.Trainset object at 0x7fc7f0a69160>
RMSE: 3.3267
MAE:  3.0832
<surprise.trainset.Trainset

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.05 0.15 0.8
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69e48>
RMSE: 2.6617
MAE:  2.4342
<surprise.trainset.Trainset object at 0x7fc7f0a696a0>
RMSE: 3.0373
MAE:  2.8132
<surprise.trainset.Trainset object at 0x7fc7f0a69780>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.6421
MAE:  2.3969

0.05 0.2 0.75
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe438>
RMSE: 2.5449
MAE:  2.3280
<surprise.trainset.Trainset object at 0x7fc7f0a75ba8>
RMSE: 2.9670
MAE:  2.7337
<surprise.trainset.Trainset object at 0x7fc7f0a694a8>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.5236
MAE:  2.2757


  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.05 0.25 0.7
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe828>
RMSE: 2.4197
MAE:  2.1801
<surprise.trainset.Trainset object at 0x7fc7f0af5b00>
RMSE: 2.7786
MAE:  2.5576
<surprise.trainset.Trainset object at 0x7fc7f18fe3c8>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.4099
MAE:  2.1545


  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.05 0.3 0.65
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69c50>
RMSE: 2.3153
MAE:  2.0889
<surprise.trainset.Trainset object at 0x7fc7f0a8dac8>
RMSE: 2.6497
MAE:  2.4312
<surprise.trainset.Trainset object at 0x7fc7f18fe7f0>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.3017
MAE:  2.0468


  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.05 0.35 0.6
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69358>
RMSE: 2.2043
MAE:  1.9675
<surprise.trainset.Trainset object at 0x7fc7f18fe5c0>
RMSE: 2.5361
MAE:  2.3118
<surprise.trainset.Trainset object at 0x7fc7f18fe748>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1997
MAE:  1.9424

0.05 0.4 0.55
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe5f8>
RMSE: 2.0993
MAE:  1.8476
<surprise.trainset.Trainset object at 0x7fc7f18fe860>
RMSE: 2.4434
MAE:  2.1919
<surprise.trainset.Trainset object at 0x7fc7f18fe748>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1049
MAE:  1.8380

0.05 0.45 0.5
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69860>
RMSE: 2.0105
MAE:  1.7508
<surprise.trainset.Trainset object at 0x7fc89c686be0>
RMSE: 2.3553
MAE:  2.0891
<surprise.trainset.Trains

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.05 0.7 0.25
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69d30>
RMSE: 1.7002
MAE:  1.3822
<surprise.trainset.Trainset object at 0x7fc7f18fe748>
RMSE: 2.1852
MAE:  1.8126
<surprise.trainset.Trainset object at 0x7fc7f18fe6a0>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.7467
MAE:  1.4236

0.05 0.75 0.2
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69dd8>
RMSE: 1.6639
MAE:  1.3218
<surprise.trainset.Trainset object at 0x7fc7f0a69e80>
RMSE: 2.0025
MAE:  1.6546
<surprise.trainset.Trainset object at 0x7fc7f18fe908>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.7319
MAE:  1.3872

0.05 0.8 0.15
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe080>
RMSE: 1.6525
MAE:  1.2914
<surprise.trainset.Trainset object at 0x7fc7f18fe860>
RMSE: 1.9766
MAE:  1.5920
<surprise.trainset.Trains

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.05 0.9 0.05
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe588>
RMSE: 1.6786
MAE:  1.2572
<surprise.trainset.Trainset object at 0x7fc7f18fe710>
RMSE: 1.9088
MAE:  1.4714
<surprise.trainset.Trainset object at 0x7fc7f18fe978>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.7748
MAE:  1.3300

0.05 0.95 0.0
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0aec358>
RMSE: 1.7178
MAE:  1.2727
<surprise.trainset.Trainset object at 0x7fc7f18fe8d0>
RMSE: 1.9364
MAE:  1.4586
<surprise.trainset.Trainset object at 0x7fc7f18fe400>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.8171
MAE:  1.3378

0.1 0.0 0.9
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69080>
RMSE: 2.8905
MAE:  2.6544
<surprise.trainset.Trainset object at 0x7fc7f18fe828>
RMSE: 3.2759
MAE:  3.0237
<surprise.trainset.Trainset

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.1 0.15 0.75
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe7f0>
RMSE: 2.5053
MAE:  2.2776
<surprise.trainset.Trainset object at 0x7fc7f18fe908>
RMSE: 2.9372
MAE:  2.7015
<surprise.trainset.Trainset object at 0x7fc7f18fe4a8>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.4917
MAE:  2.2442

0.1 0.2 0.7
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69748>
RMSE: 2.3827
MAE:  2.1605
<surprise.trainset.Trainset object at 0x7fc7f0af5a90>
RMSE: 2.7737
MAE:  2.5524
<surprise.trainset.Trainset object at 0x7fc7f18fe748>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.3748
MAE:  2.1230

0.1 0.25 0.65
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69898>
RMSE: 2.2751
MAE:  2.0462
<surprise.trainset.Trainset object at 0x7fc7f0a69160>
RMSE: 2.6431
MAE:  2.4269
<surprise.trainset.Trainset

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.1 0.3 0.6
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69c50>
RMSE: 2.1557
MAE:  1.9334
<surprise.trainset.Trainset object at 0x7fc7f18fe400>
RMSE: 2.5100
MAE:  2.2775
<surprise.trainset.Trainset object at 0x7fc7f0a69160>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1576
MAE:  1.9117

0.1 0.35 0.55
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe438>
RMSE: 2.0554
MAE:  1.8228
<surprise.trainset.Trainset object at 0x7fc7f18fe0b8>
RMSE: 2.4546
MAE:  2.2075
<surprise.trainset.Trainset object at 0x7fc7f18fe940>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.0589
MAE:  1.8073

0.1 0.4 0.5
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69e48>
RMSE: 1.9565
MAE:  1.7086
<surprise.trainset.Trainset object at 0x7fc7f18fe438>
RMSE: 2.4350
MAE:  2.1564
<surprise.trainset.Trainset o

  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}



0.1 0.55 0.35
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f0a69f28>
RMSE: 1.7197
MAE:  1.4426
<surprise.trainset.Trainset object at 0x7fc7f0aec358>
RMSE: 2.1579
MAE:  1.8617
<surprise.trainset.Trainset object at 0x7fc7f0a69c50>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.7563
MAE:  1.4737

0.1 0.6 0.3
----------------------------------------------

<surprise.trainset.Trainset object at 0x7fc7f18fe6a0>
RMSE: 1.6703
MAE:  1.3894
<surprise.trainset.Trainset object at 0x7fc7f18fe4a8>
RMSE: 1.9816
MAE:  1.7140
<surprise.trainset.Trainset object at 0x7fc7f18fe470>
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.7098
MAE:  1.4213


  result = {"rmse":np.sqrt(rmse/cnt), "mae": mae/cnt, "ndcg":np.nanmean(ndcg)}


In [None]:
from modules import visualize_3d_plot, group_points_by_minimum_error
# Maps the label found in the results' model column to the lowercase key
# used after the rename below.
names = {
    'SVD': 'svd',
    'AutoRec': 'autorec',
    'NMF': 'nmf',
    'kNN': 'knn',
}

In [None]:
# Reshape the raw results for the plotting helpers:
#   * 'model' becomes 'model_name' and its labels are mapped through `names`
#     (an unknown label raises KeyError);
#   * the 'a1' and 'a2' column labels are swapped;
#   * 'a3' is dropped.
# NOTE(review): this overwrites `result`, so the cell must run exactly once
# per experiment run; a second run swaps a1/a2 back and then fails.
result = (
    result
    .rename(columns={'model': 'model_name', 'a1': 'a2', 'a2': 'a1'})
    .assign(model_name=lambda frame: frame.model_name.apply(lambda label: names[label]))
    .drop(columns="a3")
)

In [None]:
# Post-process the renamed results with the project helper. Judging by its
# name it keeps the lowest-error entry per point; TODO confirm in `modules`.
rest_sparse = group_points_by_minimum_error(result)

In [None]:
# RMSE plot for the 0.1 entry of SAMPLE_SIZE (presumably the list selects which
# sample fractions to draw; confirm in `modules`). save_path=None: no path given.
visualize_3d_plot(rest_sparse, [0.1], metric="RMSE", save_path=None)

In [None]:
# RMSE plot for the 0.5 entry of SAMPLE_SIZE (presumably the list selects which
# sample fractions to draw; confirm in `modules`). save_path=None: no path given.
visualize_3d_plot(rest_sparse, [0.5], metric="RMSE", save_path=None)

In [None]:
# RMSE plot for the 0.9 entry of SAMPLE_SIZE (presumably the list selects which
# sample fractions to draw; confirm in `modules`). save_path=None: no path given.
visualize_3d_plot(rest_sparse, [0.9], metric="RMSE", save_path=None)