In [1]:
import pandas as pd
import numpy as np
import torch 
from torch.utils import data
from torch import nn

from typing import List, Tuple, Dict, Union, Optional
from typing import Iterable

In [2]:
from models import AutoRec, Model
from load_data import get_validation_data
from utils import Dataset

In [3]:
# Load the two frames returned by get_validation_data(): the feature/rating
# table and a second (user_id, item_id, rating) table.
# NOTE(review): "heristic" looks like a typo for "heuristic"; the name is kept
# because a later cell references it.
validation_data, heristic = get_validation_data()
validation_data

Unnamed: 0,user_0,user_1,user_2,user_id,item_0,item_1,item_2,item_id,rating
0,-0.189219,0.688799,0.496749,0,-7.189302,-17.358819,-8.191424,0,3.0
1,-0.189219,0.688799,0.496749,0,0.749574,0.957448,2.106965,1,9.0
2,-0.189219,0.688799,0.496749,0,-3.443982,-0.042470,-1.148656,2,6.0
3,-0.189219,0.688799,0.496749,0,-1.101991,-0.047565,-0.453863,3,6.0
4,-0.189219,0.688799,0.496749,0,7.837388,18.125961,19.356996,4,9.0
...,...,...,...,...,...,...,...,...,...
24995,-0.401447,-0.463426,-1.824159,249,-1.780516,-3.164613,0.289502,95,7.0
24996,-0.401447,-0.463426,-1.824159,249,-1.835749,-1.362072,-0.549780,96,8.0
24997,-0.401447,-0.463426,-1.824159,249,-0.900141,-0.433615,0.459035,97,6.0
24998,-0.401447,-0.463426,-1.824159,249,-1.414088,2.804230,-37.497444,98,10.0


In [4]:
# Preview the first rows of the second frame returned by get_validation_data().
heristic.head()

Unnamed: 0,user_id,item_id,rating
0,0,0,9.0
1,0,1,8.0
2,0,2,8.0
3,0,3,8.0
4,0,4,6.0


In [55]:
# Keep a 10% subsample of the ratings, then split it 80/20 into train and test.
# A fixed random_state makes both draws reproducible across kernel restarts
# (2021 is the same value as the SEED constant used by the evaluation sweep).
data_sparse = validation_data.sample(frac=0.1, random_state=2021)
data_train = data_sparse.sample(frac=0.8, random_state=2021)
# Every subsampled row that was not drawn for training goes to the test split.
data_test = data_sparse.drop(data_train.index)

In [56]:
# The matrix dimensions are taken from the full table, not from the individual
# frames, so all three Dataset objects are built with the same user/item counts.
_n_users = validation_data.user_id.unique().shape[0]
_n_items = validation_data.item_id.unique().shape[0]


def _build_dataset(frame):
    """Wrap a ratings frame in a ``Dataset`` using the notebook's fixed settings.

    Args:
        frame: DataFrame with ``user_id``, ``item_id`` and ``rating`` columns.

    Returns:
        ``Dataset`` constructed with ``user_based=True`` and ``long_matrix=True``
        and the user/item counts of the full validation table.
    """
    return Dataset(
        frame,
        _n_users,
        _n_items,
        user_based=True,
        long_matrix=True,
        user_column="user_id",
        item_column="item_id",
        rating_column="rating",
    )


dataset = _build_dataset(data_sparse)
dataset_train = _build_dataset(data_train)
dataset_test = _build_dataset(data_test)

In [57]:
# AutoRec network whose input width equals the number of distinct item ids.
# NOTE(review): the printed architecture ends the decoder with a Dropout layer,
# i.e. dropout is applied to the reconstructed output — confirm this is intended.
i_autorec = AutoRec(
    input_size=validation_data.item_id.unique().shape[0],
    hidden_dims=[500],  # a single hidden layer of 500 units
    encoder_activation_fn = nn.Sigmoid,  # passed as the class, not an instance
    decoder_activation_fn = None,  # no activation after the decoder's Linear layer
    dropout=0.05,
    bias=True
)

In [58]:
def init_weights(layer):
    """Initialise an ``nn.Linear`` layer in place; leave other modules untouched.

    Meant to be passed to ``module.apply``, which calls it on every submodule.
    Weights receive Xavier-normal initialisation and biases are set to 0.01.
    A layer created with ``bias=False`` has ``layer.bias is None``; its bias step
    is skipped instead of raising ``AttributeError``.

    Args:
        layer: any ``nn.Module``.

    Returns:
        None. The layer is modified in place.
    """
    if not isinstance(layer, nn.Linear):
        return
    nn.init.xavier_normal_(layer.weight)
    if layer.bias is not None:
        nn.init.constant_(layer.bias, 0.01)

In [59]:
# Run init_weights over every submodule of the network. apply() returns the
# module itself, so the cell displays its architecture.
i_autorec.apply(init_weights)

AutoRec(
  (encoder): Sequential(
    (0): Linear(in_features=100, out_features=500, bias=True)
    (1): Sigmoid()
    (2): Dropout(p=0.05, inplace=False)
  )
  (decoder): Sequential(
    (0): Linear(in_features=500, out_features=100, bias=True)
    (1): Dropout(p=0.05, inplace=False)
  )
)

In [80]:
# Prefer the second GPU (the original hard-coded choice), but fall back to the
# first GPU or the CPU so the cell still runs on hosts that have no "cuda:1".
if torch.cuda.device_count() > 1:
    _device = torch.device("cuda:1")
elif torch.cuda.is_available():
    _device = torch.device("cuda:0")
else:
    _device = torch.device("cpu")

# Training wrapper around the AutoRec network: Adam optimiser, MSE loss.
model = Model(
    model=i_autorec,
    optimizer=torch.optim.Adam,  # passed as a class together with its keyword config
    optimizer_config={"lr": 0.001, "betas": (0.9, 0.999), "eps": 1e-08, "weight_decay": 0.1},
    loss_fn=nn.MSELoss(),
    batch_size=16,
    num_epoch=50,
    device=_device
)

In [81]:
# Train on the training split and keep whatever Model.train returns.
# NOTE(review): 0.2 and False are passed positionally, so their meaning is only
# visible in models.Model.train — presumably a validation fraction and a
# verbosity flag; confirm and pass them by keyword.
errors = model.train(dataset_train, 0.2, False)

In [91]:
# Evaluate with with_nulls=True.
# NOTE(review): dataset_test is passed twice and ndcg comes back as 0.0 in this
# mode — check what role the second positional argument plays in Model.test.
model.test(dataset_test, dataset_test, with_nulls=True)

{'rmse': 0.5122106988226777, 'mae': 0.4924404801349251, 'ndcg': 0.0}

In [83]:
# Evaluate on the test split using Model.test's default arguments.
model.test(dataset_test)

{'rmse': 6.309031628960862,
 'mae': 5.672043684005737,
 'ndcg': 0.9273181192593506}

In [84]:
# Root of the mean squared difference between the network's output and its
# input, averaged over every entry of the test tensor. The input is moved to
# whichever device the network's parameters live on rather than to a
# hard-coded "cuda:1", so the cell works regardless of where the model was put.
_test_tensor = dataset_test[:]
_net_device = next(i_autorec.parameters()).device
((i_autorec(_test_tensor.to(_net_device)).detach().cpu() - _test_tensor) ** 2).mean().sqrt()

tensor(1.0263)

In [85]:
# Same root-mean-squared-difference computation, this time calling the Model
# wrapper on the tensor without moving it to a device first.
((model(dataset_test[:]).detach().cpu() - dataset_test[:])**2).mean().sqrt()

tensor(1.0263)

In [10]:
# Materialise the complete validation table through Dataset and keep the
# full slice as a NumPy array.
_full_dataset = Dataset(
    validation_data,
    validation_data.user_id.unique().shape[0],
    validation_data.item_id.unique().shape[0],
    user_based=True,
    long_matrix=True,
    user_column="user_id",
    item_column="item_id",
    rating_column="rating",
)
mtp = _full_dataset[:].numpy()

In [11]:
import os
import sys
import math
import time
import numpy as np
import pandas as pd
from numpy import random as npr
# Make the parent directory importable (appended once, at the end of sys.path).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)


import importlib
# Put the SinEvaRec modules directory at the front of sys.path so it wins lookup.
# NOTE(review): an earlier cell already ran `from models import ...` and
# `from utils import ...`; after this insert the names `models` and `utils` may
# resolve to different files — confirm which module each cell ends up using.
sys.path.insert(0, r'../SinEvaRec/modules/')

import evaluator, trainers, utils, models
# Reload each module so edits made on disk are picked up without a kernel restart.
importlib.reload(models)
importlib.reload(trainers)
importlib.reload(evaluator)
importlib.reload(utils)

<module 'utils' from '../SinEvaRec/modules/utils.py'>

In [12]:
class ResponseFunction:
    """Blend a heuristic matrix, a model matrix and random noise into one matrix.

    Calling the instance with weights ``a1`` and ``a2`` returns
    ``a1 * heu_matrix + a2 * nn_matrix + a3 * noise + 0.00001`` where
    ``a3 = max(0.0, 1 - a1 - a2)`` and ``noise`` is drawn on every call from a
    normal distribution with mean 1 and standard deviation 1.

    NOTE(review): ``noise_matrix`` is stored on the instance but is not read by
    ``__call__``; the noise term is always freshly sampled.
    """

    def __init__(self, heu_matrix, nn_matrix, noise_matrix):
        # Only the first two matrices are shape-checked against each other.
        assert heu_matrix.shape == nn_matrix.shape
        self._heu_matrix, self._nn_matrix = heu_matrix, nn_matrix
        self._noise_matrix = noise_matrix

    def __call__(self, a1: float, a2: float):
        # The noise weight is the remainder of the budget, clamped at zero.
        noise_weight = max(0.0, 1 - a1 - a2)
        noise = npr.normal(1, 1, size=self._heu_matrix.shape)
        blended = (
            a1 * self._heu_matrix
            + a2 * self._nn_matrix
            + noise_weight * noise
        )
        # Small constant offset to avoid identical zeros in NMF.
        return blended + 0.00001

In [13]:
result = pd.DataFrame()  # accumulator for the evaluation results
SEED = 2021  # value handed to np.random.seed before each evaluation run

DeepCTR-PyTorch version 0.2.8 detected. Your version is 0.2.7.
Use `pip install -U deepctr-torch` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.8


In [14]:
# Response function shared by every run: the same rating matrix is supplied for
# all three constructor arguments of ResponseFunction.
_resp_fn_config = evaluator.ResponseFunctionConfig(
    factory=ResponseFunction,
    args=[mtp, mtp, mtp]
)

# Only the AutoRec executor is evaluated in this sweep.
_evaluators = [
    evaluator.TrainTestExecutorConfig(
        factory=trainers.AutoRecTrainTestExecutor,
        args={},
        model_name="autorec"
    )
]

_sweep_frames = []
for sample in [0.1, 0.5, 0.9]:
    # Reseed NumPy so every sample size starts from the same random state.
    np.random.seed(SEED)
    _evaluator = evaluator.Evaluator(_resp_fn_config, n_proc=1)
    start = time.time()
    results_ = _evaluator.evaluate(
        _evaluators,
        a_sample_rate=2,
        test_size=0.2,
        sample_sizes=[sample]
    )
    end = time.time()
    print('Total time: ' + str(end - start))
    _sweep_frames.append(results_)

# DataFrame.append was removed in pandas 2.0, so the per-run frames are
# collected and concatenated onto `result` in one step instead.
# assumes evaluate() returns a DataFrame — TODO confirm
result = pd.concat([result, *_sweep_frames])

Subprocess started.
0.0 0.0 0.1
Load data finished. Number of users: 250 Number of items: 100
[_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456, 7982908155839458697), _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 16738212436277318978), _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 18049860845911510459), _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:1, XLA_GPU, 17179869184, 2014187369871541049)]
IAutoRec.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


-----------------------Start 1-----------------------
-----------------------Start 2-----------------------
-----------------------Start 3-----------------------
-----------------------Start 4-----------------------
-----------------------Start 5-----------------------
Sess
Epoch: 0000; RMSE:1.116657815677006; MAE:0.8914390586349333
Epoch: 0003; RMSE:1.158405387371178; MAE:0.9155730073526109
Epoch: 0006; RMSE:1.1220464783540465; MAE:0.8855928577817912
Epoch: 0009; RMSE:1.1231554100794452; MAE:0.8897871277097796
Epoch: 0012; RMSE:1.1226214665661376; MAE:0.8915482787337438
Epoch: 0015; RMSE:1.1315856954403152; MAE:0.8974507729256916
Epoch: 0018; RMSE:1.1230030537962765; MAE:0.89312571743102
Epoch: 0021; RMSE:1.127198833519697; MAE:0.8958832629637282
Epoch: 0024; RMSE:1.1303323987430416; MAE:0.9014307082759447
Epoch: 0027; RMSE:1.13428678692376; MAE:0.9085726357724802
Epoch: 0030; RMSE:1.143998132995069; MAE:0.9155842581356418
Epoch: 0033; RMSE:1.1448301792597357; MAE:0.9193319779017294
E

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


-----------------------Start 1-----------------------
-----------------------Start 2-----------------------
-----------------------Start 3-----------------------
-----------------------Start 4-----------------------
-----------------------Start 5-----------------------
Sess
Epoch: 0000; RMSE:1.0362860038256794; MAE:0.8298368184183029
Epoch: 0003; RMSE:1.023114310088197; MAE:0.8234411370829815
Epoch: 0006; RMSE:1.0157660517618787; MAE:0.8188472363101919
Epoch: 0009; RMSE:1.0114354085737018; MAE:0.8146911825789601
Epoch: 0012; RMSE:1.0136616976671946; MAE:0.8184636543551191
Epoch: 0015; RMSE:1.0167379261328602; MAE:0.8221998802125173
Epoch: 0018; RMSE:1.0225012135016267; MAE:0.8256337008123413
Epoch: 0021; RMSE:1.0294602326720734; MAE:0.8305849824073528
Epoch: 0024; RMSE:1.0460856396998974; MAE:0.8458002495722343
Epoch: 0027; RMSE:1.0606165186788974; MAE:0.8568135491579422
Epoch: 0030; RMSE:1.0729926232050988; MAE:0.8657393612644743
Epoch: 0033; RMSE:1.08571791088858; MAE:0.8749916529302

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


-----------------------Start 1-----------------------
-----------------------Start 2-----------------------
-----------------------Start 3-----------------------
-----------------------Start 4-----------------------
-----------------------Start 5-----------------------
Sess
Epoch: 0000; RMSE:1.0676036358804324; MAE:0.8538918296600112
Epoch: 0003; RMSE:1.007645483353848; MAE:0.8061007828837209
Epoch: 0006; RMSE:1.0096511045293344; MAE:0.8082343560403047
Epoch: 0009; RMSE:1.0071807082332982; MAE:0.8067943830242787
Epoch: 0012; RMSE:1.009329834414564; MAE:0.8087866235024261
Epoch: 0015; RMSE:1.010859237754691; MAE:0.8098597401214199
Epoch: 0018; RMSE:1.0131102735390982; MAE:0.81204878373605
Epoch: 0021; RMSE:1.0210584825640554; MAE:0.8200554576602382
Epoch: 0024; RMSE:1.0315300615947367; MAE:0.8285442643081433
Epoch: 0027; RMSE:1.0479964535551591; MAE:0.840925570252362
Epoch: 0030; RMSE:1.0618597974359838; MAE:0.8526266427794218
Epoch: 0033; RMSE:1.0760185615628906; MAE:0.8641909665268861

In [16]:
# Show the accumulated metrics: one row per (a1, a2) pair and sample size.
result

Unnamed: 0,a1,a2,model_name,sample_size,rmse,mae,ndcg,test_size,calculation_time
0,0.0,0.0,autorec,0.1,1.165427,0.937845,0,0.2,0 days 00:00:02.871396
1,0.0,0.5,autorec,0.1,1.219253,0.92523,0,0.2,0 days 00:00:02.000139
2,0.0,1.0,autorec,0.1,2.002342,1.418651,0,0.2,0 days 00:00:02.209847
3,0.5,0.0,autorec,0.1,1.176155,0.898276,0,0.2,0 days 00:00:02.091715
4,0.5,0.5,autorec,0.1,2.127704,1.530626,0,0.2,0 days 00:00:02.226792
5,1.0,0.0,autorec,0.1,2.175486,1.54183,0,0.2,0 days 00:00:02.396757
0,0.0,0.0,autorec,0.5,1.1268,0.903861,0,0.2,0 days 00:00:03.570258
1,0.0,0.5,autorec,0.5,0.799066,0.578589,0,0.2,0 days 00:00:03.114906
2,0.0,1.0,autorec,0.5,1.516574,1.117065,0,0.2,0 days 00:00:03.376319
3,0.5,0.0,autorec,0.5,0.860904,0.592383,0,0.2,0 days 00:00:03.830054


In [17]:
# Mean RMSE over all rows recorded with sample_size 0.1.
result[result.sample_size == 0.1].rmse.mean()

1.644394682931338

In [18]:
# Mean RMSE over all rows recorded with sample_size 0.5.
result[result.sample_size == 0.5].rmse.mean()

1.227299340684236

In [19]:
# Mean RMSE over all rows recorded with sample_size 0.9.
result[result.sample_size == 0.9].rmse.mean()

1.5285269899738034

In [65]:
# Instantiate the executor directly (the sweep only handed over the class).
# NOTE(review): despite its name, `factory` holds an instance here.
factory=trainers.AutoRecTrainTestExecutor()

In [66]:
# Run the rating matrix through trainers._rating_matrix_to_long_table and keep
# a 10% subsample. random_state pins the draw so the matrices built from it
# are reproducible across kernel restarts.
# assumes the helper returns a pandas DataFrame (it is sampled with frac=) — TODO confirm
long_table = trainers._rating_matrix_to_long_table(mtp).sample(frac=0.1, random_state=SEED)

In [67]:
# Build the train/test matrices and the user/item counts from the long table.
# 0.2 is passed positionally (presumably the test fraction — confirm against
# the executor's signature).
train_matrix, test_matrix, n_users, n_items = factory._transform_long_table_to_sparse_matrix(long_table, 0.2)

Load data finished. Number of users: 250 Number of items: 100


In [71]:
import typing as t
import tensorflow as tf

config = tf.compat.v1.ConfigProto()  # session options for the TF1-compat API
config.gpu_options.allow_growth = True  # grow GPU memory on demand instead of reserving it all up front

In [72]:
# Open the session with the ConfigProto prepared earlier; without config=config
# the allow_growth option set on it would never reach TensorFlow.
# NOTE(review): `model` rebinds the name that earlier held the torch Model wrapper.
with tf.compat.v1.Session(config=config) as sess:
    print(sess.list_devices())
    # NOTE(review): 250 and 100 repeat the user/item counts printed by the
    # loader; n_users / n_items are available — confirm the argument order
    # of IAutoRec before substituting them.
    model = models.IAutoRec(sess, 250, 100)

    model.build_network()

    # Train on the train matrix while reporting metrics on the test matrix.
    errors_log = model.execute(train_matrix, test_matrix)

    test = model.test(test_matrix)

[_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456, 7953295561788063595), _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 4340229576994145973), _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 3600671085252589614), _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:1, XLA_GPU, 17179869184, 13288572854706407455)]
IAutoRec.
-----------------------Start 1-----------------------
-----------------------Start 2-----------------------
-----------------------Start 3-----------------------
-----------------------Start 4-----------------------
-----------------------Start 5-----------------------
Sess
Epoch: 0000; RMSE:5.826435950667381; MAE:5.169479575604201
Epoch: 0003; RMSE:2.642917508226645; MAE:2.269720501899719
Epoch: 0006; RMSE:2.771911409003094; MAE:2.1794244494438173
Epoch: 0009; RMSE:2.660479646650244; MAE:2.185828321933746
Epoch: 0012; RMSE:2.5887458571115993; 

In [73]:
# Value returned by model.test(test_matrix): a pair of floats
# (presumably RMSE and MAE, matching the per-epoch log — confirm).
test

(1.9362316583676855, 1.3905856037139892)