## Example: training and evaluating the AutoRec model

In [None]:
import pandas as pd
import numpy as np
import torch 
from torch.utils import data
from torch import nn

from typing import List, Tuple, Dict, Union, Optional
from typing import Iterable

In [None]:
from models import AutoRec, Model
from load_data import get_validation_data
from utils import Dataset

### Load validation data

In [3]:
# get_validation_data() returns two frames: the validation ratings with user/item
# feature columns, and a second frame of (user_id, item_id, rating) rows.
# NOTE(review): `heristic` is presumably a misspelling of "heuristic"; the name is
# kept as-is because a later cell refers to it.
validation_data, heristic = get_validation_data()
# Bare last expression so the notebook renders the frame (pandas truncates the middle rows).
validation_data

Unnamed: 0,user_0,user_1,user_2,user_id,item_0,item_1,item_2,item_id,rating
0,-0.189219,0.688799,0.496749,0,-7.189302,-17.358819,-8.191424,0,3.0
1,-0.189219,0.688799,0.496749,0,0.749574,0.957448,2.106965,1,9.0
2,-0.189219,0.688799,0.496749,0,-3.443982,-0.042470,-1.148656,2,6.0
3,-0.189219,0.688799,0.496749,0,-1.101991,-0.047565,-0.453863,3,6.0
4,-0.189219,0.688799,0.496749,0,7.837388,18.125961,19.356996,4,9.0
...,...,...,...,...,...,...,...,...,...
24995,-0.401447,-0.463426,-1.824159,249,-1.780516,-3.164613,0.289502,95,7.0
24996,-0.401447,-0.463426,-1.824159,249,-1.835749,-1.362072,-0.549780,96,8.0
24997,-0.401447,-0.463426,-1.824159,249,-0.900141,-0.433615,0.459035,97,6.0
24998,-0.401447,-0.463426,-1.824159,249,-1.414088,2.804230,-37.497444,98,10.0


In [4]:
# Preview the first rows of the second frame returned by get_validation_data().
heristic.head()

Unnamed: 0,user_id,item_id,rating
0,0,0,9.0
1,0,1,8.0
2,0,2,8.0
3,0,3,8.0
4,0,4,6.0


### Create the torch datasets and the train/test split

In [5]:
# Fixed seed so the subsample and the train/test split are identical on every
# Restart & Run All (DataFrame.sample is otherwise randomly seeded per call).
SPLIT_SEED = 42

# Keep a 10% random sample of the validation rows as the working set.
data_sparse = validation_data.sample(frac=0.1, random_state=SPLIT_SEED)
# 80% of the working set is used for training ...
data_train = data_sparse.sample(frac=0.8, random_state=SPLIT_SEED)
# ... and the rows that were not drawn for training form the test set.
data_test = data_sparse.drop(data_train.index)

In [6]:
# Both counts are taken from the full validation frame (not from the sampled
# subsets) and are passed unchanged to every Dataset built below.
n_users = validation_data.user_id.unique().shape[0]
n_items = validation_data.item_id.unique().shape[0]


def make_dataset(frame: pd.DataFrame):
    """Wrap a ratings frame in a ``Dataset`` using the settings shared by this notebook.

    Args:
        frame: rows with ``user_id``, ``item_id`` and ``rating`` columns.

    Returns:
        A ``Dataset`` constructed with ``user_based=True`` and ``long_matrix=True``.
    """
    return Dataset(frame,
                   n_users,
                   n_items,
                   user_based=True,
                   long_matrix=True,
                   user_column="user_id", item_column="item_id", rating_column="rating")


# One dataset per frame: the whole working set, the training rows, the test rows.
dataset = make_dataset(data_sparse)
dataset_train = make_dataset(data_train)
dataset_test = make_dataset(data_test)

### AutoRec model init

In [7]:
# Build the AutoRec network. The input width is the number of distinct item ids in
# the validation frame; there is a single hidden layer of 500 units with a sigmoid
# on the encoder side and no activation on the decoder side.
# NOTE(review): the datasets in this notebook are built with user_based=True and the
# input width is the item count, which looks like the user-based variant despite the
# `i_` prefix in the variable name — confirm the intended naming.
i_autorec = AutoRec(
    input_size=len(validation_data["item_id"].unique()),
    hidden_dims=[500],
    encoder_activation_fn=nn.Sigmoid,
    decoder_activation_fn=None,
    dropout=0.05,
    bias=True,
)

In [8]:
def init_weights(layer):
    """Initialise an ``nn.Linear`` layer in place; leave any other module untouched.

    Written to be passed to ``module.apply(init_weights)``, which calls it once per
    submodule.

    Args:
        layer: the module to (possibly) initialise. Only ``nn.Linear`` instances are
            modified: the weight gets Xavier-normal values and the bias, when the
            layer has one, is filled with the constant 0.01.

    Returns:
        None. The layer is modified in place.
    """
    if not isinstance(layer, nn.Linear):
        return
    nn.init.xavier_normal_(layer.weight)
    # A Linear layer created with bias=False has ``bias is None``; skip it instead of
    # raising AttributeError.
    if layer.bias is not None:
        nn.init.constant_(layer.bias, 0.01)

In [9]:
# Run init_weights over the model. For a torch nn.Module, apply() calls the function
# on every submodule and returns the module itself, so the model repr is displayed.
i_autorec.apply(init_weights)

AutoRec(
  (encoder): Sequential(
    (0): Linear(in_features=100, out_features=500, bias=True)
    (1): Sigmoid()
    (2): Dropout(p=0.05, inplace=False)
  )
  (decoder): Sequential(
    (0): Linear(in_features=500, out_features=100, bias=True)
    (1): Dropout(p=0.05, inplace=False)
  )
)

In [10]:
# Use the second GPU when the machine has one (the original setting); otherwise fall
# back to the first GPU, or to the CPU when CUDA is unavailable, so the notebook does
# not fail on machines with a different hardware layout.
if torch.cuda.device_count() > 1:
    train_device = torch.device("cuda:1")
elif torch.cuda.is_available():
    train_device = torch.device("cuda:0")
else:
    train_device = torch.device("cpu")

# Training wrapper: Adam optimiser, MSE loss, mini-batches of 16, 50 epochs.
# NOTE(review): weight_decay=0.1 is a comparatively strong penalty for Adam —
# confirm it is intentional.
model = Model(
    model=i_autorec,
    optimizer=torch.optim.Adam,
    optimizer_config={"lr": 0.001, "betas": (0.9, 0.999), "eps": 1e-08, "weight_decay": 0.1},
    loss_fn=nn.MSELoss(),
    batch_size=16,
    num_epoch=50,
    device=train_device,
)

### Train model

In [11]:
# Train on the training dataset and keep whatever train() returns in `errors`.
# NOTE(review): the positional arguments 0.2 and False are not self-explanatory
# (0.2 is presumably a validation fraction) — check Model.train's signature and
# consider passing them by keyword.
errors = model.train(dataset_train, 0.2, False)

### Test model

In [12]:
# Evaluate using the training and test datasets; the displayed result is a dict with
# 'rmse', 'mae' and 'ndcg' entries.
# NOTE(review): the recorded rmse (~6.2) is large relative to the ratings visible in
# the data preview (3-10) — worth investigating before relying on this model.
model.test(dataset_train, dataset_test, with_nulls=False)

{'rmse': 6.249121582157134,
 'mae': 5.589507727622986,
 'ndcg': 0.2198385844199559}