In [1]:
import random
import torch
import numpy as np
import pandas as pd

In [2]:
from models.neural_networks.utils import prepare_data_for_predict
from features.build_features import calculate_expenses
from data.utils import load_default_data
from features.build_features import aggregate_users_data
from models.neural_networks.NeuralNetworkRegressor import NeuralNetworkRegressor
from models.neural_networks.SimpleRegressor import SimpleRegressor
from models.neural_networks.utils import train

In [3]:
# One seed shared by every random number generator the notebook touches,
# so that a fresh "Restart & Run All" reproduces the same numbers.
seed = 213769420

random.seed(seed)                 # Python standard-library RNG
np.random.seed(seed)              # NumPy legacy global RNG
torch.manual_seed(seed)           # PyTorch default generator
torch.cuda.manual_seed_all(seed)  # generators of all CUDA devices

In [4]:
# Load the default datasets; the prefix is a relative path, so it is resolved
# against the kernel's working directory.
train_sessions, test, products, users, deliveries = load_default_data(prefix="../../data/")
# Regression targets built from the test split: a table with `user_id` and
# `expenses` columns (rendered below; appears to hold one row per user).
targets = calculate_expenses(test, products, users)
targets

Unnamed: 0,user_id,expenses
0,102,0.00
1,103,554.77
2,104,2332.01
3,105,0.00
4,106,0.00
...,...,...
195,297,109.00
196,298,2399.00
197,299,0.00
198,300,0.00


In [5]:
# Mean expense in the original units, inspected before the column is rescaled.
targets['expenses'].mean()

656.25255

In [6]:
# Divide the regression targets by 100, presumably to keep the values (and the
# training loss) in a smaller range — the network therefore predicts expenses / 100.
# An alternative binarised target (1 if expenses > 5000 else 0) was used in an
# earlier experiment and is intentionally not applied here.
# NOTE: this overwrites `targets['expenses']` in place, so running the cell twice
# divides the column twice; re-run the cell that builds `targets` first.
targets['expenses'] = targets['expenses'] / 100

In [7]:
# Instantiate the regressor with its default constructor arguments.
some_net = SimpleRegressor()
# Aggregate the training sessions together with the user and product tables into
# the model's input features (presumably one row per user — confirm in
# features.build_features).
users_data = aggregate_users_data(train_sessions, users, products)

In [8]:
# Fit the network on the aggregated features against the expense targets;
# `train` prints the loss for every epoch (see the log below).
train(some_net, users_data, targets)

Epoch 0 loss 9.88e+04
Epoch 1 loss 6.61e+04
Epoch 2 loss 4.11e+04
Epoch 3 loss 2.46e+04
Epoch 4 loss 1.07e+04
Epoch 5 loss 4.43e+03
Epoch 6 loss 1.11e+03
Epoch 7 loss 2.16e+02
Epoch 8 loss 6.7e+02
Epoch 9 loss 1.62e+03
Epoch 10 loss 2.31e+03
Epoch 11 loss 2.39e+03
Epoch 12 loss 1.7e+03
Epoch 13 loss 1.15e+03
Epoch 14 loss 7.79e+02
Epoch 15 loss 3.33e+02
Epoch 16 loss 2.33e+02
Epoch 17 loss 2.28e+02
Epoch 18 loss 2.36e+02
Epoch 19 loss 2.15e+02
Epoch 20 loss 3.5e+02
Epoch 21 loss 2.43e+02
Epoch 22 loss 2.85e+02
Epoch 23 loss 1.73e+02
Epoch 24 loss 1.97e+02
Epoch 25 loss 1.5e+02
Epoch 26 loss 1.46e+02
Epoch 27 loss 1.65e+02
Epoch 28 loss 1.59e+02
Epoch 29 loss 1.81e+02
Epoch 30 loss 1.62e+02
Epoch 31 loss 1.85e+02
Epoch 32 loss 1.53e+02
Epoch 33 loss 1.74e+02
Epoch 34 loss 1.97e+02
Epoch 35 loss 1.54e+02
Epoch 36 loss 1.55e+02
Epoch 37 loss 2.41e+02
Epoch 38 loss 2.22e+02
Epoch 39 loss 1.47e+02
Epoch 40 loss 1.4e+02
Epoch 41 loss 1.58e+02
Epoch 42 loss 1.94e+02
Epoch 43 loss 1.5e+02
Epoc

In [9]:
# Rebuild the feature table and split it into the two inputs the network takes.
# NOTE(review): this repeats the aggregate_users_data call that produced
# `users_data`; reuse that frame if `train` is confirmed not to modify it.
# NOTE(review): these are the same users and targets that were passed to `train`,
# so the scores computed from `out` are not a held-out estimate unless `train`
# sets data aside internally — confirm.
extracted_users_data = aggregate_users_data(train_sessions, users, products)
x, cat_x = prepare_data_for_predict(extracted_users_data)
x = torch.from_numpy(x.values).float()
cat_x = torch.from_numpy(cat_x.values).float()

# Put the module in evaluation mode before predicting.
some_net.eval()
# Inference needs no gradients: no_grad() skips building the autograd graph,
# so the output can be converted to a NumPy array directly.
with torch.no_grad():
    out = some_net(x, cat_x).squeeze().numpy()

In [10]:
# Map each user id to that user's prediction. Predictions are positional, so the
# ids must line up one-to-one with the rows that were fed to the network; the
# assertion makes a mismatch fail loudly instead of producing misaligned scores.
# (The earlier binarised experiment stored `prediction > 0.3` instead.)
user_ids = extracted_users_data["user_id"].to_list()
assert len(out) == len(user_ids), "number of predictions does not match number of users"
out_array = {int(user_id): prediction for user_id, prediction in zip(user_ids, out)}

In [11]:
# Baseline "model": predict the mean target for every user. The mean is computed
# once up front rather than once per user.
# NOTE: the mean is taken from the same `targets` the baseline is scored against.
mean_expense = targets['expenses'].mean()
evaluation_array = {int(user_id): mean_expense for user_id in extracted_users_data["user_id"]}

In [12]:
def loss(targets, out_array):
    """Return the sum of squared errors between targets and predictions.

    Args:
        targets: DataFrame with a ``user_id`` column and an ``expenses`` column
            holding the true value for each user.
        out_array: Mapping from user id to the predicted value for that user.

    Returns:
        The summed squared difference over all rows of ``targets`` (``0`` when
        ``targets`` has no rows). Note this is a sum, not a mean.

    Raises:
        KeyError: If a required column is missing from ``targets`` or a user in
            ``targets`` has no entry in ``out_array``.
    """
    # Read the columns by name rather than by position, so extra or reordered
    # columns cannot silently swap the user id and the target value.
    user_ids = targets["user_id"].values
    expenses = targets["expenses"].values

    total = 0  # named `total` to avoid shadowing the built-in sum()
    for user_id, expense in zip(user_ids, expenses):
        total += (expense - out_array[user_id]) ** 2

    return total

In [13]:
# Sum of squared errors of the network's predictions against the targets.
loss(targets, out_array)

33887.06717132477

In [14]:
# Same metric for the constant mean-prediction baseline, for comparison with the network.
loss(targets, evaluation_array)

35484.379756659895

In [16]:
# Persist only the model's state_dict (not the model object itself) to a path
# relative to the kernel's working directory.
torch.save(some_net.state_dict(), "../parameters/simple_v1")