# TEST DATASET AND DATALOADER CLASSES FOR TABULAR DATA

In [49]:
import torch
import torch.functional as F

In [50]:
from data_loader.data_loaders import TabularDataset, TabularDataLoader

In [6]:
# Instantiate the dataset class directly for the bpic2011_f3 data, using the
# `Classification` column as the target.
dataset = TabularDataset(
    dataset_path='bpic2011_f3_full',
    dataset_file='bpic2011_f3',
    target_col='Classification',
    header_in_data=True,
    training=True,
    scale=True,
)

In [51]:
# Build the loader for the same data: batch_size=4, shuffling enabled,
# validation_split=0.2 and num_workers=0.
dataloader = TabularDataLoader(
    dataset_path='bpic2011_f3_full',
    dataset_file='bpic2011_f3',
    target_col='Classification',
    header_in_data=True,
    training=True,
    scale=True,
    batch_size=4,
    shuffle=True,
    validation_split=0.2,
    num_workers=0,
)

In [52]:
# Fetch a single mini-batch from the loader and report the shape of both tensors.
train_features, train_labels = next(iter(dataloader))
print(
    f"Feature batch shape: {train_features.size()}",
    f"Labels batch shape: {train_labels.size()}",
    sep="\n",
)

Feature batch shape: torch.Size([4, 277])
Labels batch shape: torch.Size([4, 1])


In [53]:
# Take the first sample of the batch and its label. The name `img` is kept
# because the next cell prints it, although the row is a tabular feature vector.
img = torch.squeeze(train_features[0])
label = train_labels[0]

In [54]:
# Print the feature vector of the first sample in the batch.
print(img)

tensor([ 3.0027e-01, -3.0542e-01,  2.8517e+00, -9.9881e-02,  1.5587e+00,
        -1.8901e-01, -2.4777e-02, -1.6515e-02, -8.2569e-03, -2.7907e-01,
        -2.1850e-02, -2.9891e-01, -3.0908e-02, -2.7394e-02, -7.6343e-02,
        -9.2325e-02,  7.5255e-01, -1.5313e-01, -1.3745e-01, -1.7591e-01,
        -1.3514e+00, -3.1065e-02, -2.3891e-01, -8.6642e-02, -8.6122e-02,
        -1.5076e-01, -1.0620e-01, -2.2213e-01, -2.2040e-01, -6.5150e-02,
        -1.2481e-01, -1.0501e-01, -5.4850e-02, -1.2027e+00, -1.1395e+00,
        -1.0678e+00, -1.4226e-01, -1.0924e-01, -1.5217e-01, -4.0482e-02,
        -9.8868e-02, -1.5198e-01, -1.0183e-01, -2.5686e-01, -7.5890e-02,
        -1.2339e-01, -1.0573e+00, -1.0128e+00, -2.0628e-01, -1.8570e-01,
        -7.4515e-02, -7.4056e-02, -1.0040e-01, -9.4192e-02, -1.2407e-01,
        -1.4620e-01, -8.8029e-02, -6.1863e-01, -8.7319e-02, -2.4126e-02,
        -9.4676e-02, -1.9949e-01, -6.4848e-01, -1.0130e-01, -6.3934e-01,
        -7.4976e-02, -8.0430e-02, -7.8023e-02, -5.2

In [63]:
# Print the label of the first sample in the batch.
print(label)

tensor([0.])


## Test `__read_files` method

In [10]:
import pandas as pd

# Folder name and file stem used to build the CSV paths for this data set.
dataset_path = 'bpic2011_f3_full'
dataset_file = 'bpic2011_f3'

In [11]:
# Paths of the zipped CSV files for the train and test splits.
file_name_train = f'data/{dataset_path}/{dataset_file}_train.csv.zip'
file_name_test = f'data/{dataset_path}/{dataset_file}_test.csv.zip'

In [12]:
# Load the training CSV and split it into a feature matrix and a target vector.
target_col = 'Classification'
train_x = pd.read_csv(file_name_train, sep=',', header=0)
# pop() removes the target column from the frame and returns it in one step.
train_y = train_x.pop(target_col).values
# What remains are the feature columns only; keep them as a NumPy array.
train_x = train_x.values

# MODEL and FORWARD

In [56]:
from model.model import MoE

In [57]:
# Derive the input width from the feature batch instead of hard-coding 277, so
# the model still matches the features if the data set or preprocessing changes.
model = MoE(input_size=train_features.shape[1], num_experts=3, topk=0)

In [58]:
# Gate probabilities per sample; the cell output shows one row per sample and
# one column per expert, produced by a softmax.
# NOTE(review): if MoE is a torch.nn.Module, `model(train_features)` is the
# recommended call form because it also runs registered hooks — confirm.
model.forward(train_features).gate_probs

tensor([[0.4777, 0.2117, 0.3106],
        [0.1490, 0.3927, 0.4583],
        [0.1392, 0.4926, 0.3682],
        [0.8512, 0.0288, 0.1200]], grad_fn=<SoftmaxBackward0>)

In [59]:
# Run the forward pass once and reuse its result: two separate passes are
# redundant and could disagree with each other if the model has any stochastic
# component.
moe_output = model.forward(train_features)
# Weight each expert output by its gate probability and sum over the experts;
# per the cell outputs this is (batch, 1, experts) @ (batch, experts, 1).
torch.bmm(moe_output.gate_probs.unsqueeze(1), moe_output.experts_outputs)

tensor([[[-1.3740]],

        [[ 0.0567]],

        [[-2.0114]],

        [[-3.5443]]], grad_fn=<BmmBackward0>)

In [38]:
# Shape of `experts_outputs` from the forward pass; the recorded output
# suggests (batch, num_experts, 1) — TODO confirm against the model code.
model.forward(train_features).experts_outputs.shape

torch.Size([32, 3, 1])

In [39]:
# Shape of `gate_probs` from the forward pass; the recorded output suggests
# (batch, num_experts) — TODO confirm against the model code.
model.forward(train_features).gate_probs.shape

torch.Size([32, 3])

In [41]:
# Small integer matrix (3 rows, 2 columns) for experimenting with tensor shapes.
mytensor = torch.tensor([
    [2, 1],
    [3, 2],
    [1, 2],
])

In [48]:
# unsqueeze(1) inserts a singleton axis at position 1: (3, 2) -> (3, 1, 2).
mytensor.unsqueeze(1).shape


torch.Size([3, 1, 2])

In [60]:
# Column vector of +1/-1 signs with shape (3, 1).
mytarget = torch.tensor([[1], [-1], [1]])

In [61]:
# Check the shape of the sign vector before multiplying it with a matrix.
mytarget.shape

torch.Size([3, 1])

In [62]:
# Broadcasting: the (3, 1) sign vector is stretched across both columns of the
# (3, 2) matrix, so every row is multiplied by its own sign.
mytarget * mytensor

tensor([[ 2,  1],
        [-3, -2],
        [ 1,  2]])

In [67]:
# Remap the labels to a signed encoding: a label of 0 becomes -1 and 1 becomes +1.
true = train_labels * 2 - 1

In [68]:
# Display the signed labels computed as 2 * train_labels - 1.
true

tensor([[-1.],
        [-1.],
        [-1.],
        [ 1.]])

In [82]:
# Size of axis 1 of `experts_outputs`; presumably the number of experts, since
# the recorded value 3 matches num_experts=3 — TODO confirm.
model.forward(train_features).experts_outputs.shape[1]

3

In [79]:
# Sigmoid of (signed label * expert output) for every expert. `true` gets an
# extra axis via unsqueeze(1) so it can broadcast against `experts_outputs`;
# assumes `true` is (batch, 1) and `experts_outputs` is (batch, num_experts, 1)
# as the recorded outputs suggest — TODO confirm.
torch.sigmoid(model.forward(train_features).experts_outputs * true.unsqueeze(1))

tensor([[[0.4665],
         [0.8806],
         [0.9065]],

        [[0.7805],
         [0.8393],
         [0.1301]],

        [[0.9939],
         [0.0666],
         [0.4295]],

        [[0.0177],
         [0.4698],
         [0.1122]]], grad_fn=<SigmoidBackward0>)

In [80]:
# Run the forward pass once and reuse its result: two separate passes are
# redundant and could disagree with each other if the model has any stochastic
# component.
moe_output = model.forward(train_features)
# Sigmoid of (signed label * expert output) per expert; `true` gets an extra
# axis so it can broadcast against `experts_outputs`.
expert_likelihoods = torch.sigmoid(moe_output.experts_outputs * true.unsqueeze(1))
# Gate-weighted sum of the per-expert values, one scalar per sample.
torch.bmm(moe_output.gate_probs.unsqueeze(1), expert_likelihoods)

tensor([[[0.7012]],

        [[0.4351]],

        [[0.5098]],

        [[0.0221]]], grad_fn=<BmmBackward0>)