# TEST DATASET AND DATALOADER CLASSES FOR TABULAR DATA

In [2]:
import torch
# `F` is the conventional alias for the neural-network functional API
# (activations, losses, ...). `torch.functional` is a different module that
# holds tensor helpers such as einsum/norm, so aliasing it as `F` is misleading.
import torch.nn.functional as F

In [3]:
from data_loader.data_loaders import TabularDataset, TabularDataLoader

In [5]:
# Construct the dataset object on its own to check that TabularDataset can be
# instantiated for the 'bpic2011_f3' files with 'Classification' as target.
dataset = TabularDataset(
    dataset_path='bpic2011_f3_full',
    dataset_file='bpic2011_f3',
    target_col='Classification',
    header_in_data=True,
    training=True,
    scale=True,
)

In [6]:
# Construct the loader with the same dataset arguments plus batching options.
dataloader = TabularDataLoader(
    # dataset arguments
    dataset_path='bpic2011_f3_full',
    dataset_file='bpic2011_f3',
    target_col='Classification',
    header_in_data=True,
    training=True,
    scale=True,
    # batching arguments
    batch_size=4,
    shuffle=True,
    validation_split=0.2,
    num_workers=0,
)

In [7]:
# Pull one mini-batch from the loader and report the shape of both tensors.
train_features, train_labels = next(iter(dataloader))
print(
    f"Feature batch shape: {train_features.size()}",
    f"Labels batch shape: {train_labels.size()}",
    sep="\n",
)

Feature batch shape: torch.Size([4, 277])
Labels batch shape: torch.Size([4, 1])


In [8]:
# First sample of the batch and its label. `img` holds a row of tabular
# features (squeezed), not an image.
img, label = train_features[0].squeeze(), train_labels[0]

In [9]:
# Show the feature vector of the first sample in the batch.
print(img)

tensor([-1.2584, -0.4211, -0.0420,  0.5285,  1.3035,  0.1436, -0.1361, -1.1700,
         0.3271,  0.7628,  1.3851,  1.3472, -1.2419,  0.9324,  1.0003,  1.6175,
        -0.2599,  1.2783,  0.4190, -1.2549, -1.4043, -1.5071, -0.8685, -1.4115,
        -0.9120, -0.1028, -0.7566,  1.1672,  1.1520,  0.3604, -1.0346, -0.7196,
        -0.9478,  0.3472,  1.2950, -1.4619,  1.0177, -0.4309, -1.4137,  1.2873,
        -1.4859,  0.4024,  0.8659, -0.7270, -0.0431, -1.6291, -1.5062, -0.1107,
        -1.9019, -0.3872, -0.8851,  1.4809,  0.7728, -1.1056, -1.2949,  1.7075,
         0.3535, -1.1702, -0.3560, -0.8405, -1.4493, -0.0126, -0.2374, -0.8642,
         1.1975, -1.1660,  1.4255, -1.5072, -0.9328, -0.2998,  0.8325,  0.1521,
        -1.5369,  0.4975, -1.5637,  0.2126,  0.7961, -0.1812, -1.5476, -1.1539,
         0.8365,  0.3717,  1.2556,  0.4311, -0.0259,  0.8059, -1.1725,  0.1031,
         0.8886, -0.1550, -0.2717, -0.4113,  0.0226, -0.8306, -0.8160, -1.0826,
         0.0655, -0.1395,  0.3520, -1.36

In [10]:
# Show the label tensor of the first sample in the batch.
print(label)

tensor([0.])


## Test `__read_files` method

In [11]:
import pandas as pd

# Folder name and file-name prefix used to build the CSV paths under data/.
dataset_path, dataset_file = 'bpic2011_f3_full', 'bpic2011_f3'

In [12]:
# Zipped CSV paths for the train and test splits:
# data/<dataset_path>/<dataset_file>_{train,test}.csv.zip
file_name_train = f'data/{dataset_path}/{dataset_file}_train.csv.zip'
file_name_test = f'data/{dataset_path}/{dataset_file}_test.csv.zip'

In [13]:
# Reproduce the file-reading step by hand: load the training CSV (first row is
# the header), then split it into the target vector and the feature matrix.
target_col = 'Classification'
train_frame = pd.read_csv(file_name_train, sep=',', header=0)
train_y = train_frame[target_col].values
# Drop without `inplace` and without rebinding a DataFrame name to an ndarray,
# so the loaded frame stays intact and each name keeps one type.
train_x = train_frame.drop(columns=target_col).values

# MODEL and FORWARD

In [14]:
from model.model import MoE

In [15]:
# Take the input width from the batch instead of hard-coding 277, so the cell
# keeps working if the number of feature columns changes.
model = MoE(input_size=train_features.shape[1], num_experts=3, topk=0)

In [16]:
# Gate probabilities for the batch (one row per sample, one column per expert).
# Call the module itself rather than .forward() so any registered hooks run
# (assumes MoE is a torch.nn.Module).
model(train_features).gate_probs

tensor([[0.5354, 0.3675, 0.0971],
        [0.2489, 0.3619, 0.3891],
        [0.2470, 0.1123, 0.6407],
        [0.0882, 0.3730, 0.5388]], grad_fn=<SoftmaxBackward0>)

In [17]:
# Run the forward pass once and reuse the result: calling it twice doubles the
# work and, if the model has any stochastic component, would pair gate
# probabilities and expert outputs from different passes.
# The module is called directly (not .forward()) so hooks run; this assumes
# MoE is a torch.nn.Module.
moe_out = model(train_features)
# (batch, 1, experts) @ (batch, experts, 1) -> gate-weighted sum of expert outputs.
torch.bmm(moe_out.gate_probs.unsqueeze(1), moe_out.experts_outputs)

tensor([[[ 0.7074]],

        [[-1.1435]],

        [[ 2.1880]],

        [[-0.1888]]], grad_fn=<BmmBackward0>)

In [18]:
# Shape of the per-expert outputs. Call the module rather than .forward() so
# hooks run (assumes MoE is a torch.nn.Module).
model(train_features).experts_outputs.shape

torch.Size([4, 3, 1])

In [19]:
# Shape of the gate probabilities. Call the module rather than .forward() so
# hooks run (assumes MoE is a torch.nn.Module).
model(train_features).gate_probs.shape

torch.Size([4, 3])

In [20]:
# Small 3x2 integer tensor used to experiment with unsqueeze and broadcasting.
mytensor = torch.tensor([
    [2, 1],
    [3, 2],
    [1, 2],
])

In [21]:
# unsqueeze(1) inserts a size-1 axis at position 1; display the resulting shape.
mytensor.unsqueeze(1).shape


torch.Size([3, 1, 2])

In [22]:
# Column of +1/-1 values (shape 3x1) used as a per-row sign in the experiment.
mytarget = torch.tensor([
    [1],
    [-1],
    [1],
])

In [23]:
# Display the shape of the sign column.
mytarget.shape

torch.Size([3, 1])

In [24]:
# Element-wise product: the (3, 1) column broadcasts across the two columns of
# the (3, 2) tensor, so each row is multiplied by its own sign.
mytarget * mytensor

tensor([[ 2,  1],
        [-3, -2],
        [ 1,  2]])

In [25]:
# Rescale the labels as 2*y - 1; for the 0/1 labels in this batch this gives
# -1/+1 targets.
true = train_labels * 2 - 1

In [26]:
# Display the rescaled targets.
true

tensor([[-1.],
        [ 1.],
        [ 1.],
        [ 1.]])

In [27]:
# Size of axis 1 of the expert outputs (the expert axis). Call the module
# rather than .forward() so hooks run (assumes MoE is a torch.nn.Module).
model(train_features).experts_outputs.shape[1]

3

In [28]:
# Sigmoid of each expert's output multiplied by the signed target.
# true.unsqueeze(1) adds a middle axis so the per-sample target broadcasts
# across the expert axis. The module is called directly (not .forward()) so
# hooks run; this assumes MoE is a torch.nn.Module.
torch.sigmoid(model(train_features).experts_outputs * true.unsqueeze(1))

tensor([[[0.4424],
         [0.4096],
         [0.4922]],

        [[0.1755],
         [0.3745],
         [0.2997]],

        [[0.8281],
         [0.8622],
         [0.9299]],

        [[0.1340],
         [0.8331],
         [0.3600]]], grad_fn=<SigmoidBackward0>)

In [29]:
# Run the forward pass once and reuse it: the original expression ran it twice,
# which doubles the work and could mix results from different passes if the
# model has any stochastic component. The module is called directly (not
# .forward()) so hooks run; this assumes MoE is a torch.nn.Module.
moe_out = model(train_features)
# Per-expert sigmoid of (expert output * signed target); the target gets a
# middle axis so it broadcasts across experts.
expert_scores = torch.sigmoid(moe_out.experts_outputs * true.unsqueeze(1))
# Gate-weighted sum over experts: (batch, 1, experts) @ (batch, experts, 1).
torch.bmm(moe_out.gate_probs.unsqueeze(1), expert_scores)

tensor([[[0.5045]],

        [[0.1752]],

        [[0.8581]],

        [[0.4670]]], grad_fn=<BmmBackward0>)