In [1]:
import torch
from torch import nn

from src.data_loading import load_dataset_1d

from src.core import train, test

In [2]:
# Number of samples per batch; passed to the dataset loader and to train/test below.
BATCH_SIZE = 64

In [3]:
# Load the train and test splits; BATCH_SIZE is handed to the loader
# (presumably so it can pre-batch the rows -- confirm in src.data_loading).
train_dataset = load_dataset_1d('train_dataset.csv', BATCH_SIZE)
test_dataset = load_dataset_1d('test_dataset.csv', BATCH_SIZE)
# Display the length of each split as the cell output.
len(train_dataset), len(test_dataset)

(13149, 3287)

In [4]:
# Shape of the first component of the first dataset entry
# (presumably the input batch of an (inputs, targets) pair -- confirm in the loader).
train_dataset[0][0].shape

torch.Size([64, 768])

In [5]:
def accuracy(out, truth):
    """Return the element-wise absolute error ``|truth - out|``.

    Despite the name, this is an error measure (lower is better), not a
    fraction of correct predictions.

    Args:
        out: Tensor of model predictions.
        truth: Tensor of target values, broadcastable against ``out``.

    Returns:
        Tensor holding the absolute difference for every element.
    """
    difference = truth - out
    return difference.abs()

In [10]:
# Layer widths of the fully connected network, from input to the last hidden layer.
x0 = 768      # input feature width
x1 = 2 ** 10  # 1024 units
x2 = 2 ** 5   # 32 units
x3 = 2 ** 3   # 8 units


class Model(nn.Module):
    """Fully connected regressor: x0 -> x1 -> x2 -> x3 -> 1 with ReLU in between.

    The four linear layers are exposed both as ``layer1`` .. ``layer4`` and,
    chained with ReLU activations, as ``classifier``. Both registrations are
    kept so the keys of ``state_dict()`` stay the same as before.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(x0, x1)
        self.layer2 = nn.Linear(x1, x2)
        self.layer3 = nn.Linear(x2, x3)
        self.layer4 = nn.Linear(x3, 1)
        # No activation after the last layer: the network emits one raw value per sample.
        self.classifier = nn.Sequential(
            self.layer1,
            nn.ReLU(),
            self.layer2,
            nn.ReLU(),
            self.layer3,
            nn.ReLU(),
            self.layer4,
        )

    def forward(self, X):
        """Map a ``(..., x0)`` input tensor to a ``(..., 1)`` output tensor."""
        # Call the module rather than ``.forward`` so that hooks registered on
        # ``classifier`` are executed as well.
        return self.classifier(X)


model = Model()
# Plain SGD over the network's parameters.
# Deliberately no trailing comma after the call: a trailing comma would bind
# `optimizer` to a 1-tuple instead of the optimizer object.
optimizer = torch.optim.SGD(model.classifier.parameters(), lr=0.05)

In [11]:
# Fit the model on the training split with an MSE objective; `accuracy`
# (absolute error) is the metric passed alongside the loss.
# The trailing 100 is presumably the number of epochs -- the log counts "Epoch [k/100]".
train(train_dataset,
      BATCH_SIZE,
      model,
      nn.MSELoss(),
      optimizer,
      accuracy,
      100)

Dataset size: 13149
Epoch [1/100], train_loss: 8.473548752933919, train_accuracy: 2.3368464524469066, time: 10.742s
Epoch [2/100], train_loss: 6.5725913282328845, train_accuracy: 1.9981716491938708, time: 10.949s
Epoch [3/100], train_loss: 5.714405312620829, train_accuracy: 1.8226621955671694, time: 10.439s
Epoch [4/100], train_loss: 5.082206896211958, train_accuracy: 1.693927256963454, time: 10.607s
Epoch [5/100], train_loss: 4.576684761885065, train_accuracy: 1.5914553842333137, time: 10.295s
Epoch [6/100], train_loss: 4.154466583303774, train_accuracy: 1.5052508849301296, time: 10.233s
Epoch [7/100], train_loss: 3.816964927522191, train_accuracy: 1.4360176189851104, time: 10.518s
Epoch [8/100], train_loss: 3.5360835215394255, train_accuracy: 1.377606214910544, time: 10.139s
Epoch [9/100], train_loss: 3.2975786343592004, train_accuracy: 1.327489771678136, time: 10.336s
Epoch [10/100], train_loss: 3.096678844680622, train_accuracy: 1.2850384445179188, time: 10.583s
Epoch [11/100], tra

In [12]:
# Evaluate the model on the test split with the same MSE loss and absolute-error metric.
test(test_dataset, BATCH_SIZE, model, nn.MSELoss(), accuracy)

Dataset size: 3287
test_loss: 11.239357554879927, test_accuracy: 2.518847796885772, time: 0.846s


In [13]:
# Same evaluation on the training split, for comparison with the test-split numbers.
test(train_dataset, BATCH_SIZE, model, nn.MSELoss(), accuracy)

Dataset size: 13149
test_loss: 7.408099949826795, test_accuracy: 1.915594685817909, time: 3.389s


In [None]:
# Save the model's state_dict, copied into a plain dict, to 'model.pt'.
torch.save(dict(model.state_dict()), 'model.pt')

In [14]:
import time
import math


def debug(data, batch_size, optimizer, model, criterion, accuracy):
    """Evaluate ``model`` over ``data`` and print the average loss and metric.

    Args:
        data: Sized iterable of ``(batch, truth)`` tensor pairs.
        batch_size: Nominal batch size. Kept for interface compatibility; the
            per-batch metric is normalised by the actual number of outputs so
            that a shorter final batch is handled too.
        optimizer: Unused; accepted only so the signature stays unchanged.
        model: Module to evaluate. It is moved to the evaluation device and
            switched to eval mode (and left in that state).
        criterion: Loss module, called as ``criterion(out, truth)``.
        accuracy: Callable ``(out, truth) -> tensor`` of per-element scores;
            the scores of a batch are summed and divided by the batch length.

    Returns:
        None. The dataset size, the per-batch averages of loss and metric and
        the elapsed time are printed.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    # Use the GPU when there is one, otherwise fall back to the CPU instead of crashing.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    criterion.to(device)
    size = len(data)
    print("Dataset size:", size)
    time_started = time.time() * 1000
    loss_sum = 0.0
    accuracy_sum = 0.0
    # Pure evaluation: no autograd graph is needed, which saves memory and time.
    with torch.no_grad():
        for batch, truth in data:
            # No-ops when the tensors already live on the evaluation device.
            batch = batch.to(device)
            truth = truth.to(device)
            # Flatten to one value per sample; -1 also copes with a short last batch.
            out = model(batch).reshape(-1)
            loss = criterion(out, truth)
            accuracy_value = accuracy(out, truth).sum() / out.numel()

            loss_sum += loss.item()
            accuracy_sum += accuracy_value.item()

    passed_time = math.ceil(time.time() * 1000 - time_started)
    # Averages are taken over the number of batches, not the number of samples.
    loss_average = loss_sum / size
    accuracy_average = accuracy_sum / size
    print(
        f"test_loss: {loss_average}, test_accuracy: {accuracy_average}, time: {passed_time / 1000}s")

In [15]:
# Run the local `debug` evaluation on the training split; its printed numbers
# can be compared with the output of `test(train_dataset, ...)`.
debug(train_dataset, BATCH_SIZE, optimizer, model, nn.MSELoss(), accuracy)

Dataset size: 13149
test_loss: 7.408099949826795, test_accuracy: 1.915594685817909, time: 3.756s
