In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

### Data preparation
Extract the current data from the database and prepare it for processing.

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

# Class labels. len(CLASSES) is used as the one-hot width in data_preparation, and the list is
# passed as `classes` to the accuracy reports further down.
# NOTE(review): still empty here; presumably it has to be filled in before the loaders are built. Confirm.
CLASSES = []


def data_preparation(dataset_t, test_size=0.2, random_state=0, batch_size=1):
    """Split a dataset into train/test ``DataLoader`` objects of scaled features and one-hot labels.

    Args:
        dataset_t: Object exposing ``data`` (features) and ``target`` (class
            indices). Presumably shaped like a scikit-learn ``Bunch`` holding a
            2-D numeric array and a 1-D integer array -- TODO confirm.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split`` so the split is repeatable.
        batch_size: Batch size of both loaders. The default of 1 matches what
            ``DataLoader`` uses when no batch size is given.

    Returns:
        tuple: ``(train_loader, test_loader)``. Every batch is a pair
        ``(features, one_hot_labels)``, both ``float32`` tensors.
    """
    # Features are the model input (x); the class indices are the labels (y).
    train_x, test_x, train_y, test_y = train_test_split(
        dataset_t.data, dataset_t.target, test_size=test_size, random_state=random_state)

    # Fit the scaler on the training split only, so that statistics of the
    # held-out data do not leak into the features the model is trained on.
    scaler = MinMaxScaler()  # TODO: choose scaler
    scaler.fit(train_x)
    train_x = torch.tensor(scaler.transform(train_x), dtype=torch.float32)
    test_x = torch.tensor(scaler.transform(test_x), dtype=torch.float32)

    # F.one_hot only accepts integer index tensors.
    train_y = torch.tensor(train_y, dtype=torch.long)
    test_y = torch.tensor(test_y, dtype=torch.long)

    # Prefer the declared label list; while it is empty, derive the width from
    # the largest label seen in either split so both encodings have the same size.
    num_classes = len(CLASSES) or int(max(train_y.max(), test_y.max())) + 1

    train_y = F.one_hot(train_y, num_classes).to(torch.float32)
    test_y = F.one_hot(test_y, num_classes).to(torch.float32)  # TODO: choose encoding (one_hot, etc.)

    train_loader_t = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size)
    test_loader_t = DataLoader(TensorDataset(test_x, test_y), batch_size=batch_size)

    return train_loader_t, test_loader_t


### Training the Model

In [None]:
import pandas as pd

# Hyperparameters handed to train_model further down.
LR = 1e-2  # learning rate for the SGD optimizer
MOMENTUM = 0.9  # momentum for the SGD optimizer
NUM_EPOCHS = 100  # number of passes over train_loader

# String stand-in for the real dataset; start_ml() overwrites this global with a DataFrame.
dataset = "placeholder"


def start_ml(data):
    """Wrap ``data`` in a ``pandas.DataFrame`` and publish it as the module-level ``dataset``.

    Args:
        data: Anything the ``pd.DataFrame`` constructor accepts.

    Returns:
        None. The frame is only reachable through the ``dataset`` global.
    """
    global dataset
    frame = pd.DataFrame(data)
    dataset = frame


# Build both loaders from the module-level `dataset`, using data_preparation's default
# test_size and random_state.
# NOTE(review): unless start_ml() ran first, `dataset` is still the string "placeholder", which has
# no `.data` / `.target` for data_preparation to read. Confirm the intended call order.
train_loader, test_loader = data_preparation(dataset)  # with default values

In [None]:
import surfacemodelclass as sf


def train_model(model, device, train_loader, num_epochs=100, lr=1e-3, momentum=0.9):
    """Train ``model`` with SGD and cross-entropy loss.

    Args:
        model: ``nn.Module`` to optimise; it is moved to ``device``.
        device: Device the model and every batch are sent to.
        train_loader: Iterable yielding ``(inputs, targets)`` batches. Targets
            are cast to ``float32`` before the loss is computed.
        num_epochs: Number of passes over ``train_loader``.
        lr: SGD learning rate.
        momentum: SGD momentum.

    Returns:
        tuple: ``(model, criterion, optimizer)`` after training.
    """
    criterion = nn.CrossEntropyLoss()
    # criterion = nn.MSELoss()

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # optimizer = optim.Adam(model.parameters(), lr=lr)

    print("Training STARTED")
    model.to(device)

    for e in range(num_epochs):
        model.train()  # set the model in training mode
        # Plain float accumulator: summing the loss tensors themselves would
        # keep every batch's autograd graph alive for the whole epoch.
        total_train_loss = 0.0

        for x, y in train_loader:  # loop over the training set
            x, y = x.to(device), y.to(torch.float32).to(device)  # send the batch to the device
            pred = model(x)  # forward pass
            loss = criterion(pred, y)

            # zero out the gradients, perform the backpropagation step, and update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        # Report the first epoch and then every tenth one. `or` is required here:
        # `==` binds looser than `|`, so `a == 0 | e == 0` is a chained comparison.
        if e == 0 or (e + 1) % 10 == 0:
            print("Epoch", e, "Training Loss:", total_train_loss)

    # Printed once, after the last epoch.
    print("Training FINISHED")

    return model, criterion, optimizer


# Instantiate the network, pick the compute device and run the training loop.
model = sf.SurfaceModel()  # TODO: RNN
if torch.backends.mps.is_available():
    device = torch.device("mps")  # for testing on my Mac
else:
    device = torch.device("cpu")
model, criterion, optimizer = train_model(
    model=model,
    device=device,
    train_loader=train_loader,
    num_epochs=NUM_EPOCHS,
    lr=LR,
    momentum=MOMENTUM,
)


## Testing

In [None]:
import numpy as np
import util

In [None]:
def print_training_accuracy(model, train_loader, criterion, classes,
                            device=torch.device("mps" if torch.backends.mps.is_available() else "cpu")
                            ):
    """Print the average training loss plus per-class and overall training accuracy.

    Args:
        model: Model handed to ``util.compute_accuracy``.
        train_loader: Loader handed to ``util.compute_accuracy``; its
            ``dataset`` length is used to average the loss.
        criterion: Loss function handed to ``util.compute_accuracy``.
        classes: Sequence of class labels, indexed in step with the per-class
            counters.
        device: Device handed to ``util.compute_accuracy``. The default is
            evaluated once, when the function is defined.

    Returns:
        None. Everything is written to stdout.
    """
    # Presumably returns (summed loss, per-class correct counts, per-class totals) -- TODO confirm.
    training_loss, class_correct, class_total = util.compute_accuracy(model, train_loader, device, criterion)

    # average training loss
    training_loss = training_loss / len(train_loader.dataset)
    print('Training Loss: {:.6f}\n'.format(training_loss))

    # Iterate over the classes that actually exist instead of a fixed 10, and
    # never past the end of the counters returned by the helper.
    for i in range(min(len(classes), len(class_total))):
        if class_total[i] > 0:
            print('Training Accuracy of %5s: %2d%% (%2d/%2d)' % (
                classes[i], 100.0 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            print('Training Accuracy of %5s: N/A ' % (classes[i]))

    # '\n' (not '\T', an invalid escape that printed a literal backslash).
    print('\nTraining Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))


# Report loss and accuracy on the training split.
print_training_accuracy(model=model, train_loader=train_loader, criterion=criterion,
                        classes=CLASSES, device=device)

In [None]:
def print_testing_accuracy(model, test_loader, device, criterion, classes):
    """Print the average test loss plus per-class and overall test accuracy.

    Args:
        model: Model handed to ``util.compute_accuracy``.
        test_loader: Loader handed to ``util.compute_accuracy``; its
            ``dataset`` length is used to average the loss.
        device: Device handed to ``util.compute_accuracy``.
        criterion: Loss function handed to ``util.compute_accuracy``.
        classes: Sequence of class labels, indexed in step with the per-class
            counters.

    Returns:
        None. Everything is written to stdout.
    """
    # Presumably returns (summed loss, per-class correct counts, per-class totals) -- TODO confirm.
    test_loss, class_correct, class_total = util.compute_accuracy(model, test_loader, device, criterion)

    # average test loss
    test_loss = test_loss / len(test_loader.dataset)
    print('Test Loss: {:.6f}\n'.format(test_loss))

    # Iterate over the classes that actually exist instead of a fixed 10, and
    # never past the end of the counters returned by the helper.
    for i in range(min(len(classes), len(class_total))):
        if class_total[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                classes[i], 100.0 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            # This report covers the test split, so the missing examples are test examples.
            print('Test Accuracy of %5s: N/A (no test examples)' % (classes[i]))

    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))


# Keyword arguments on purpose: print_testing_accuracy takes (model, test_loader, device, criterion,
# classes), so the positional order used for print_training_accuracy would bind criterion to `device`.
print_testing_accuracy(model, test_loader, device=device, criterion=criterion, classes=CLASSES)