In [1]:
import numpy as np
import torch
from torch import nn, optim, Tensor
import torch.utils.data as Data
import os
import sys
sys.path.append('../')
import utils.csv as csv
import utils.validation as val
from models.transformer import TransformerClassifier

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sample counts and the weights derived from them: every weight is the largest
# count divided by that entry's own count, so the largest entry gets 1.0 and
# smaller entries get proportionally larger weights.
samples = torch.tensor([24106, 751, 3413, 1345, 2019, 1199, 8010, 964])
# Tensor.max() reduces inside torch; the builtin max() walks the tensor
# element by element in Python to reach the same 0-d result.
mx = samples.max()
weight = mx / samples
weight

tensor([ 1.0000, 32.0985,  7.0630, 17.9227, 11.9396, 20.1051,  3.0095, 25.0062])

In [2]:
# Input locations: PATH is the base folder that both derived paths are joined onto.
PATH = 'D:\\Deutschland\\FUB\\master_thesis\\data\\gee\\output'
LABEL_CSV = 'label_pure.csv'                 # file name of the label table
DATA_DIR = os.path.join(PATH, 'daily')       # first argument given to csv.to_numpy
label_path = os.path.join(PATH, LABEL_CSV)   # second argument given to csv.to_numpy

In [3]:
# Run-wide training settings.
BATCH_SIZE = 128  # mini-batch size handed to build_dataloader
LR = 1e-2         # learning rate given to the Adam optimizer
EPOCH = 100       # upper bound of the epoch loop and denominator in the progress log
SEED = 2048       # seed value forwarded to build_dataloader

In [4]:
# Architecture settings for the Transformer model. All seven values are passed
# positionally to TransformerClassifier, in the order they are listed here.
# NOTE(review): the `samples` tensor earlier in the notebook has 8 entries while
# num_classes is 5 -- confirm which of the two reflects the current label set.
num_bands, seq_len, num_classes = 10, 25, 5
d_model, nhead = 8, 4
num_layers, dim_feedforward = 1, 8

In [5]:
def numpy_to_tensor(x_data:np.ndarray, y_data:np.ndarray):
    """Convert the feature and label arrays to tensors laid out for the data loaders.

    Args:
        x_data: feature array with at least two dimensions; its first two axes
            are swapped (which axis is the batch axis depends on the loader
            that produced it -- TODO confirm against csv.to_numpy).
        y_data: label array, e.g. of shape (n, 1); flattened to one dimension.

    Returns:
        Tuple ``(x_set, y_set)``: ``x_set`` is ``x_data`` with axes 0 and 1
        exchanged, ``y_set`` is ``y_data`` flattened to shape ``(n,)``.
    """
    x_set = torch.from_numpy(x_data)
    y_set = torch.from_numpy(y_data)
    # exchange dimension 0 and 1 of x_set depending on batch_first or not
    x_set = x_set.transpose(0, 1)
    # reduce dimension of y_set from (n, 1) to (n, ).
    # reshape() still returns a view when the memory layout allows it, but
    # unlike view() it does not raise for non-contiguous input (e.g. a
    # transposed or column-sliced label array).
    y_set = y_set.reshape(-1)
    return x_set, y_set

In [6]:
def build_dataloader(x_set:Tensor, y_set:Tensor, batch_size:int, seed:int,
                     train_size:int=1105, num_workers:int=4):
    """Split the tensors at a fixed index and wrap both parts in data loaders.

    The split is positional, not random: the first ``train_size`` samples form
    the training set and everything after them forms the validation set.

    Args:
        x_set: feature tensor, indexed by sample along dimension 0.
        y_set: label tensor with the same length along dimension 0.
        batch_size: mini-batch size of the training loader.
        seed: seeds the generator that drives training-set shuffling, so the
            batch order is repeatable for a given seed.
        train_size: number of leading samples used for training (default 1105,
            the previously hard-coded split index).
        num_workers: worker processes per loader (default 4, as before).

    Returns:
        Tuple ``(train_loader, val_loader)``. The validation loader delivers
        the whole validation set as a single, unshuffled batch.
    """
    x_train, x_val = x_set[:train_size], x_set[train_size:]
    y_train, y_val = y_set[:train_size], y_set[train_size:]
    train_dataset = Data.TensorDataset(x_train, y_train)
    val_dataset = Data.TensorDataset(x_val, y_val)
    # dedicated generator so that `seed` actually controls the shuffle order
    generator = torch.Generator().manual_seed(seed)
    train_loader = Data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                   num_workers=num_workers, generator=generator)
    # DataLoader rejects batch_size=0, so keep it at least 1 when the
    # validation slice is empty; sample order does not affect accuracy, hence
    # no shuffling here.
    val_loader = Data.DataLoader(val_dataset, batch_size=max(len(val_dataset), 1),
                                 shuffle=False, num_workers=num_workers)
    return train_loader, val_loader

In [7]:
def train(model:nn.Module, epoch:int):
    """Run one training epoch over the module-level ``train_loader``.

    Reads the notebook globals ``train_loader``, ``device``, ``criterion``,
    ``optimizer`` and ``EPOCH``; they must be defined before this is called.

    Args:
        model: network to optimise; it is switched to training mode here.
        epoch: zero-based epoch index, used only in the progress message.
    """
    total_step = len(train_loader)
    model.train()
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)
        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # record training loss once per epoch, after the final batch, so the
        # reported step number and the reported loss belong to the same batch
        if step == total_step:
            print('Epoch[{}/{}],Step[{}/{}],Loss:{:.4f}'
                  .format(epoch+1,EPOCH,step,total_step,loss.item()))

In [8]:
def validate(model:nn.Module):
    """Measure classification accuracy on the module-level ``val_loader``.

    Reads the notebook globals ``val_loader`` and ``device``. The model is put
    into evaluation mode and gradients are disabled for the whole pass.

    Args:
        model: network to evaluate; for each sample the index of the largest
            output along dimension 1 is taken as the predicted class.

    Returns:
        Accuracy in percent as a float, or ``None`` when the loader yields no
        samples (a notice is printed instead of dividing by zero).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for (values, labels) in val_loader:
            values = values.to(device)
            labels = labels.to(device)
            outputs = model(values)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # an empty validation split would otherwise end in ZeroDivisionError
        print('Validation set is empty; accuracy is undefined.')
        return None
    accuracy = 100 * correct / total
    print('Test Accuracy of the model: {} %'.format(accuracy))
    return accuracy

In [None]:
if __name__ == "__main__":
    # Seed torch's global RNG before anything random happens (model weight
    # initialisation, default sampler seeding) so runs start from a fixed state.
    torch.manual_seed(SEED)
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # dataset
    x_data, y_data = csv.to_numpy(DATA_DIR, label_path)
    x_set, y_set = numpy_to_tensor(x_data, y_data)
    train_loader, val_loader = build_dataloader(x_set, y_set, BATCH_SIZE, SEED)
    # model
    model = TransformerClassifier(num_bands, seq_len, num_classes, d_model, nhead, num_layers, dim_feedforward).to(device)
    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), LR)
    # train and validate model
    for epoch in range(EPOCH):
        train(model, epoch)
        validate(model)
        # NOTE(review): this unconditional break ends the loop after the first
        # epoch, so EPOCH has no effect beyond the progress message. It looks
        # like a leftover smoke-test guard -- remove it for a full training run.
        break
    # save model
    # torch.save(model, '../outputs/model.pkl')