In [1]:
# used for statistical processing, e.g. splitting and scaling the dataset and computing error metrics

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# plotting the data
import matplotlib.pyplot as plt
# used for the dataframes
import pandas as pd

# converts dataframes into arrays, which are then
# turned into tensors that the ML model can work with
import numpy as np
%matplotlib inline

# required for the LSTM model
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, SubsetRandomSampler


import wandb
# Start a Weights & Biases run in the given project; the wandb.config,
# wandb.define_metric and wandb.log calls in later cells act on this run.
wandb.init(project='Hardware Utilization Prediction')

from gpu_dataloader import GPUDataset

from lstm_models import LSTM


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmy-god-its-full-of-stars[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# Load the dataset through the project-local GPUDataset class.
# NOTE(review): `small_df=True` presumably selects a reduced data frame for
# quicker experiments - confirm in gpu_dataloader.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# load did not finish in the recorded run.
dataset = GPUDataset(small_df=True)

KeyboardInterrupt: 

In [None]:
# Display the shape of the feature tensor. Later cells read axis 2 as the
# number of input features and pass axis 1 to the LSTM constructor
# (presumably the sequence length - TODO confirm).
dataset.X.shape

In [None]:
# --- training schedule ---
num_epochs: int = 100
learning_rate: float = 0.015

# --- model dimensions, derived from the dataset ---
# number of input features (axis 2 of X)
input_size: int = dataset.X.shape[2]
# size of the LSTM hidden state: eight units per input feature
hidden_size: int = input_size * 8
# number of stacked LSTM layers
num_layers: int = 1
# number of model outputs (axis 1 of y)
num_classes: int = dataset.y.shape[1]

In [None]:
# Record every hyperparameter on the active W&B run in a single call.
wandb.config.update({
    'num_epochs': num_epochs,
    'learning_rate': learning_rate,
    'input_size': input_size,
    'hidden_size': hidden_size,
    'num_layers': num_layers,
    'num_classes': num_classes,
})

In [None]:
# Build the model from the hyperparameters defined above. The last argument is
# axis 1 of dataset.X (presumably the sequence length - confirm against
# lstm_models.LSTM).
lstm = LSTM(num_classes, input_size, hidden_size, num_layers, dataset.X.shape[1])
# Put the model into training mode (assuming LSTM is a torch.nn.Module). As the
# last expression of the cell, the returned value is displayed by the notebook.
lstm.train()

In [None]:
# mean squared error for regression
criterion = nn.MSELoss()
# optimizer function
optimizer = torch.optim.AdamW(lstm.parameters(), lr=learning_rate)

In [None]:
import math

def get_rmse(actual_values, predicted_values) -> float:
    '''returns the root mean squared error

    computed as the square root of scikit-learn's mean squared error
    '''
    mse = mean_squared_error(y_true=actual_values, y_pred=predicted_values)
    return math.sqrt(mse)

def get_mape(actual_values, predicted_values) -> float:
    '''returns the mean absolute percentage error (in percent)

    Both arguments may be any array-like of equal shape (lists, numpy arrays,
    ...); they are converted to float arrays before the computation.

    Actual values of exactly zero would make the ratio undefined (nan/inf), so
    the denominator is clamped to machine epsilon, the same convention
    scikit-learn uses for its MAPE. For non-zero actual values the result is
    unchanged.
    '''
    actual = np.asarray(actual_values, dtype=float)
    predicted = np.asarray(predicted_values, dtype=float)

    # avoid dividing by zero when an actual value is 0
    denominator = np.maximum(np.abs(actual), np.finfo(np.float64).eps)

    return float(np.mean(np.abs(actual - predicted) / denominator * 100))

def get_mae(actual_values, predicted_values) -> float:
    '''returns the mean absolute error, as computed by scikit-learn'''
    mae = mean_absolute_error(y_true=actual_values, y_pred=predicted_values)
    return mae

In [None]:
# Names under which the training metrics are logged to Weights & Biases.
LOSS: str = 'loss'
RMSE_TRAINING: str = 'root mean squared error (training)'
MAE_TRAINING: str = 'mean absolute error (training)'

# Register each metric with summary='min', so the run summary keeps the
# smallest logged value (lower is better for all three).
wandb.define_metric(LOSS, summary='min')
wandb.define_metric(RMSE_TRAINING, summary='min')
wandb.define_metric(MAE_TRAINING, summary='min')

In [None]:
import random

batch_size: int = 1000

# The data is consumed in contiguous chunks of `batch_size` samples. Only full
# chunks are used: a trailing partial chunk (len(dataset) % batch_size samples)
# is never visited.
num_chunks: int = len(dataset) // batch_size
if num_chunks == 0:
    # Without this guard no batch would ever run and the metric code at the
    # end of the epoch would fail on undefined names.
    raise ValueError(
        f'dataset has {len(dataset)} samples, fewer than one batch of {batch_size}')

for epoch in range(num_epochs):
    print(f'Epoch: {epoch + 1}')

    # Visit every chunk exactly once per epoch, in a fresh random order.
    chunk_order = random.sample(range(num_chunks), num_chunks)

    # One index list per batch; the samples inside a chunk are shuffled too.
    batches = []
    for choice in chunk_order:
        chunk = list(range(choice * batch_size, (choice + 1) * batch_size))
        random.shuffle(chunk)
        batches.append(chunk)

    # A single loader per epoch: the worker processes are started once per
    # epoch instead of once per chunk.
    train_loader = DataLoader(dataset, batch_sampler=batches, num_workers=5)

    epoch_loss = 0.0
    epoch_outputs = []
    epoch_labels = []

    for step, (inputs, labels) in enumerate(train_loader, start=1):
        optimizer.zero_grad()

        # Call the module itself rather than .forward() so that registered
        # hooks run (assuming LSTM is a torch.nn.Module).
        outputs = lstm(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Keep what is needed to report metrics over the whole epoch.
        epoch_loss += loss.item()
        epoch_outputs.append(outputs.detach().cpu())
        epoch_labels.append(labels.detach().cpu())

        # Batches are yielded in the order of `batches`, so this is the chunk
        # that has just been trained on.
        choice = chunk_order[step - 1]
        print(f'removed {choice}, remaining: {num_chunks - step}')

    # logging to wandb
    # Metrics cover every batch of the epoch, not only the last one. All
    # batches have the same size, so the mean of the batch losses is the
    # epoch loss.
    mean_loss = epoch_loss / num_chunks
    epoch_predictions = torch.cat(epoch_outputs)
    epoch_targets = torch.cat(epoch_labels)
    rmse = get_rmse(epoch_targets, epoch_predictions)
    mae = get_mae(epoch_targets, epoch_predictions)
    log_dict: dict = {
        LOSS: mean_loss,
        RMSE_TRAINING: rmse,
        MAE_TRAINING: mae,
    }
    wandb.log(log_dict)

    if epoch % 10 == 0:
        # NOTE: `:2f` is a minimum field width of 2 with the default precision,
        # so the values are printed with six decimals.
        print(
            f'Epoch: {epoch + 1}, loss: {mean_loss:2f}, rmse: {rmse:2f}, mae: {mae:2f}')
