In [1]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from statistics import mean

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns

from himodule.custom_classes import NasaDataset, LossAndMetric
from himodule.ae_metrics import MAPE
from himodule.normalisation import StandardScaler, MinMaxScaler
from himodule.secondary_funcs import save_object, load_object, check_path, split_dataset, \
    seed_everything, split_anomaly_normal, split_anomaly_normal23
from himodule.linear_regression import LinearRegression

from collections import defaultdict

import os
import glob

# Global plotting theme for every figure in the notebook: white grid background, 1.2x fonts.
sns.set_theme(font_scale=1.2, style='whitegrid')

In [2]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Show the choice so it is visible in the cell output.
print(f'{device=}')

device='cuda'


In [3]:
def get_targets(path: str):
    """Load every ``*.dat`` file found directly in ``path``.

    Each file is read with ``np.fromfile`` (default dtype) and stored under the
    integer obtained from its file name without the extension, e.g. ``12.dat``
    is stored under key ``12``.

    Parameters
    ----------
    path : str
        Directory that contains the ``.dat`` files.

    Returns
    -------
    dict
        ``{int_key: np.ndarray}`` with keys inserted in ascending order.
        Empty when the directory holds no ``.dat`` files.

    Raises
    ------
    ValueError
        If a ``.dat`` file name (without extension) is not an integer literal.
    """
    arrays = dict()
    for pth in glob.glob(os.path.join(path, '*.dat')):
        # Use os.path helpers instead of splitting on a hard-coded '\\': they
        # follow the separator rules of the running platform, so the same code
        # works on Windows and on POSIX systems.
        stem = os.path.splitext(os.path.basename(pth))[0]
        arrays[int(stem)] = np.fromfile(pth)

    # glob gives no ordering guarantee; rebuild the dict in ascending key order
    # so that callers iterating over it see a deterministic sequence.
    return {key: arrays[key] for key in sorted(arrays)}

def transform_targets(targets: dict):
    """Join the per-key target arrays into one flat array.

    The arrays are concatenated along their first axis in the dict's iteration
    order. The keys only determine that order; they are not part of the result
    and do not influence its dtype.

    Parameters
    ----------
    targets : dict
        Mapping of key (machine id) to a 1-D array-like of target values.

    Returns
    -------
    np.ndarray
        All target values, one array after another.

    Raises
    ------
    ValueError
        If ``targets`` is empty, because there is nothing to concatenate.
    """
    if not targets:
        raise ValueError('targets is empty: there is nothing to concatenate')
    # Concatenate the values directly; no machine-id row is built, since the
    # caller only ever receives the target values.
    return np.concatenate([np.asarray(targs) for targs in targets.values()])

In [4]:
# Read the target arrays of each split from disk and flatten them into one array,
# then append a trailing axis so every target value sits in its own row.
train_targets = transform_targets(get_targets('../Smoothed/train'))
train_targets = train_targets[:, np.newaxis]

test_targets = transform_targets(get_targets('../Smoothed/test'))
test_targets = test_targets[:, np.newaxis]

In [5]:
# Run configuration.
seed = 37
batch_size = 20
window_size = 5  # NOTE(review): not referenced by any of the visible cells.

# Whole dataset loading
train_dataset = NasaDataset('../datasets/clean_train_data.csv', targets=train_targets)

test_dataset = NasaDataset('../datasets/clean_test_data.csv', targets=test_targets)

# Load the stored scaler from the single path defined here, so changing
# `scaler_path` really changes which file is read.
# NOTE(review): the .pkl suffix suggests a pickle file; only load trusted files.
scaler_path = '../scalers/MinMaxScaler.pkl'
scaler = load_object(scaler_path)

# Short label for the scaler, taken from the first token of its repr (the part
# after the last dot). str.split never returns an empty list, so indexing it
# cannot raise IndexError; the 'no_scaling' label is therefore selected
# explicitly when no scaler object is available.
if scaler is None:
    norm_name = 'no_scaling'
else:
    norm_name = repr(scaler).split(' ', maxsplit=2)[0].split('.')[-1]

# Move each dataset to the selected device, then scale its data if a scaler exists.
train_dataset.to(device)
if scaler is not None:
    train_dataset.dataset = scaler.transform(train_dataset.dataset)

test_dataset.to(device)
if scaler is not None:
    test_dataset.dataset = scaler.transform(test_dataset.dataset)

# Both loaders iterate in dataset order (shuffle=False); the seeded generator is
# still passed so that enabling shuffling later stays reproducible.
seed_everything(seed)
g = torch.Generator()
g.manual_seed(seed)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, generator=g)

seed_everything(seed)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, generator=g)

print(f'Train: {len(train_dataset)}\nTest: {len(test_dataset)}')

input_shape = train_dataset.get_input_shape()

# Re-seed right before the model is created so its initial weights are reproducible.
seed_everything(seed)
linear_model = LinearRegression(input_shape).to(device)
loss_func = nn.MSELoss()
metric_func = MAPE()
optimiser = optim.Adam(linear_model.parameters(),
                       lr=1e-3)
# First token of the optimiser's repr, kept as a short label.
optimiser_name = repr(optimiser).split(' ', maxsplit=1)[0]

Train: 20631
Test: 13096


In [6]:
epochs = 100
history = list()  # one (epoch, mean loss, mean metric) tuple per epoch

# Fit the linear model on every batch of `train_loader`.
linear_model.train()
for epoch in range(epochs):
    train_losses = list()
    batch_metrics = list()
    for dta in train_loader:
        # Move input and target together so both live on the same device as the model.
        sample = dta['sensors'].to(device)
        hi_target = dta['targets'].to(device)
        hi = linear_model(sample)

        loss = loss_func(hi, hi_target)
        metric = metric_func(hi, hi_target)

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

        train_losses.append(loss.item())
        batch_metrics.append(metric.item())

    # Epoch summary: plain average of the per-batch values.
    # NOTE(review): the last batch can be smaller than the others, so this is an
    # unweighted mean over batches, not over samples.
    train_loss = mean(train_losses)
    train_metrics = mean(batch_metrics)
    history.append((epoch, train_loss, train_metrics))
    if (epoch + 1) % 10 == 0 or epoch == epochs - 1:
        print(f'{epoch+1:>3}/{epochs:>3}: {train_loss=:.4f}, {train_metrics=:.4f}%')

# Evaluate on the test loader: inference mode, no gradient tracking.
linear_model.eval()
with torch.no_grad():
    test_losses = list()
    test_metrics = list()
    for dta in test_loader:
        sample = dta['sensors'].to(device)
        hi_target = dta['targets'].to(device)
        hi = linear_model(sample)

        loss = loss_func(hi, hi_target)
        metric = metric_func(hi, hi_target)

        test_losses.append(loss.item())
        test_metrics.append(metric.item())

    print(f'\nTest: {mean(test_losses)=:.4f}, {mean(test_metrics)=:.4f}%')

 10/100: train_loss=0.0140, train_metrics=12.2638%
 20/100: train_loss=0.0128, train_metrics=11.7513%
 30/100: train_loss=0.0122, train_metrics=11.5041%
 40/100: train_loss=0.0119, train_metrics=11.3690%
 50/100: train_loss=0.0118, train_metrics=11.2950%
 60/100: train_loss=0.0117, train_metrics=11.2558%
 70/100: train_loss=0.0116, train_metrics=11.2348%
 80/100: train_loss=0.0116, train_metrics=11.2237%
 90/100: train_loss=0.0116, train_metrics=11.2177%
100/100: train_loss=0.0116, train_metrics=11.2147%

Test: mean(test_losses)=0.0417, mean(test_metrics)=27.5697%


In [15]:
# `test_targets` already received its trailing axis when it was created, so
# inspect its shape directly; indexing with [:, None] again would add one more
# axis when the notebook is run top to bottom.
test_targets.shape

(13096, 1)