In [None]:
import torch
from torch.utils.data import DataLoader
from torch import nn

import numpy as np
import pandas as pd
import glob
import os
from tqdm import tqdm
import sys

import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting style for every figure in this notebook: white grid background, fonts scaled by 1.2.
sns.set_theme(style='whitegrid', font_scale=1.2)

from himodule.custom_classes import NasaDataset
from himodule.linear_regression import LinearRegression
from himodule.secondary_funcs import load_object, seed_everything, check_path
from himodule.normalisation import MinMaxScaler
from himodule.rul_metrics import RULScore, RMSELoss

def find_closest_subarray(large_vector, small_vector, top_k=3):
    """Find the `top_k` windows of `large_vector` that are closest to `small_vector`.

    A window of ``len(small_vector)`` elements is slid along the first dimension
    of ``large_vector``; each window is scored by its Euclidean distance to
    ``small_vector``. Both inputs are expected to be torch tensors (the distance
    is computed with torch ops).

    Parameters
    ----------
    large_vector : torch.Tensor
        Sequence that is searched.
    small_vector : torch.Tensor
        Pattern to look for; must broadcast against a window of the same length.
    top_k : int, default 3
        Maximum number of best windows to report.

    Returns
    -------
    start_index : list[int]
        Start positions of the best windows, best match first.
    end_index : list[int]
        Matching exclusive end positions (``start + len(small_vector)``).
    min_diff : list[float]
        Matching Euclidean distances, in ascending order. Ties keep the
        earlier window first.

    All three lists have ``min(top_k, number_of_windows)`` entries, so they are
    empty when ``small_vector`` is longer than ``large_vector``.

    Raises
    ------
    ValueError
        If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f'top_k must be >= 1, got {top_k}')

    large_len = len(large_vector)
    small_len = len(small_vector)

    # (distance, start) pairs, kept sorted by ascending distance and capped at top_k.
    best = []

    for i in range(large_len - small_len + 1):
        subarray = large_vector[i:i + small_len]
        diff = torch.sqrt(torch.sum(torch.square(subarray - small_vector))).item()

        # NaN never compares as "smaller", so it can never be a best match.
        if diff != diff:
            continue
        # Cheap rejection: not better than the current worst kept match.
        if len(best) == top_k and diff >= best[-1][0]:
            continue

        # Insert behind every entry with distance <= diff so ties stay in scan order.
        pos = len(best)
        while pos > 0 and diff < best[pos - 1][0]:
            pos -= 1
        best.insert(pos, (diff, i))
        del best[top_k:]

    min_diff = [dist for dist, _ in best]
    start_index = [start for _, start in best]
    end_index = [st_index + small_len for st_index in start_index]
    return start_index, end_index, min_diff

In [None]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'{device=}')

In [None]:
SEED = 37
BATCH_SIZE = 20
# Directory holding the reference health-index curves (*.dat) used in the "Smooth" section.
TRUE_HI_PATH = '../../Smoothed/train/'

# Load datasets
train_dataset = NasaDataset('../../datasets/clean_train_data.csv')

test_dataset = NasaDataset('../../datasets/clean_test_data.csv')

# Previously saved scaler object; applied to both splits below.
scaler_path = '../../scalers/MinMaxScaler.pkl'
scaler = load_object(scaler_path)

for dataset in (train_dataset, test_dataset):
    dataset.to(device)
    dataset.dataset = scaler.transform(dataset.dataset)

g = torch.Generator()
g.manual_seed(SEED)
seed_everything(SEED)
# shuffle=False keeps loader order equal to dataset order, which get_predictions relies on
# when it pairs model outputs with dataset.machine_ids.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, generator=g)

seed_everything(SEED)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, generator=g)

print(f'Train: {len(train_dataset)}, Test: {len(test_dataset)}')

input_shape = test_dataset.get_input_shape()

model_path = '../../LinearRegression/regression.pth'
linear_model = LinearRegression(input_shape)
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine
# (and vice versa); without it torch.load tries to recreate the original device.
linear_model.load_state_dict(torch.load(model_path, map_location=device))
linear_model = linear_model.to(device)
# The model is only used for inference in this notebook.
linear_model.eval()

In [None]:
def get_predictions(loader: DataLoader, dataset: NasaDataset, linear_model: LinearRegression):
    """Run `linear_model` over every batch of `loader` and tag each row with its machine id.

    Each batch's ``'sensors'`` entry is moved to the module-level ``device`` and
    passed through the model with gradient tracking disabled. The per-batch
    outputs are stacked vertically and ``dataset.machine_ids`` is prepended as
    the first column.

    Returns a 2-D tensor whose column 0 is the machine id and whose remaining
    column(s) are the model outputs.

    NOTE(review): the id column only lines up with the outputs if `loader`
    walks `dataset` in order (no shuffling, nothing dropped) — confirm at call sites.
    """
    with torch.no_grad():
        batch_outputs = [linear_model(batch['sensors'].to(device)) for batch in loader]

    stacked_outputs = torch.vstack(batch_outputs)
    id_column = dataset.machine_ids[:, None]
    return torch.concat((id_column, stacked_outputs), dim=1)

## RUL estimation against the smoothed reference health-index curves

In [None]:
# Model outputs for the test split; column 0 carries the machine id of each row.
test_predictions = get_predictions(loader=test_loader,
                                   dataset=test_dataset,
                                   linear_model=linear_model)

In [None]:
# For every test machine, match its predicted curve against each reference curve
# stored under TRUE_HI_PATH and turn the best matches into one RUL estimate.
results_RUL = dict()

TOP_MATCHES = 5     # number of best (lowest-distance) candidates that are averaged
WEIGHT_EPS = 1e-8   # keeps the inverse-distance weight finite for an exact match

# The reference curves do not depend on the test machine, so read them once.
# Sorting the paths makes the candidate order (and thus tie-breaking) reproducible.
reference_vectors = [
    torch.FloatTensor(np.fromfile(true_pth)).to(device)
    for true_pth in sorted(glob.glob(os.path.join(TRUE_HI_PATH, '*.dat')))
]

for machine_id in tqdm(test_dataset.machine_ids.unique(), file=sys.stdout):
    machine_id = int(machine_id.item())
    pred_vector = test_predictions[test_predictions[:,0] == machine_id][:,1].flatten()

    # [candidate RUL, distance] pairs collected over all reference curves.
    storage = list()

    for true_vector in reference_vectors:
        # A reference shorter than the observed curve cannot contain it.
        if len(true_vector) < len(pred_vector):
            continue

        _, end_index, min_diff = find_closest_subarray(true_vector, pred_vector)
        for e_index, m_diff in zip(end_index, min_diff):
            # Candidate RUL = number of reference samples left after the matched window.
            storage.append([max(len(true_vector) - e_index, 0), m_diff])

    if not storage:
        raise ValueError(
            f'No reference curve is at least as long as the prediction for machine '
            f'{machine_id} ({len(pred_vector)} samples); cannot estimate its RUL.'
        )

    storage.sort(key=lambda x: x[1])
    best = storage[:TOP_MATCHES]
    # Weight by inverse distance so that closer matches dominate. Using the raw
    # distance as the weight would favour the worst of the selected matches and
    # evaluate to 0/0 when a match is exact.
    weights = [1.0 / (m_diff + WEIGHT_EPS) for _, m_diff in best]
    predicted_RUL = sum(p_RUL * w for (p_RUL, _), w in zip(best, weights)) / sum(weights)
    results_RUL[machine_id] = predicted_RUL

In [None]:
MAX_RUL = 125  # both ground truth and predictions are capped at this value before scoring

with open('../../datasets/RUL_FD001.txt', 'r') as f:
    # Skip blank lines (e.g. a trailing empty line), which int() would reject.
    true_ruls = [int(row) for row in map(str.strip, f) if row]

# Order the predictions by machine id explicitly instead of relying on dict order.
# NOTE(review): assumes line i of the RUL file belongs to the i-th smallest machine id — confirm.
predicted_ruls = [results_RUL[m_id] for m_id in sorted(results_RUL)]
if len(predicted_ruls) != len(true_ruls):
    raise ValueError(
        f'Got {len(predicted_ruls)} predicted RULs but {len(true_ruls)} ground-truth values.'
    )

true_ruls = torch.FloatTensor(true_ruls).to(device)
results_RUL_tensor = torch.FloatTensor(predicted_ruls).to(device)

true_ruls = torch.clamp(true_ruls, max=MAX_RUL)
results_RUL_tensor = torch.clamp(results_RUL_tensor, max=MAX_RUL)

In [None]:
# Score the capped predictions with the project's RMSELoss
# (called as l_func(predictions, ground_truth)); `loss` is reused in the plot title below.
l_func = RMSELoss()

loss = l_func(results_RUL_tensor, true_ruls)
print(f'{loss=}')

In [None]:
# Score the capped predictions with the project's RULScore
# (called as s_func(predictions, ground_truth)); `rul_score` is reused in the plot title below.
s_func = RULScore()
rul_score = s_func(results_RUL_tensor, true_ruls)
print(f'{rul_score=}')

In [None]:
# Long-format frame (one row per machine and series) indexed by the real machine id,
# so that the x-axis actually shows what its "Machine id" label promises instead of
# 0-based row positions. Predictions were produced in ascending machine-id order.
machine_ids = sorted(results_RUL)
df = pd.DataFrame(
    {'true': true_ruls.cpu().numpy(), 'predicted': results_RUL_tensor.cpu().numpy()},
    index=machine_ids,
).melt(ignore_index=False)

plot_path = '../../Plots/experiments/RUL/top-3/'
check_path(plot_path)

plt.close()
fig, ax = plt.subplots(figsize=(10, 5))

sns.lineplot(data=df,
             x=df.index,
             y='value',
             hue='variable',
             ax=ax)

ax.set_ylabel('Remaining Useful Life')
ax.set_xlabel('Machine id')
ax.legend(title=None)
ax.set_title(f'RULScore: {rul_score:.3f},   RMSE: {loss:.3f}')

plt.tight_layout()
plt.savefig(os.path.join(plot_path, 'smooth.png'))
plt.show()

## RUL estimation against the model's predictions on the train set

In [None]:
# Model outputs for both splits; column 0 of each result carries the machine id of the row.
train_predictions = get_predictions(loader=train_loader,
                                    dataset=train_dataset,
                                    linear_model=linear_model)
test_predictions = get_predictions(loader=test_loader,
                                   dataset=test_dataset,
                                   linear_model=linear_model)

In [None]:
# For every test machine, match its predicted curve against the predicted curve of
# each train machine and turn the best matches into one RUL estimate.
results_RUL = dict()

TOP_MATCHES = 5     # number of best (lowest-distance) candidates that are averaged
WEIGHT_EPS = 1e-8   # keeps the inverse-distance weight finite for an exact match

# The per-train-machine curves do not depend on the test machine, so slice them once.
reference_vectors = [
    train_predictions[train_predictions[:,0] == int(true_machine_id.item())][:, 1].flatten()
    for true_machine_id in train_dataset.machine_ids.unique()
]

for machine_id in tqdm(test_dataset.machine_ids.unique(), file=sys.stdout):
    machine_id = int(machine_id.item())
    pred_vector = test_predictions[test_predictions[:,0] == machine_id][:,1].flatten()

    # [candidate RUL, distance] pairs collected over all train machines.
    storage = list()

    for true_vector in reference_vectors:
        # A reference shorter than the observed curve cannot contain it.
        if len(true_vector) < len(pred_vector):
            continue

        _, end_index, min_diff = find_closest_subarray(true_vector, pred_vector)
        for e_index, m_diff in zip(end_index, min_diff):
            # Candidate RUL = number of reference samples left after the matched window.
            storage.append([max(len(true_vector) - e_index, 0), m_diff])

    if not storage:
        raise ValueError(
            f'No train trajectory is at least as long as the prediction for machine '
            f'{machine_id} ({len(pred_vector)} samples); cannot estimate its RUL.'
        )

    storage.sort(key=lambda x: x[1])
    best = storage[:TOP_MATCHES]
    # Weight by inverse distance so that closer matches dominate. Using the raw
    # distance as the weight would favour the worst of the selected matches and
    # evaluate to 0/0 when a match is exact.
    weights = [1.0 / (m_diff + WEIGHT_EPS) for _, m_diff in best]
    predicted_RUL = sum(p_RUL * w for (p_RUL, _), w in zip(best, weights)) / sum(weights)
    results_RUL[machine_id] = predicted_RUL

In [None]:
MAX_RUL = 125  # both ground truth and predictions are capped at this value before scoring

with open('../../datasets/RUL_FD001.txt', 'r') as f:
    # Skip blank lines (e.g. a trailing empty line), which int() would reject.
    true_ruls = [int(row) for row in map(str.strip, f) if row]

# Order the predictions by machine id explicitly instead of relying on dict order.
# NOTE(review): assumes line i of the RUL file belongs to the i-th smallest machine id — confirm.
predicted_ruls = [results_RUL[m_id] for m_id in sorted(results_RUL)]
if len(predicted_ruls) != len(true_ruls):
    raise ValueError(
        f'Got {len(predicted_ruls)} predicted RULs but {len(true_ruls)} ground-truth values.'
    )

true_ruls = torch.FloatTensor(true_ruls).to(device)
results_RUL_tensor = torch.FloatTensor(predicted_ruls).to(device)
true_ruls = torch.clamp(true_ruls, max=MAX_RUL)
results_RUL_tensor = torch.clamp(results_RUL_tensor, max=MAX_RUL)

In [None]:
# Score the capped predictions with the project's RMSELoss
# (called as l_func(predictions, ground_truth)); `loss` is reused in the plot title below.
l_func = RMSELoss()

loss = l_func(results_RUL_tensor, true_ruls)
print(f'{loss=}')

In [None]:
# Score the capped predictions with the project's RULScore
# (called as s_func(predictions, ground_truth)); `rul_score` is reused in the plot title below.
s_func = RULScore()
rul_score = s_func(results_RUL_tensor, true_ruls)
print(f'{rul_score=}')

In [None]:
# Long-format frame (one row per machine and series) indexed by the real machine id,
# so that the x-axis actually shows what its "Machine id" label promises instead of
# 0-based row positions. Predictions were produced in ascending machine-id order.
machine_ids = sorted(results_RUL)
df = pd.DataFrame(
    {'true': true_ruls.cpu().numpy(), 'predicted': results_RUL_tensor.cpu().numpy()},
    index=machine_ids,
).melt(ignore_index=False)

plot_path = '../../Plots/experiments/RUL/top-3/'
check_path(plot_path)

plt.close()
fig, ax = plt.subplots(figsize=(10, 5))

sns.lineplot(data=df,
             x=df.index,
             y='value',
             hue='variable',
             ax=ax)

ax.set_ylabel('Remaining Useful Life')
ax.set_xlabel('Machine id')
ax.legend(title=None)
ax.set_title(f'RULScore: {rul_score:.3f},   RMSE: {loss:.3f}')

plt.tight_layout()
plt.savefig(os.path.join(plot_path, 'with_train.png'))
plt.show()