In [None]:
import os

from collections import defaultdict

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from DataLoader import (
    loader,
    config
)

from Processer import preprocesser

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Load the precomputed feature table; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/processed/mixed/features.csv")

In [None]:
# Keep the unnamed leading CSV column under the name `date`,
# and continue with a frame that holds only the remaining columns.
date = df.loc[:, "Unnamed: 0"]
df = df.drop(columns=["Unnamed: 0"])

In [None]:
# Columns used for the first, reduced-feature experiment.
feat_names = [
    "kurtosis",
    "skewness",
    "mean",
    "shapeFactor",
    "2.Вибропреобразователь ППДв 5_1 Рост СКЗ Виброускорения",
]
df_selected = df.loc[:, feat_names]

In [None]:
class CustomAutoencoder(nn.Module):
    """Fully connected autoencoder with BatchNorm + ReLU between linear layers.

    ``param`` is an indexable sequence. Only its first four entries are read
    here: ``param[0]`` is the input (and output) width, ``param[1]`` and
    ``param[2]`` are the hidden widths, ``param[3]`` is the bottleneck width.
    The decoder mirrors the encoder.
    """

    def __init__(self, param):
        super().__init__()
        n_in, n_hidden1, n_hidden2, n_latent = param[0], param[1], param[2], param[3]

        # Encoder: n_in -> n_hidden1 -> n_hidden2 -> n_latent (no activation on the code).
        encoder_layers = [
            nn.Linear(n_in, n_hidden1),
            nn.BatchNorm1d(n_hidden1),
            nn.ReLU(),
            nn.Linear(n_hidden1, n_hidden2),
            nn.BatchNorm1d(n_hidden2),
            nn.ReLU(),
            nn.Linear(n_hidden2, n_latent),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: n_latent -> n_hidden2 -> n_hidden1 -> n_in (no activation on the output).
        decoder_layers = [
            nn.Linear(n_latent, n_hidden2),
            nn.BatchNorm1d(n_hidden2),
            nn.ReLU(),
            nn.Linear(n_hidden2, n_hidden1),
            nn.BatchNorm1d(n_hidden1),
            nn.ReLU(),
            nn.Linear(n_hidden1, n_in),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back to the input width."""
        return self.decoder(self.encoder(x))

def arch(param, data, epochs=100):
    """Train a CustomAutoencoder to reconstruct its input and return it in eval mode.

    Args:
        param: indexable sequence. ``param[0:4]`` are the layer widths consumed
            by ``CustomAutoencoder``, ``param[4]`` is the Adam learning rate and
            ``param[5]`` is the batch size.
        data: dataset whose batches are indexable with the input tensor at
            position 0 (e.g. ``TensorDataset(X)``); a raw ``np.array`` is not
            supported.
        epochs: number of passes over ``data``. Defaults to 100, the value that
            used to be hard-coded.

    Returns:
        The trained model, switched to evaluation mode. The network contains
        BatchNorm layers: in training mode they normalise with the statistics
        of the current batch and keep updating their running averages, so a
        model left in training mode gives batch-dependent reconstructions at
        inference time.
    """
    model = CustomAutoencoder(param)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=param[4])

    # Assumes data is a TensorDataset, not an np.array.
    train_loader = DataLoader(data, batch_size=param[5])

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch in train_loader:
            inputs = batch[0]
            optimizer.zero_grad()
            outputs = model(inputs)
            # Reconstruction objective: the target is the input itself.
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

    # eval() returns the module itself, now in inference mode.
    return model.eval()

In [None]:
# Positional split: rows 0-699 for training, 700-999 for validation, the rest for testing.
train_data, val_data, test_data = (
    df_selected.iloc[:700],
    df_selected.iloc[700:1000],
    df_selected.iloc[1000:],
)

# Convert each split to a float32 tensor.
X_train, X_val, X_test = (
    torch.tensor(part.values, dtype=torch.float32)
    for part in (train_data, val_data, test_data)
)

In [None]:
# Seed PyTorch so weight initialisation, and therefore the training run, is repeatable.
torch.manual_seed(42)

# Take the input width from the data instead of hard-coding it, so the cell
# keeps working when the list of selected features changes.
input_size = X_train.shape[1]
# Layout: [input width, hidden 1, hidden 2, bottleneck, learning rate, batch size]
param = [input_size, 64, 32, 16, 0.001, 32]

train_dataset = TensorDataset(X_train)

trained_model = arch(param, train_dataset)

In [None]:
# Reconstruct the validation split.
# eval() makes the BatchNorm layers use their running statistics instead of the
# statistics of this batch (and stops them from being updated); no_grad() skips
# building an autograd graph that is never used.
trained_model.eval()
with torch.no_grad():
    reconstructed = trained_model(X_val)

In [None]:
# Overlay the reconstruction of column 4 (default colour) and the original
# validation values of the same column (red) on the current axes.
sns.lineplot(reconstructed[:, 4].detach().numpy())
sns.lineplot(X_val[:, 4], c="red")

In [None]:
# Per-feature reconstruction error on the validation split, one line per column.
# The magnitude is plotted because the y-axis is logarithmic: signed residuals
# would lose every negative value on a log scale.
sns.lineplot((X_val - reconstructed).abs().detach().numpy())
plt.yscale("log")
plt.xscale("log")

In [None]:
# Reconstruct the training split and plot the signed residual per feature.
# eval() + no_grad(): run BatchNorm on its running statistics and skip autograd
# bookkeeping, as appropriate for inference.
trained_model.eval()
with torch.no_grad():
    reconstructed = trained_model(X_train)
sns.lineplot(X_train - reconstructed.detach().numpy())
# plt.yscale("log")

In [None]:
# Smallest element of the residual between X_train and the current `reconstructed`.
(X_train - reconstructed.detach().numpy()).min()

In [None]:
# Whole selected-feature table (all rows, no split) as one float32 tensor.
X = torch.tensor(df_selected.to_numpy(), dtype=torch.float32)

In [None]:
# One threshold per feature column of X: column mean plus one standard deviation.
thresholds = [col.mean() + col.std() for col in X.T]


In [None]:
def calc_cumulative_error(data, thresh):
    """Return the running count of samples that reach or exceed ``thresh``.

    Args:
        data: 1-D sequence of values. Objects with a ``tolist()`` method
            (e.g. NumPy arrays) are converted with it; anything else is
            passed through ``list()``.
        thresh: value a sample must reach (``>=``) to be counted.

    Returns:
        A list the same length as ``data`` where element ``i`` is the number
        of samples in ``data[:i + 1]`` that are ``>= thresh``. Empty input
        gives an empty list.
    """
    values = data.tolist() if hasattr(data, "tolist") else list(data)

    errors = []
    count = 0
    # Start at the first sample: the previous version began at index 1 and
    # therefore never counted an exceedance at position 0.
    for value in values:
        if value >= thresh:
            count += 1
        errors.append(count)

    return errors

In [None]:
# Reconstruct the full table in inference mode: eval() so BatchNorm uses its
# running statistics, no_grad() to skip autograd bookkeeping.
trained_model.eval()
with torch.no_grad():
    recon = trained_model(X).detach().numpy()

# One cumulative exceedance curve per feature column of the reconstruction.
# NOTE(review): the thresholds come from the input X but are applied to the
# reconstructed values, not to the residual; confirm this is intended.
error_mat = [
    calc_cumulative_error(col, thresholds[i])
    for i, col in enumerate(recon.T)
]

In [None]:
# Plot the cumulative exceedance counts stored in error_mat (one inner list per feature).
sns.lineplot(error_mat)

In [None]:
# Display the selected-feature frame as the cell output.
df_selected

Попробуем рассмотреть все признаки

In [None]:
# Rescale every column of df to the [0, 1] range.
# NOTE(review): from here on `df` is the ndarray returned by the scaler, not a
# DataFrame, and the scaler is fitted on all rows (including the rows later
# used for validation/test); confirm both are intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df)
df = scaler.transform(df)

X = torch.tensor(df, dtype=torch.float32)

# One threshold per column: mean plus two standard deviations.
thresholds = [col.mean() + 2 * col.std() for col in X.T]


In [None]:
# Positional split of the scaled array: rows 0-699 train, 700-999 validation, rest test.
train_data, val_data, test_data = df[:700], df[700:1000], df[1000:]

# Convert each split to a float32 tensor.
X_train, X_val, X_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in (train_data, val_data, test_data)
)

In [None]:
# Seed PyTorch so weight initialisation, and therefore the training run, is repeatable.
torch.manual_seed(42)

# Take the input width from the data instead of hard-coding it, so the cell
# keeps working if the feature file gains or loses columns.
input_size = X_train.shape[1]
# Layout: [input width, hidden 1, hidden 2, bottleneck, learning rate, batch size]
param = [input_size, 64, 32, 16, 0.001, 32]

train_dataset = TensorDataset(X_train)

trained_model = arch(param, train_dataset)

In [None]:
# Reconstruct the full scaled table in inference mode: eval() so BatchNorm uses
# its running statistics, no_grad() to skip autograd bookkeeping.
trained_model.eval()
with torch.no_grad():
    recon = trained_model(X).detach().numpy()

# One cumulative exceedance curve per feature column of the reconstruction.
error_mat = [
    calc_cumulative_error(col, thresholds[i])
    for i, col in enumerate(recon.T)
]
sns.lineplot(error_mat, legend=False)
# plt.yscale("log")

In [None]:
# Last column of the scaled array with its mean (red line) and a mean +/- 2 std band.
last_feature = df[:, -1]
last_mean = last_feature.mean()
last_std = last_feature.std()

sns.lineplot(last_feature)
plt.axhline(last_mean, c="red")
plt.axhspan(ymin=last_mean - 2*last_std, ymax=last_mean + 2*last_std, alpha=0.5)

In [None]:
# Sum the per-feature cumulative counts into a single curve over the sample index.
sns.lineplot(np.asarray(error_mat).sum(axis=0))

In [None]:
def normalize_errors(errors):
    """Scale a sequence of error values by its maximum.

    Args:
        errors: sized sequence of numbers (list or 1-D NumPy array).

    Returns:
        A list with each value divided by ``max(errors)``. If the maximum is
        0 the result is all zeros (avoids division by zero); empty input
        gives an empty list instead of the ``ValueError`` that ``max()``
        raises on an empty sequence.
    """
    if len(errors) == 0:
        return []

    max_error = max(errors)
    if max_error == 0:
        return [0] * len(errors)

    return [e / max_error for e in errors]

def invert_normalized_errors(normalized_errors):
    """Map each normalised error ``e`` to ``1 - e`` and return the results as a list."""
    health_indicator = []
    for value in normalized_errors:
        health_indicator.append(1 - value)
    return health_indicator

In [None]:
# Sum the cumulative counts across features, rescale by the maximum and invert,
# so the curve starts near 1 and falls as exceedances accumulate.
total_errors = np.sum(np.array(error_mat), axis=0)
normalized_errors = normalize_errors(total_errors)
health_indicator = invert_normalized_errors(normalized_errors)
sns.lineplot(health_indicator)

In [None]:
# Interactive Plotly view of the health indicator against the sample index.
fig = go.Figure()

# Add the first (and only) trace.
hi_trace = go.Scatter(
    x=np.arange(len(health_indicator)),
    y=health_indicator,
    mode="lines",
    name="HI",
)
fig.add_trace(hi_trace)

layout_options = dict(
    title="Health Indicator",
    xaxis_title="Index",
    yaxis_title="Health Indicator",
    template="plotly_dark",
)
# update_layout returns the figure, so it is rendered as the cell output.
fig.update_layout(**layout_options)

In [None]:
# Compare the health indicator with a straight 1 -> 0 reference line.
fig, ax = plt.subplots()
sns.lineplot(health_indicator, ax=ax)
# The reference takes its length from the data (it was hard-coded to 2155), so
# both curves always span the same index range.
sns.lineplot(np.linspace(1, 0, len(health_indicator)), ax=ax)