In [1]:
import warnings

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from xgboost import XGBClassifier, XGBRegressor
from imblearn.over_sampling import SMOTE

from src.features import build_features
from src.models import predict_model
from src.train.train import train, evaluation, prediction_to_csv
from src.data.make_dataset import DatasetLoader
from src.visualization.visual import anomaly_plot
from src.config.config import seed_everything, cfg

# Install a blanket "ignore" filter so no warning is shown for the rest of
# the session.
warnings.filterwarnings('ignore')
# Presumably seeds the RNGs of the libraries in use with the configured
# seed -- the helper lives in src.config.config; confirm which ones it covers.
seed_everything(cfg.SEED)

# Single shared scaler instance: it is fitted on the training features and
# then reused, already fitted, to transform the test features.
scaler = MinMaxScaler()

In [2]:
# Load the raw training set. The path is written with forward slashes so it
# resolves on every platform; a backslash-separated literal is only treated
# as a directory path on Windows.
train_data = pd.read_csv('data/raw/train_data.csv')
train_data = build_features.create_derived_features(train_data)

# Split off the `type` column so that only feature columns reach the scaler.
# From here on `train_data` holds features only.
train_type = train_data['type']
train_data = train_data.drop('type', axis=1)

# Fit the shared scaler on the training features and scale them in one step.
scaled_train_data = scaler.fit_transform(train_data)

# The scaler hands back a plain array by default, so rebuild a DataFrame
# carrying the original feature column names.
scaled_train_data = pd.DataFrame(scaled_train_data, columns=train_data.columns)


In [3]:
# Oversample the scaled training features with SMOTE, using the `type`
# column as the class label to balance on.
# NOTE(review): the seed is hard-coded to 42 here rather than taken from
# cfg.SEED -- confirm whether the two are meant to agree.
# NOTE(review): this cell expects `scaled_train_data` to contain feature
# columns only. The next cell rebinds that name to a frame that also holds
# `type`, so re-run from the data-loading cell before re-running this one.
sm = SMOTE(random_state=42)
x_res, y_res = sm.fit_resample(X=scaled_train_data, y=train_type)

In [4]:
# Put the resampled labels back next to the resampled features (column-wise)
# so the frame can later be grouped by `type`.
scaled_train_data = pd.concat(objs=[x_res, y_res], axis='columns')

In [5]:
# Load the raw test set. Forward slashes keep the relative path portable;
# a backslash-separated literal only resolves as a path on Windows.
test_data = pd.read_csv('data/raw/test_data.csv')
test_data = build_features.create_derived_features(test_data)
test_type = test_data['type']

# Transform (never re-fit) with the scaler that was fitted on the training
# features, so train and test share the same scaling.
scaled_test_data = scaler.transform(test_data.drop('type', axis=1))

# Rebuild a DataFrame with the training feature names AND the original row
# index. Column assignment below aligns on index labels, so a fresh
# RangeIndex would silently produce NaN / misassigned types whenever the
# feature-engineering step leaves `test_data` with a non-default index.
scaled_test_data = pd.DataFrame(
    scaled_test_data,
    columns=train_data.columns,
    index=test_data.index,
)
scaled_test_data['type'] = test_type

In [6]:
# Train one autoencoder per `type` group and evaluate it on the test rows of
# the same type.
grouped_train = scaled_train_data.groupby('type')

preds = []
ths = []
# Positional row numbers (within scaled_test_data) of the rows evaluated for
# each group, kept so the original test order can be restored afterwards.
test_positions = []
for group_name, group_data in grouped_train:
    # Boolean mask over the test rows that belong to the current type.
    test_mask = (scaled_test_data['type'] == group_name).to_numpy()
    test_positions.append(np.flatnonzero(test_mask))

    # Drop the label column: the model only sees feature values.
    train_group = group_data.drop('type', axis=1).values
    test_group = scaled_test_data.loc[test_mask].drop('type', axis=1).values

    n_features = train_group.shape[1]
    print(n_features)
    dataloader = DatasetLoader(train_group, test_group)
    train_loader, test_loader = dataloader.load
    # A fresh model, loss and optimizer per group, so groups share no weights.
    model = predict_model.AutoEncoder(input_dim=n_features, latent_dim=64)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    train(train_loader, model, criterion, optimizer)

    prediction, cosine = evaluation(test_loader, model)
    preds.append(prediction)
    ths.append(cosine)
    print(f"finish {group_name}type")

threshold = np.concatenate(ths)
preds = np.concatenate(preds)

# The per-group results were concatenated in group-key order, which matches
# the original test-row order only if the test set is already sorted by
# `type`. Undo that permutation so row i of the output belongs to row i of
# the test set. This is a no-op when the test set is already sorted.
# Assumes `evaluation` returns one entry per test row in loader order --
# TODO confirm; the length guards skip the reordering when that does not hold.
restore_order = np.argsort(np.concatenate(test_positions), kind='stable')
if len(preds) == len(restore_order):
    preds = preds[restore_order]
if len(threshold) == len(restore_order):
    threshold = threshold[restore_order]

prediction_to_csv(preds)

25
Epoch [1/1000], Loss: 0.3558642
Epoch [2/1000], Loss: 0.2123693
Epoch [3/1000], Loss: 0.1406181
Epoch [4/1000], Loss: 0.0823059
Epoch [5/1000], Loss: 0.0492239
Epoch [6/1000], Loss: 0.0296831
Epoch [7/1000], Loss: 0.0204742
Epoch [8/1000], Loss: 0.0200938
Epoch [9/1000], Loss: 0.0212491
Epoch [10/1000], Loss: 0.0215394
Epoch [11/1000], Loss: 0.0227707
Epoch [12/1000], Loss: 0.0229594
Epoch [13/1000], Loss: 0.0221928
Epoch [14/1000], Loss: 0.0204684
Epoch [15/1000], Loss: 0.0176247
Epoch [16/1000], Loss: 0.0149112
Epoch [17/1000], Loss: 0.0127582
Epoch [18/1000], Loss: 0.0105253
Epoch [19/1000], Loss: 0.0083847
Epoch [20/1000], Loss: 0.0068979
Epoch [21/1000], Loss: 0.0058760
Epoch [22/1000], Loss: 0.0049598
Epoch [23/1000], Loss: 0.0041789
Epoch [24/1000], Loss: 0.0036385
Epoch [25/1000], Loss: 0.0033104
Epoch [26/1000], Loss: 0.0031344
Epoch [27/1000], Loss: 0.0030734
Epoch [28/1000], Loss: 0.0030461
Epoch [29/1000], Loss: 0.0029472
Epoch [30/1000], Loss: 0.0027455
Epoch [31/1000],