### Best Model 2개
- Autoencoder 
- Deep SVDD

In [5]:
import pandas as pd
import numpy as np
import os
import glob
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from model.AE_anbormaly_detection import Autoencoder
from model.Deep_SVDD import DeepSVDD
import torch.nn as nn
import torch.optim as optim
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [6]:
# Load the merged, preprocessed table that every later cell works from.
df = pd.read_csv('E:/glass_git/ML-DL/Signal&Table/data/cromate/preped/df_merged.csv')

# Model input columns and the label column.
features = [
    'pH', 'Temp', 'Voltage', 'run_time',
    'month', 'day', 'hour', 'minutes', 'seconds',
]
target = ['class']

# Split rows by label: class 0 goes to normal_df, class 1 to abnormal_df.
normal_df = df.loc[df['class'].eq(0)]
abnormal_df = df.loc[df['class'].eq(1)]


In [7]:
# Seed every RNG the experiment can touch, not just `random`.
# `random` drives the lot sampling below; PyTorch modules draw from torch's
# global RNG (e.g. default weight initialisation), so without seeding it the
# reported accuracies change on every Restart & Run All. NumPy is seeded as
# well so that any library falling back to its global RNG is repeatable.
SEED = 42
random.seed(SEED)  # For reproducibility
np.random.seed(SEED)
torch.manual_seed(SEED)
def sampling(df,normal_df,abnormal_df):
    """Draw a random lot-level train/test split and standardise the features.

    Nine normal lots are picked at random (via the `random` module) and held
    out together with every abnormal lot; the remaining normal lots form the
    training set. The scaler is fitted on the training rows only and then
    applied to the test rows.

    Returns:
        (scaled_x_train, scaled_x_test, test_df): the scaled training and
        test feature arrays, plus the unscaled test rows (labels included).
    """
    # Lots reserved for testing: 9 sampled normal lots + all abnormal lots
    # (the original note states there are 9 abnormal lots in total).
    normal_lots = normal_df.Lot.unique().tolist()
    held_out_lots = random.sample(normal_lots, 9) + abnormal_df.Lot.unique().tolist()

    # Train on normal rows outside the held-out lots; test on every held-out row.
    in_train = ~normal_df['Lot'].isin(held_out_lots)
    in_test = df['Lot'].isin(held_out_lots)
    train_df = normal_df[in_train]
    test_df = df[in_test]

    # Fit the scaler on training data only, then reuse it for both sets.
    scaler = StandardScaler().fit(train_df[features])
    scaled_x_train = scaler.transform(train_df[features])
    scaled_x_test = scaler.transform(test_df[features])
    return scaled_x_train, scaled_x_test, test_df


In [8]:
def deepsvdd_train(scaled_x_train, scaled_x_test, test_df):
    """Train a hard-boundary (one-class) Deep SVDD and return test accuracy in percent.

    Args:
        scaled_x_train: scaled training features (normal rows only), array-like.
        scaled_x_test: scaled test features, array-like.
        test_df: test rows; its `target` column(s) supply the true labels.

    Returns:
        Accuracy on the test rows, multiplied by 100.

    Raises:
        NotImplementedError: if the model reports an objective other than
            "one-class", for which no thresholding rule is defined here.
    """
    x_train = torch.tensor(scaled_x_train, dtype=torch.float32)
    x_test = torch.tensor(scaled_x_test, dtype=torch.float32)
    train_loader = DataLoader(TensorDataset(x_train), batch_size=256, shuffle=False)

    # Input width comes from the data instead of a hard-coded 9, so the
    # function keeps working if the feature list changes.
    # (Soft-boundary alternative tried by the author: objective="soft-boundary", nu=0.05.)
    deepsvdd = DeepSVDD(in_dim=x_train.shape[1], rep_dim=16, objective="one-class")

    # Fail fast with a clear message; previously any other objective trained
    # for 50 epochs and then died with a NameError on an unbound threshold.
    if deepsvdd.objective != "one-class":
        raise NotImplementedError(
            f"thresholding is only implemented for objective='one-class', "
            f"got {deepsvdd.objective!r}"
        )

    deepsvdd.fit(train_loader, lr=1e-3, weight_decay=1e-6, epochs=50, R_update_freq=5)

    with torch.no_grad():
        s_train = deepsvdd.score(x_train)
        s_test = deepsvdd.score(x_test)

    # Threshold = 0.97 quantile of the training scores.
    thr = torch.quantile(s_train, 0.97).item()

    # Threshold the raw test scores exactly once. The previous code compared
    # the output of predict(..., threshold=thr) against thr a second time.
    # NOTE(review): assumes score() returns the same per-sample anomaly score
    # that predict() thresholds internally - confirm against model/Deep_SVDD.
    deepsvdd_thr_pred = (s_test > thr).int()

    acc = accuracy_score(test_df[target], deepsvdd_thr_pred)
    return acc*100

def autoencoder_train(scaled_x_train, scaled_x_test,test_df):
    """Train an autoencoder on the training rows and return test accuracy in percent.

    A test row is flagged as abnormal when its per-sample reconstruction MSE
    exceeds the 85th percentile of the reconstruction MSE on the training
    (normal-only) rows.

    Args:
        scaled_x_train: scaled training features (normal rows only), array-like.
        scaled_x_test: scaled test features, array-like.
        test_df: test rows; its `target` column(s) supply the true labels.

    Returns:
        Accuracy on the test rows, multiplied by 100.
    """
    x_train = torch.tensor(scaled_x_train, dtype=torch.float32)
    x_test = torch.tensor(scaled_x_test, dtype=torch.float32)
    train_loader = DataLoader(TensorDataset(x_train), batch_size=256, shuffle=False)

    model = Autoencoder()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Train: minimise the reconstruction MSE for 50 epochs.
    for epoch in range(50):
        for (batch,) in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch), batch)
            loss.backward()
            optimizer.step()

    # Inference mode; this only changes behaviour if the model contains
    # layers such as dropout or batch norm.
    # NOTE(review): assumes Autoencoder is a torch.nn.Module - it is already
    # used that way above (callable, .parameters()).
    model.eval()
    with torch.no_grad():
        train_errors = torch.mean((x_train - model(x_train)) ** 2, dim=1)  # MSE per sample
        test_errors = torch.mean((x_test - model(x_test)) ** 2, dim=1)

    # Threshold from the TRAINING (normal-only) errors. The previous code
    # took the percentile over test errors sliced by the training length,
    # which mixed abnormal test rows into the threshold.
    # The percentile stays at 85 as in the code; the old comment said 99.
    threshold = float(np.percentile(train_errors.numpy(), 85))
    AE_pred = (test_errors > threshold).int()

    acc = accuracy_score(test_df[target], AE_pred)
    return acc*100


In [None]:
# Repeat the experiment 10 times, each on a freshly sampled train/test lot
# split, and collect the test accuracy (in percent) of both models per run.
# NOTE(review): `reuslts` is a misspelling of "results"; the name is kept
# because cells outside this view may still read it.
reuslts={'deepsvdd':[], 'autoencoder':[]}
for i in range(10): 
    # New random hold-out split, scaled with a scaler fitted on this run's training rows.
    scaled_x_train,scaled_x_test,test_df=sampling(df,normal_df,abnormal_df)
    # Both models are trained and evaluated on the same split.
    deepsvdd_acc=deepsvdd_train(scaled_x_train, scaled_x_test, test_df)
    ae_acc=autoencoder_train(scaled_x_train, scaled_x_test,test_df)
    reuslts['deepsvdd'].append(deepsvdd_acc)
    reuslts['autoencoder'].append(ae_acc)
# Show the collected accuracies as the cell output.
reuslts

[001/50] loss=0.011447
[002/50] loss=0.005159
[003/50] loss=0.002936
[004/50] loss=0.001901
[005/50] loss=0.001335
[006/50] loss=0.001048
[007/50] loss=0.000878
[008/50] loss=0.000736
[009/50] loss=0.000629
[010/50] loss=0.000553
[011/50] loss=0.000498
[012/50] loss=0.000454
[013/50] loss=0.000419
[014/50] loss=0.000388
[015/50] loss=0.000364
[016/50] loss=0.000349
[017/50] loss=0.000339
[018/50] loss=0.000326
[019/50] loss=0.000301
[020/50] loss=0.000261
[021/50] loss=0.000224
[022/50] loss=0.000202
[023/50] loss=0.000202
[024/50] loss=0.000208
[025/50] loss=0.000194
[026/50] loss=0.000182
[027/50] loss=0.000166
[028/50] loss=0.000125
[029/50] loss=0.000139
[030/50] loss=0.000185
[031/50] loss=0.000285
[032/50] loss=0.000140
[033/50] loss=0.000039
[034/50] loss=0.000060
[035/50] loss=0.000088
[036/50] loss=0.000077
[037/50] loss=0.000102
[038/50] loss=0.000101
[039/50] loss=0.000051
[040/50] loss=0.000075
[041/50] loss=0.000069
[042/50] loss=0.000115
[043/50] loss=0.000141
[044/50] lo