# Imports & Settings

In [20]:
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rc
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from sklearn.metrics import f1_score

# Parameter Setting

In [18]:
# Run configuration: training hyperparameters plus the global random seed.
# Keys are read elsewhere in the notebook as CFG['<NAME>'].
CFG = dict(
    EPOCHS=10,
    LEARNING_RATE=1e-2,
    BATCH_SIZE=64,
    SEED=41,
)

# Seed

In [21]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN repeatable.

    Args:
        seed (int): Seed value applied to every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one; this call is
    # silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly select different
    # kernels on each run, which contradicts the deterministic flag above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Pin the random seeds using the configured SEED value

# Load Data

In [2]:
# Project root that contains the `Data/` directory.
# Set the HEAT_TREATMENT_PDM_DIR environment variable to run the notebook on
# another machine; the original location is kept as the default.
# os.path.join(..., '') guarantees a trailing separator, which the
# `file_path + 'Data/...'` string concatenations depend on.
file_path = os.path.join(
    os.environ.get(
        'HEAT_TREATMENT_PDM_DIR',
        '/Users/toad/Documents/ToyProject/Heat_Treatment_PdM/',
    ),
    '',
)

In [3]:
# Read the four CSV splits in one pass. Each file carries an 'Unnamed: 0'
# column that is dropped right after loading.
train, valid, test, anomaly = (
    pd.read_csv(file_path + relative_csv).drop(columns='Unnamed: 0')
    for relative_csv in (
        'Data/train_normal.csv',
        'Data/valid_normal.csv',
        'Data/test_normal.csv',
        'Data/anomaly_data.csv',
    )
)

In [4]:
# Report the row count of every split, one line per print call.
size_report = (
    f'Normal Data',
    f'- Train Data : {len(train)}',
    f'- Valid Data : {len(valid)}',
    f'- Test Data : {len(test)}',
    f'--------------------------------',
    f'Anomaly Data : {len(anomaly)}',
)
for report_line in size_report:
    print(report_line)

Normal Data
- Train Data : 2020534
- Valid Data : 288648
- Test Data : 288648
--------------------------------
Anomaly Data : 2717


In [5]:
# Show the first rows of the training frame, then list its column names.
train_preview = train.head()
display(train_preview)

train_column_names = train.columns.values
print(f'Columns : {train_column_names}')

Unnamed: 0,CP(제어) OP,건조 1존 OP,건조 2존 OP,건조로 온도 1 Zone,건조로 온도 2 Zone,세정기,소입1존 OP,소입2존 OP,소입3존 OP,소입4존 OP,...,소입로 CP 모니터 값,소입로 온도 2 Zone,솔트 1존 OP,솔트 2존 OP,솔트 슬러지 제거,솔트 컨베이어 온도 1 Zone,솔트 컨베이어 온도 2 Zone,솔트조 온도 1 Zone,outliers_cnt,anomaly
0,27.5181,69.4988,28.0363,100.366,100.353,69.2698,84.3415,57.7779,50.808,70.3781,...,1.14953e-10,860.286,70.7822,59.7691,325.772,288.802,285.324,328.856,1,0
1,27.5181,69.5777,28.2541,100.366,100.353,69.2698,89.9508,58.0981,49.5654,70.6791,...,1.15036e-10,860.272,72.1144,59.8864,325.711,288.728,285.311,328.783,1,0
2,28.8494,69.6464,30.5821,100.366,100.291,69.2085,85.0744,58.0155,51.0691,70.6326,...,1.1493e-10,860.272,72.1144,59.8864,325.833,288.789,285.372,328.783,2,0
3,28.8494,69.828,28.39,100.428,100.353,69.2698,84.9387,57.9397,49.485,70.6453,...,1.15036e-10,860.272,71.0169,59.8864,325.833,288.728,285.372,328.844,1,0
4,28.8494,72.1245,26.3033,100.366,100.353,69.2085,84.981,57.9252,49.4771,70.6567,...,1.15036e-10,860.272,71.0169,59.8864,325.833,288.728,285.372,328.844,1,0


Columns : ['CP(제어) OP' '건조 1존 OP' '건조 2존 OP' '건조로 온도 1 Zone' '건조로 온도 2 Zone' '세정기'
 '소입1존 OP' '소입2존 OP' '소입3존 OP' '소입4존 OP' '소입로 CP 값' '소입로 CP 모니터 값'
 '소입로 온도 2 Zone' '솔트 1존 OP' '솔트 2존 OP' '솔트 슬러지 제거' '솔트 컨베이어 온도 1 Zone'
 '솔트 컨베이어 온도 2 Zone' '솔트조 온도 1 Zone' 'outliers_cnt' 'anomaly']


# Data Processing

#### Delete outliers count & anomaly feature

In [11]:
# Columns removed from every split before scaling/modeling.
NON_FEATURE_COLS = ['outliers_cnt', 'anomaly']


def _print_col_counts():
    """Print the current number of columns of each split."""
    print(f'''train cols : {len(train.columns)}
valid cols : {len(valid.columns)}
test cols : {len(test.columns)}
anomaly cols : {len(anomaly.columns)}''')


_print_col_counts()
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone is a no-op instead of raising KeyError.
train = train.drop(columns=NON_FEATURE_COLS, errors='ignore')
valid = valid.drop(columns=NON_FEATURE_COLS, errors='ignore')
test = test.drop(columns=NON_FEATURE_COLS, errors='ignore')
anomaly = anomaly.drop(columns=NON_FEATURE_COLS, errors='ignore')
_print_col_counts()

train cols : 21
valid cols : 21
test cols : 21
anomaly cols : 21
train cols : 19
valid cols : 19
test cols : 19
anomaly cols : 19


#### Scaling

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the training split only, then apply that same
# fitted transform to all four splits.
scaler = MinMaxScaler()
scaler.fit(train)

train_scaled, valid_scaled, test_scaled, anomaly_scaled = (
    scaler.transform(split) for split in (train, valid, test, anomaly)
)

# Dataset


In [15]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cpu


In [17]:
class CustomDataset(Dataset):
    """Map-style dataset that serves one feature row per index.

    Args:
        data: 2-D array-like of numeric features (rows are samples), e.g. the
            NumPy array returned by a scikit-learn scaler.
    """

    def __init__(self, data):
        # Convert once to a float32 tensor. Scaler output is float64, and the
        # default DataLoader collate would turn it into double tensors, which
        # float32 nn.Linear layers reject with a dtype mismatch.
        self.data = torch.as_tensor(np.asarray(data), dtype=torch.float32)

    def __getitem__(self, index):
        """Return the sample(s) at `index` as a float32 tensor."""
        return self.data[index]

    def __len__(self):
        """Return the number of samples (rows)."""
        return self.data.shape[0]

In [22]:
# Wrap each scaled split in a Dataset.
train_dataset = CustomDataset(train_scaled)
valid_dataset = CustomDataset(valid_scaled)
test_dataset = CustomDataset(test_scaled)
anomaly_dataset = CustomDataset(anomaly_scaled)

# Only the training loader is shuffled. Evaluation loaders keep the original
# row order so per-sample reconstruction errors stay aligned with the source
# rows and are comparable between runs.
train_dataloader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)
anomaly_dataloader = DataLoader(anomaly_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

# Modeling

In [None]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 32 -> 64 -> 128 -> 64 -> 32 -> input_dim.

    Args:
        input_dim (int): Number of features per sample. Defaults to 19, the
            layer width that was previously hard-coded.

    NOTE(review): the encoder widens the input (up to 128 units) instead of
    compressing it, so there is no bottleneck — confirm this is intended.
    NOTE(review): the decoder ends with ReLU, so outputs are never negative;
    confirm this matches the range of the scaled inputs.
    """

    def __init__(self, input_dim=19):
        super().__init__()
        # Attribute names and layer order are kept so state_dict keys
        # (Encoder.0.weight, ...) stay compatible.
        self.Encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU()
        )

        self.Decoder = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim),
            nn.ReLU(),
        )

    def forward(self, x):
        """Encode then decode `x`; the last dimension of `x` must equal input_dim.

        Returns:
            Tensor with the same last dimension as the input.
        """
        x = self.Encoder(x)
        x = self.Decoder(x)
        return x

# Train

In [None]:
# TODO: implement the training loop, e.g. def train(model, optimizer, train_dataloader, ...)