## 1. Packages

In [397]:
!pip install numpy pandas scikit-learn seaborn matplotlib optuna torch xgboost catboost lightgbm tqdm requests

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [398]:
import os
from pprint import pprint
import requests
from sklearn.impute import KNNImputer


import numpy as np
import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    make_scorer,
    roc_curve,
    auc,
    precision_recall_curve,
    recall_score,
    silhouette_score,
)

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans

import random
import string

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

import seaborn as sns
import matplotlib.pyplot as plt
import optuna

import torch
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from catboost import CatBoostClassifier, Pool
from lightgbm import LGBMClassifier, plot_metric
import lightgbm as lgb

from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

from sklearn.model_selection import train_test_split
from tqdm import tqdm

### 2. Functions

In [399]:
f1_scorer = make_scorer(f1_score, pos_label=1, average = 'binary')

def get_clf_eval(y_test, y_pred=None):
    confusion = confusion_matrix(y_test, y_pred, labels=[True, False])
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, labels=[True, False])
    recall = recall_score(y_test, y_pred)
    F1 = f1_score(y_test, y_pred, labels=[True, False])

    print("오차행렬:\n", confusion)
    print("\n정확도: {:.4f}".format(accuracy))
    print("정밀도: {:.4f}".format(precision))
    print("재현율: {:.4f}".format(recall))
    print("F1: {:.4f}".format(F1))

In [400]:
# 중요도 시각화
def plot_feature_importance(importance_dict, feature_names, title):
    plt.figure(figsize=(15, 10))
    
    for i, (importance_type, importance) in enumerate(importance_dict.items()):
        sorted_idx = np.argsort(importance)[::-1]
        sorted_features = np.array(feature_names)[sorted_idx]
        sorted_importance = importance[sorted_idx]
        
        plt.subplot(3, 1, i + 1)
        plt.barh(sorted_features, sorted_importance, color='skyblue')
        plt.xlabel('Importance')
        plt.title(f'Feature Importance by {importance_type.capitalize()}')

    plt.tight_layout()
    plt.show()

In [401]:
def fetch_weather_data(latitude, longitude, start_date, end_date):
    """
    Open-Meteo API를 사용하여 지정된 기간 동안의 온도와 습도 데이터를 가져옵니다.

    Parameters:
        latitude (float): 위도
        longitude (float): 경도
        start_date (str): 시작 날짜 (YYYY-MM-DD)
        end_date (str): 종료 날짜 (YYYY-MM-DD)

    Returns:
        pd.DataFrame: 시간별 온도와 습도가 포함된 DataFrame
    """
    base_url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': start_date,
        'end_date': end_date,
        'hourly': 'temperature_2m,relativehumidity_2m',
        'timezone': 'Asia/Ho_Chi_Minh'

    }

    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        data = response.json()

        # 시간별 데이터 추출
        times = data.get('hourly', {}).get('time', [])
        temperatures = data.get('hourly', {}).get('temperature_2m', [])
        humidities = data.get('hourly', {}).get('relativehumidity_2m', [])

        # DataFrame 생성
        weather_df = pd.DataFrame({
            'DateTime': pd.to_datetime(times),
            'Temperature': temperatures,
            'Humidity': humidities
        })

        return weather_df

    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP 오류 발생: {http_err}")
    except Exception as err:
        print(f"기타 오류 발생: {err}")
        
# 광저우의 위도와 경도
LATITUDE = 20.861859116973168
LONGITUDE = 106.56603206256099

## 3. Load Data

In [402]:
train = pd.read_csv("./data/train.csv")
test = pd.read_csv("./data/test.csv")
hand_data = pd.read_excel('data/hand_data.xlsx')

## 4. Features

- HEAD NORMAL COORDINATE STAGE1 X좌표의 OK 되어있는 관측치 shift

In [403]:
# divide
dam = train.filter(regex='_Dam')
fill1 = train.filter(regex='_Fill1')
fill2 = train.filter(regex='_Fill2')
autoclave = train.filter(regex='_AutoClave')
target = train['target']

# dam
dam = dam.dropna(axis=1, how='all')
dam = dam.drop(columns='HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Dam')
dam_mask = dam[dam['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].isin(['OK', np.nan])].iloc[:, 25:].shift(-1, axis = 1).values
dam.loc[dam['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].isin(['OK', np.nan]), dam.columns[25:]] = dam_mask
dam = dam.drop(columns='WorkMode Collect Result_Dam')

# fill1
fill1 = fill1.dropna(axis=1, how='all')
fill1 = fill1.drop(columns='HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill1')
fill1_mask = fill1[fill1['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].isin(['OK', np.nan])].iloc[:, 15:].shift(-1, axis = 1).values
fill1.loc[fill1['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].isin(['OK', np.nan]), fill1.columns[15:]] = fill1_mask
fill1 = fill1.drop(columns='WorkMode Collect Result_Fill1')

# fill2
fill2 = fill2.dropna(axis=1, how='all')
fill2 = fill2.drop(columns='HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2')
fill2_mask = fill2[fill2['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2'].isin(['OK', np.nan])].iloc[:, 25:].shift(-1, axis = 1).values
fill2.loc[fill2['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2'].isin(['OK', np.nan]), fill2.columns[25:]] = fill2_mask
fill2 = fill2.drop(columns='WorkMode Collect Result_Fill2')

# CONCAT
train = pd.concat([dam, fill1, fill2, autoclave, target], axis=1)

# divide
dam_test = test.filter(regex='_Dam')
fill1_test = test.filter(regex='_Fill1')
fill2_test = test.filter(regex='_Fill2')
autoclave_test = test.filter(regex='_AutoClave')

# dam
dam_test = dam_test.dropna(axis=1, how='all')
dam_test = dam_test.drop(columns='HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Dam')
dam_mask_test = dam_test[dam_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].isin(['OK', np.nan])].iloc[:, 25:].shift(-1, axis = 1).values
dam_test.loc[dam_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].isin(['OK', np.nan]), dam_test.columns[25:]] = dam_mask_test
dam_test = dam_test.drop(columns='WorkMode Collect Result_Dam')

# fill1
fill1_test = fill1_test.dropna(axis=1, how='all')
fill1_test = fill1_test.drop(columns='HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill1')
fill1_mask_test = fill1_test[fill1_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].isin(['OK', np.nan])].iloc[:, 15:].shift(-1, axis = 1).values
fill1_test.loc[fill1_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].isin(['OK', np.nan]), fill1_test.columns[15:]] = fill1_mask_test
fill1_test = fill1_test.drop(columns='WorkMode Collect Result_Fill1')

# fill2
fill2_test = fill2_test.dropna(axis=1, how='all')
fill2_test = fill2_test.drop(columns='HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2')
fill2_mask_test = fill2_test[fill2_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2'].isin(['OK', np.nan])].iloc[:, 25:].shift(-1, axis = 1).values
fill2_test.loc[fill2_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2'].isin(['OK', np.nan]), fill2_test.columns[25:]] = fill2_mask_test
fill2_test = fill2_test.drop(columns='WorkMode Collect Result_Fill2')

# CONCAT
test = pd.concat([dam_test, fill1_test, fill2_test, autoclave_test, test['target']], axis=1)

  dam.loc[dam['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].isin(['OK', np.nan]), dam.columns[25:]] = dam_mask
  fill2.loc[fill2['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2'].isin(['OK', np.nan]), fill2.columns[25:]] = fill2_mask
  dam_test.loc[dam_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].isin(['OK', np.nan]), dam_test.columns[25:]] = dam_mask_test
  fill2_test.loc[fill2_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2'].isin(['OK', np.nan]), fill2_test.columns[25:]] = fill2_mask_test


- Dam, Fill1에 대해 stage별 잘못 기입되어 있는 값 swap

In [404]:
train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] = train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].astype(float)
train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'] = train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float)
train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] = train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'].astype(float)

test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] = test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].astype(float)
test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'] = test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float)
test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] = test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'].astype(float)

# 조건 Dam dispenser #1: Equipment_Dam == 'Dam dispenser #1' and Stage1 < 500
condition1 = (train['Equipment_Dam'] == 'Dam dispenser #1') & (train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] < 500)
train.loc[condition1, 'Equipment_Dam'] = 'Dam dispenser #2'

# 조건 Dam dispenser #2: Equipment_Dam == 'Dam dispenser #1' and Stage2 > 800
condition2 = (train['Equipment_Dam'] == 'Dam dispenser #1') & (train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float) > 800)
train.loc[condition2, 'Equipment_Dam'] = 'Dam dispenser #2'

# 조건 3: Equipment_Dam == 'Dam dispenser #1' and Stage3 > 800
condition3 = (train['Equipment_Dam'] == 'Dam dispenser #1') & (train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] > 800)
train.loc[condition3, 'Equipment_Dam'] = 'Dam dispenser #2'

# 조건 4: Equipment_Dam == 'Dam dispenser #2' and Stage1 < 500
condition4 = (train['Equipment_Dam'] == 'Dam dispenser #2') & (train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] < 500)
train.loc[condition4, 'Equipment_Dam'] = 'Dam dispenser #1'

# 조건 5: Equipment_Dam == 'Dam dispenser #2' and Stage2 > 500
condition5 = (train['Equipment_Dam'] == 'Dam dispenser #2') & (train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'] > 500)
train.loc[condition5, 'Equipment_Dam'] = 'Dam dispenser #1'

# 조건 6: Equipment_Dam == 'Dam dispenser #2' and Stage3 > 800
condition6 = (train['Equipment_Dam'] == 'Dam dispenser #2') & (train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] > 800)
train.loc[condition6, 'Equipment_Dam'] = 'Dam dispenser #1'



# 조건 Dam dispenser #1: Equipment_Dam == 'Dam dispenser #1' and Stage1 < 500
condition1 = (test['Equipment_Dam'] == 'Dam dispenser #1') & (test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] < 500)
test.loc[condition1, 'Equipment_Dam'] = 'Dam dispenser #2'

# 조건 Dam dispenser #2: Equipment_Dam == 'Dam dispenser #1' and Stage2 > 800
condition2 = (test['Equipment_Dam'] == 'Dam dispenser #1') & (test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float) > 800)
test.loc[condition2, 'Equipment_Dam'] = 'Dam dispenser #2'

# 조건 3: Equipment_Dam == 'Dam dispenser #1' and Stage3 > 800
condition3 = (test['Equipment_Dam'] == 'Dam dispenser #1') & (test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] > 800)
test.loc[condition3, 'Equipment_Dam'] = 'Dam dispenser #2'

# 조건 4: Equipment_Dam == 'Dam dispenser #2' and Stage1 < 500
condition4 = (test['Equipment_Dam'] == 'Dam dispenser #2') & (test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] < 500)
test.loc[condition4, 'Equipment_Dam'] = 'Dam dispenser #1'

# 조건 5: Equipment_Dam == 'Dam dispenser #2' and Stage2 > 500
condition5 = (test['Equipment_Dam'] == 'Dam dispenser #2') & (test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'] > 500)
test.loc[condition5, 'Equipment_Dam'] = 'Dam dispenser #1'

# 조건 6: Equipment_Dam == 'Dam dispenser #2' and Stage3 > 800
condition6 = (test['Equipment_Dam'] == 'Dam dispenser #2') & (test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] > 800)
test.loc[condition6, 'Equipment_Dam'] = 'Dam dispenser #1'

In [405]:
# train
train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] = train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float)
train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] = train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float)


# 이동 전
X_sum_down_1 = train[train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float) < 500]['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float).mean()
X_sum_down_2 = train[train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) < 500]['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float).mean()
X_sum_up_1 = train[train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float) > 500]['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float).mean()
X_sum_up_2 = train[train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) > 500]['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float).mean()

X_sum_down = (X_sum_down_1 - X_sum_down_2) / 2 # stage1에서 빼고, Stage3에서 더하기 <500
X_sum_up = (X_sum_up_2 - X_sum_up_1) / 2 # stage1에서 더하고, Stage 3에서 빼기

train.loc[train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] += X_sum_up
train.loc[train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] -= X_sum_up

train.loc[train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float) < 500, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] -= X_sum_down
train.loc[train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) < 500, 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] += X_sum_down

# test
test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] = test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float)
test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] = test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float)


# 이동 전
test.loc[test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] += X_sum_up
test.loc[test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] -= X_sum_up
test.loc[test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float) < 500, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] -= X_sum_down
test.loc[test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) < 500, 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] += X_sum_down

In [406]:
# train
Y_sum_dam_1 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float) < 500]['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float).mean()
Y_sum_dam_2 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float) > 500]['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float).mean()

train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'] = Y_sum_dam_1 + Y_sum_dam_2 - train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam']

Y_sum_dam_3 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float) < 500]['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float).mean()
Y_sum_dam_4 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float) > 500]['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float).mean()

train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'] = Y_sum_dam_3 + Y_sum_dam_4 - train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam']

Y_sum_dam_5 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float) < 500]['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float).mean()
Y_sum_dam_6 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float) > 500]['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float).mean()

train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'] = Y_sum_dam_5 + Y_sum_dam_6 - train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam']

Y_sum_fill_1 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float) > 500]['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float).mean()
Y_sum_fill_2 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float) < 500]['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float).mean()

train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'] = Y_sum_fill_1 + Y_sum_fill_2 - train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1']

Y_sum_fill_3 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float) > 500]['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float).mean()
Y_sum_fill_4 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float) < 500]['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float).mean()

train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'] = Y_sum_fill_3 + Y_sum_fill_4 - train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1']

Y_sum_fill_5 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500]['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float).mean()
Y_sum_fill_6 = train[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float) < 500]['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float).mean()
train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'] = Y_sum_fill_5 + Y_sum_fill_6 - train.loc[train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1']


# test
test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'] = Y_sum_dam_1 + Y_sum_dam_2 - test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam']
test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'] = Y_sum_dam_5 + Y_sum_dam_6 - test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam']
test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'] = Y_sum_dam_3 + Y_sum_dam_4 - test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam']
test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'] = Y_sum_fill_1 + Y_sum_fill_2 - test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1']
test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'] = Y_sum_fill_5 + Y_sum_fill_6 - test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1']
test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'] = Y_sum_fill_3 + Y_sum_fill_4 - test.loc[test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].astype(float) > 500, 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1']

In [407]:
def swap_columns(df, condition, col1, col2):
    # 조건에 해당하는 행 필터링
    filtered_df = df[condition]
    
    # 값 교환
    df.loc[condition, [col1, col2]] = filtered_df[[col1, col2]].copy().iloc[:, ::-1].values

    return df

In [408]:
# 조건을 만족하는 행 인덱스를 찾음
condition = train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) >= 200

# DISCHARGED TIME OF RESIN(Stage1) 
swap_columns(train, condition, 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam', 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam')

# Dispense Volume(Stage1)
swap_columns(train, condition, 'Dispense Volume(Stage1) Collect Result_Dam', 'Dispense Volume(Stage3) Collect Result_Dam')

# HEAD NORMAL COORDINATE Y AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam')

# HEAD NORMAL COORDINATE Z AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam')

# Stage1 Circle1 Distance Speed Collect
swap_columns(train, condition, 'Stage1 Circle1 Distance Speed Collect Result_Dam', 'Stage3 Circle1 Distance Speed Collect Result_Dam')
swap_columns(train, condition, 'Stage1 Circle2 Distance Speed Collect Result_Dam', 'Stage3 Circle2 Distance Speed Collect Result_Dam')
swap_columns(train, condition, 'Stage1 Circle3 Distance Speed Collect Result_Dam', 'Stage3 Circle3 Distance Speed Collect Result_Dam')
swap_columns(train, condition, 'Stage1 Circle4 Distance Speed Collect Result_Dam', 'Stage3 Circle4 Distance Speed Collect Result_Dam')

# Stage1 Line1 Distance Speed Collect
swap_columns(train, condition, 'Stage1 Line1 Distance Speed Collect Result_Dam', 'Stage3 Line1 Distance Speed Collect Result_Dam')
swap_columns(train, condition, 'Stage1 Line2 Distance Speed Collect Result_Dam', 'Stage3 Line2 Distance Speed Collect Result_Dam')
swap_columns(train, condition, 'Stage1 Line3 Distance Speed Collect Result_Dam', 'Stage3 Line3 Distance Speed Collect Result_Dam')
swap_columns(train, condition, 'Stage1 Line4 Distance Speed Collect Result_Dam', 'Stage3 Line4 Distance Speed Collect Result_Dam')

# THICKNESS 1
swap_columns(train, condition, 'THICKNESS 1 Collect Result_Dam', 'THICKNESS 3 Collect Result_Dam')

### 젤 마지막에 와야됨!!!!
# HEAD NORMAL COORDINATE X AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam')


### Test
# 조건을 만족하는 행 인덱스를 찾음
condition = test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float) >= 200

# DISCHARGED TIME OF RESIN(Stage1) 
swap_columns(test, condition, 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam', 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam')

# Dispense Volume(Stage1)
swap_columns(test, condition, 'Dispense Volume(Stage1) Collect Result_Dam', 'Dispense Volume(Stage3) Collect Result_Dam')

# HEAD NORMAL COORDINATE Y AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam')

# HEAD NORMAL COORDINATE Z AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam')

# Stage1 Circle1 Distance Speed Collect
swap_columns(test, condition, 'Stage1 Circle1 Distance Speed Collect Result_Dam', 'Stage3 Circle1 Distance Speed Collect Result_Dam')
swap_columns(test, condition, 'Stage1 Circle2 Distance Speed Collect Result_Dam', 'Stage3 Circle2 Distance Speed Collect Result_Dam')
swap_columns(test, condition, 'Stage1 Circle3 Distance Speed Collect Result_Dam', 'Stage3 Circle3 Distance Speed Collect Result_Dam')
swap_columns(test, condition, 'Stage1 Circle4 Distance Speed Collect Result_Dam', 'Stage3 Circle4 Distance Speed Collect Result_Dam')

# Stage1 Line1 Distance Speed Collect
swap_columns(test, condition, 'Stage1 Line1 Distance Speed Collect Result_Dam', 'Stage3 Line1 Distance Speed Collect Result_Dam')
swap_columns(test, condition, 'Stage1 Line2 Distance Speed Collect Result_Dam', 'Stage3 Line2 Distance Speed Collect Result_Dam')
swap_columns(test, condition, 'Stage1 Line3 Distance Speed Collect Result_Dam', 'Stage3 Line3 Distance Speed Collect Result_Dam')
swap_columns(test, condition, 'Stage1 Line4 Distance Speed Collect Result_Dam', 'Stage3 Line4 Distance Speed Collect Result_Dam')

# THICKNESS 1
swap_columns(test, condition, 'THICKNESS 1 Collect Result_Dam', 'THICKNESS 3 Collect Result_Dam')

### 젤 마지막에 와야됨!!!!
# HEAD NORMAL COORDINATE X AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam')

print()




In [409]:
### Train
condition = train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500

# DISCHARGED TIME OF RESIN(Stage1)
swap_columns(train, condition, 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1', 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1')

# Dispense Volume(Stage1)
swap_columns(train, condition, 'Dispense Volume(Stage1) Collect Result_Fill1', 'Dispense Volume(Stage2) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Y AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Z AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1')

# 반드시 마지막으로 와야함!!!!
# HEAD NORMAL COORDINATE X AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1')

### Test
condition = test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float) > 500

# DISCHARGED TIME OF RESIN(Stage1)
swap_columns(test, condition, 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1', 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1')

# Dispense Volume(Stage1)
swap_columns(test, condition, 'Dispense Volume(Stage1) Collect Result_Fill1', 'Dispense Volume(Stage2) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Y AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Z AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1')

# 반드시 마지막으로 와야함!!!!
# HEAD NORMAL COORDINATE X AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1')

print()




In [410]:
### Train
# 조건을 만족하는 행 인덱스를 찾음
condition = train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].astype(float) < 200

# DISCHARGED TIME OF RESIN(Stage1)
swap_columns(train, condition, 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1', 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1')

# Dispense Volume(Stage1)
swap_columns(train, condition, 'Dispense Volume(Stage1) Collect Result_Fill1', 'Dispense Volume(Stage3) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Y AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Z AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1')

# 반드시 마지막으로 와야함!!!!
# HEAD NORMAL COORDINATE X AXIS(Stage1)
swap_columns(train, condition, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1')

### Test
condition = test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].astype(float) < 200

# DISCHARGED TIME OF RESIN(Stage1)
swap_columns(test, condition, 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1', 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1')

# Dispense Volume(Stage1)
swap_columns(test, condition, 'Dispense Volume(Stage1) Collect Result_Fill1', 'Dispense Volume(Stage3) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Y AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1')

# HEAD NORMAL COORDINATE Z AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1')

# 반드시 마지막으로 와야함!!!!
# HEAD NORMAL COORDINATE X AXIS(Stage1)
swap_columns(test, condition, 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1', 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1')

print()




In [411]:
df_train = train
df_test = test

In [412]:
# Object형 데이터 float로 바꿔주기
type_change = ['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam', 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam', 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1']

for i in type_change:
    df_train[i] = df_train[i].astype(float)
    df_test[i] = df_test[i].astype(float)

In [413]:
# Equipment 숫자 1, 2로만 표시하기
train = df_train
test = df_test
train['Equipment_Dam'] = train['Equipment_Dam'].str.slice(15, 16)
train['Equipment_Fill1'] = train['Equipment_Fill1'].str.slice(17, 18)
train['Equipment_Fill2'] = train['Equipment_Fill2'].str.slice(17, 18)

test['Equipment_Dam'] = test['Equipment_Dam'].str.slice(15, 16)
test['Equipment_Fill1'] = test['Equipment_Fill1'].str.slice(17, 18)
test['Equipment_Fill2'] = test['Equipment_Fill2'].str.slice(17, 18)
df_train = train
df_test = test

In [414]:
# Dam, Fill1, Fill2에서 지정된 값이 다를 경우 Abnormal 
def inconsistant(data, columnname, iwantthiscolumnsname, is_train = True):
    # 장비 번호가 다르면 불일치
    if is_train:
        cri = [
            df_train[columnname + '_Dam'] != df_train[columnname + '_Fill1'],
            df_train[columnname + '_Dam'] != df_train[columnname + '_Fill2'],
            df_train[columnname + '_Fill1'] != df_train[columnname + '_Fill2'],
            data[iwantthiscolumnsname] == 1
        ]
        
    else:
        cri = [
            df_test[columnname + '_Dam'] != df_test[columnname + '_Fill1'],
            df_test[columnname + '_Dam'] != df_test[columnname + '_Fill2'],
            df_test[columnname + '_Fill1'] != df_test[columnname + '_Fill1'],
            data[iwantthiscolumnsname] == 1
        ]
    con = [1, 1, 1, 1]

    data[iwantthiscolumnsname] = np.select(cri, con, default = 0)
    
# 불일치 변수
df_train['inconsistant'] = 0
df_test['inconsistant'] = 0

# 기준
columnname = ['Equipment', 'Receip No Collect Result', 'Production Qty Collect Result', 'PalletID Collect Result', ]

# 장착
for i in columnname:
    inconsistant(df_train, i, 'inconsistant', True)
    inconsistant(df_test, i, 'inconsistant', False)

In [415]:
# 시간이 0이하, 900이상인 값은 이상치로 분류
for j in ['Machine Tact time Collect Result_Dam', 'Machine Tact time Collect Result_Fill1', 'Machine Tact time Collect Result_Fill2']:
    cri = [
        df_train[j] <= 0,
        df_train[j] > 900
    ]
    cri2 = [
        df_test[j] <= 0,
        df_test[j] > 900
    ]
    con = [
        1, 1
    ]
    df_train['inconsistant'] = np.select(cri, con, default = df_train['inconsistant'])
    df_test['inconsistant'] = np.select(cri2, con, default = df_test['inconsistant'])

In [416]:
# Equipment에 대한 단일화 작업
df_train['Equipment'] = df_train.apply(
    lambda row: row['Equipment_Dam']
    if row['Equipment_Dam'] == row['Equipment_Fill1'] == row['Equipment_Fill2']
    else -1, axis=1
)

df_train = df_train.drop(columns=['Equipment_Dam', 'Equipment_Fill1', 'Equipment_Fill2'])

df_test['Equipment'] = df_test.apply(
    lambda row: row['Equipment_Dam']
    if row['Equipment_Dam'] == row['Equipment_Fill1'] == row['Equipment_Fill2']
    else -1, axis=1
)

df_test = df_test.drop(columns=['Equipment_Dam', 'Equipment_Fill1', 'Equipment_Fill2'])

In [417]:
# QTY의 단일화
df_train['Production Qty'] = df_train.apply(
    lambda row: row['Production Qty Collect Result_Dam']
    if row['Production Qty Collect Result_Dam'] == row['Production Qty Collect Result_Fill1'] == row['Production Qty Collect Result_Fill2']
    else -1, axis=1
)

df_train = df_train.drop(columns = ['Production Qty Collect Result_Dam', 'Production Qty Collect Result_Fill1', 'Production Qty Collect Result_Fill2'])

df_test['Production Qty'] = df_test.apply(
    lambda row: row['Production Qty Collect Result_Dam']
    if row['Production Qty Collect Result_Dam'] == row['Production Qty Collect Result_Fill1'] == row['Production Qty Collect Result_Fill2']
    else -1, axis=1
)

df_test = df_test.drop(columns = ['Production Qty Collect Result_Dam', 'Production Qty Collect Result_Fill1', 'Production Qty Collect Result_Fill2'])

In [418]:
# Receip No에 대한 단일화 작업
df_train['Receip No'] = df_train.apply(
    lambda row: row['Receip No Collect Result_Dam']
    if row['Receip No Collect Result_Dam'] == row['Receip No Collect Result_Fill1'] == row['Receip No Collect Result_Fill2']
    else -1, axis=1
)

df_train = df_train.drop(columns=['Receip No Collect Result_Dam', 'Receip No Collect Result_Fill1', 'Receip No Collect Result_Fill2'])

df_test['Receip No'] = df_test.apply(
    lambda row: row['Receip No Collect Result_Dam']
    if row['Receip No Collect Result_Dam'] == row['Receip No Collect Result_Fill1'] == row['Receip No Collect Result_Fill2']
    else -1, axis=1
)

df_test = df_test.drop(columns=['Receip No Collect Result_Dam', 'Receip No Collect Result_Fill1', 'Receip No Collect Result_Fill2'])

In [419]:
# PalletID에 대한 단일화 작업
df_train['PalletID'] = df_train.apply(
    lambda row: row['PalletID Collect Result_Dam']
    if row['PalletID Collect Result_Dam'] == row['PalletID Collect Result_Fill1'] == row['PalletID Collect Result_Fill2']
    else -1, axis=1
)

df_train = df_train.drop(columns=['PalletID Collect Result_Dam', 'PalletID Collect Result_Fill1', 'PalletID Collect Result_Fill2'])

df_test['PalletID'] = df_test.apply(
    lambda row: row['PalletID Collect Result_Dam']
    if row['PalletID Collect Result_Dam'] == row['PalletID Collect Result_Fill1'] == row['PalletID Collect Result_Fill2']
    else -1, axis=1
)

df_test = df_test.drop(columns=['PalletID Collect Result_Dam', 'PalletID Collect Result_Fill1', 'PalletID Collect Result_Fill2'])

In [420]:
# 좌표 차이 구하기
df_train['Minus1_Dam']= df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam']
df_train['Minus2_Dam']= df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam']

df_test['Minus1_Dam']= df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam']
df_test['Minus2_Dam']= df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam']

df_train['Minus1Y_Dam']= df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam']
df_train['Minus2Y_Dam']= df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam']

df_test['Minus1Y_Dam']= df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam']
df_test['Minus2Y_Dam']= df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam']

df_train['Minus1Y_Dam'] = df_train['Minus1Y_Dam'].apply(lambda x: 1 if x > 2 or x < -2 else 0)
df_train['Minus2Y_Dam'] = df_train['Minus2Y_Dam'].apply(lambda x: 1 if x > 2 or x < -2 else 0)

df_test['Minus1Y_Dam'] = df_test['Minus1Y_Dam'].apply(lambda x: 1 if x > 2 or x < -2 else 0)
df_test['Minus2Y_Dam'] = df_test['Minus2Y_Dam'].apply(lambda x: 1 if x > 2 or x < -2 else 0)

In [421]:
## 좌표에 대해 표준편차 구하기

# Fill1 
# Train
df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].astype(float)
df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].mean()
df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].mean()
df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1'].mean()

df_train['HEAD NORMAL COORDINATE Error (Stage1)_Fill1'] = np.sqrt(df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Fill1'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Fill1'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Fill1'] ** 2)


df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float)
df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].mean()
df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].mean()
df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1'].mean()

df_train['HEAD NORMAL COORDINATE Error (Stage2)_Fill1'] = np.sqrt(df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Fill1'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Fill1'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Fill1'] ** 2)

df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'].astype(float)
df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'].mean()
df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].mean()
df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Fill1'] = df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1'] - df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1'].mean()

df_train['HEAD NORMAL COORDINATE Error (Stage3)_Fill1'] = np.sqrt(df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Fill1'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Fill1'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Fill1'] ** 2)

# Test
df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].astype(float)
df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1'].mean()
df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1'].mean()
df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1'].mean()

df_test['HEAD NORMAL COORDINATE Error (Stage1)_Fill1'] = np.sqrt(df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Fill1'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Fill1'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Fill1'] ** 2)


df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].astype(float)
df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1'].mean()
df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1'].mean()
df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1'].mean()

df_test['HEAD NORMAL COORDINATE Error (Stage2)_Fill1'] = np.sqrt(df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Fill1'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Fill1'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Fill1'] ** 2)

df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'].astype(float)
df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1'].mean()
df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1'].mean()
df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Fill1'] = df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1'] - df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1'].mean()

df_test['HEAD NORMAL COORDINATE Error (Stage3)_Fill1'] = np.sqrt(df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Fill1'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Fill1'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Fill1'] ** 2)

In [422]:
# Dam
# Train
df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float)
df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].mean()
df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].mean()
df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam'].mean()

df_train['HEAD NORMAL COORDINATE Error (Stage1)_Dam'] = np.sqrt(df_train['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Dam'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Dam'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Dam'] ** 2)


df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'].astype(float)
df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'].mean()
df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].mean()
df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam'].mean()

df_train['HEAD NORMAL COORDINATE Error (Stage2)_Dam'] = np.sqrt(df_train['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Dam'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Dam'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Dam'] ** 2)

df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float)
df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].mean()
df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].mean()
df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Dam'] = df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam'] - df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam'].mean()

df_train['HEAD NORMAL COORDINATE Error (Stage3)_Dam'] = np.sqrt(df_train['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Dam'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Dam'] ** 2 + \
                                                               df_train['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Dam'] ** 2)

# Test
df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].astype(float)
df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam'].mean()
df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam'].mean()
df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam'].mean()

df_test['HEAD NORMAL COORDINATE Error (Stage1)_Dam'] = np.sqrt(df_test['HEAD NORMAL COORDINATE X AXIS(Stage1) E Collect Result_Dam'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Y AXIS(Stage1) E Collect Result_Dam'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Z AXIS(Stage1) E Collect Result_Dam'] ** 2)


df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'].astype(float)
df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam'].mean()
df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam'].mean()
df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam'].mean()

df_test['HEAD NORMAL COORDINATE Error (Stage2)_Dam'] = np.sqrt(df_test['HEAD NORMAL COORDINATE X AXIS(Stage2) E Collect Result_Dam'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Y AXIS(Stage2) E Collect Result_Dam'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Z AXIS(Stage2) E Collect Result_Dam'] ** 2)

df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].astype(float)
df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam'].mean()
df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam'].mean()
df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Dam'] = df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam'] - df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam'].mean()

df_test['HEAD NORMAL COORDINATE Error (Stage3)_Dam'] = np.sqrt(df_test['HEAD NORMAL COORDINATE X AXIS(Stage3) E Collect Result_Dam'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Y AXIS(Stage3) E Collect Result_Dam'] ** 2 + \
                                                               df_test['HEAD NORMAL COORDINATE Z AXIS(Stage3) E Collect Result_Dam'] ** 2)

In [423]:
# 압력값 보정 및 온도와 곱하기
df_train['1st Pressure Power_AutoClave'] = (1 - df_train['1st Pressure Collect Result_AutoClave']) * df_train['1st Pressure 1st Pressure Unit Time_AutoClave']
df_train['2nd Pressure Power_AutoClave'] = (1 - df_train['2nd Pressure Collect Result_AutoClave']) * df_train['2nd Pressure Unit Time_AutoClave']
df_train['3rd Pressure Power_AutoClave'] = (1 - df_train['3rd Pressure Collect Result_AutoClave']) * df_train['3rd Pressure Unit Time_AutoClave']

df_train['1st Power x Temp_AutoCLave'] = df_train['1st Pressure Power_AutoClave'] * df_train['Chamber Temp. Collect Result_AutoClave']
df_train['2nd Power x Temp_AutoCLave'] = df_train['2nd Pressure Power_AutoClave'] * df_train['Chamber Temp. Collect Result_AutoClave']
df_train['3rd Power x Temp_AutoCLave'] = df_train['3rd Pressure Power_AutoClave'] * df_train['Chamber Temp. Collect Result_AutoClave']

df_test['1st Pressure Power_AutoClave'] = (1 - df_test['1st Pressure Collect Result_AutoClave']) * df_test['1st Pressure 1st Pressure Unit Time_AutoClave']
df_test['2nd Pressure Power_AutoClave'] = (1 - df_test['2nd Pressure Collect Result_AutoClave']) * df_test['2nd Pressure Unit Time_AutoClave']
df_test['3rd Pressure Power_AutoClave'] = (1 - df_test['3rd Pressure Collect Result_AutoClave']) * df_test['3rd Pressure Unit Time_AutoClave']

df_test['1st Power x Temp_AutoCLave'] = df_test['1st Pressure Power_AutoClave'] * df_test['Chamber Temp. Collect Result_AutoClave']
df_test['2nd Power x Temp_AutoCLave'] = df_test['2nd Pressure Power_AutoClave'] * df_test['Chamber Temp. Collect Result_AutoClave']
df_test['3rd Power x Temp_AutoCLave'] = df_test['3rd Pressure Power_AutoClave'] * df_test['Chamber Temp. Collect Result_AutoClave']

In [424]:
# Line 합 구하기
df_train['Stage1 Line Sum Speed_Dam'] = df_train['Stage1 Line1 Distance Speed Collect Result_Dam'] + df_train['Stage1 Line2 Distance Speed Collect Result_Dam'] + df_train['Stage1 Line3 Distance Speed Collect Result_Dam'] + df_train['Stage1 Line4 Distance Speed Collect Result_Dam']
df_train['Stage2 Line Sum Speed_Dam'] = df_train['Stage2 Line1 Distance Speed Collect Result_Dam'] + df_train['Stage2 Line2 Distance Speed Collect Result_Dam'] + df_train['Stage2 Line3 Distance Speed Collect Result_Dam'] + df_train['Stage2 Line4 Distance Speed Collect Result_Dam']
df_train['Stage3 Line Sum Speed_Dam'] = df_train['Stage3 Line1 Distance Speed Collect Result_Dam'] + df_train['Stage3 Line2 Distance Speed Collect Result_Dam'] + df_train['Stage3 Line3 Distance Speed Collect Result_Dam'] + df_train['Stage3 Line4 Distance Speed Collect Result_Dam']

df_test['Stage1 Line Sum Speed_Dam'] = df_test['Stage1 Line1 Distance Speed Collect Result_Dam'] + df_test['Stage1 Line2 Distance Speed Collect Result_Dam'] + df_test['Stage1 Line3 Distance Speed Collect Result_Dam'] + df_test['Stage1 Line4 Distance Speed Collect Result_Dam']
df_test['Stage2 Line Sum Speed_Dam'] = df_test['Stage2 Line1 Distance Speed Collect Result_Dam'] + df_test['Stage2 Line2 Distance Speed Collect Result_Dam'] + df_test['Stage2 Line3 Distance Speed Collect Result_Dam'] + df_test['Stage2 Line4 Distance Speed Collect Result_Dam']
df_test['Stage3 Line Sum Speed_Dam'] = df_test['Stage3 Line1 Distance Speed Collect Result_Dam'] + df_test['Stage3 Line2 Distance Speed Collect Result_Dam'] + df_test['Stage3 Line3 Distance Speed Collect Result_Dam'] + df_test['Stage3 Line4 Distance Speed Collect Result_Dam']

In [425]:
# 온도 x 시간 x 압력
df_train['1st Pressure x Time x Temp AutoClave'] = df_train['1st Pressure Collect Result_AutoClave']*df_train['1st Pressure 1st Pressure Unit Time_AutoClave']*df_train['Chamber Temp. Collect Result_AutoClave']
df_train['2nd Pressure x Time x Temp AutoClave'] = df_train['2nd Pressure Collect Result_AutoClave']*df_train['2nd Pressure Unit Time_AutoClave']*df_train['Chamber Temp. Collect Result_AutoClave']
df_train['3rd Pressure x Time x Temp AutoClave'] = df_train['3rd Pressure Collect Result_AutoClave']*df_train['3rd Pressure Unit Time_AutoClave']*df_train['Chamber Temp. Collect Result_AutoClave']

df_test['1st Pressure x Time x Temp AutoClave'] = df_test['1st Pressure Collect Result_AutoClave']*df_test['1st Pressure 1st Pressure Unit Time_AutoClave']*df_test['Chamber Temp. Collect Result_AutoClave']
df_test['2nd Pressure x Time x Temp AutoClave'] = df_test['2nd Pressure Collect Result_AutoClave']*df_test['2nd Pressure Unit Time_AutoClave']*df_test['Chamber Temp. Collect Result_AutoClave']
df_test['3rd Pressure x Time x Temp AutoClave'] = df_test['3rd Pressure Collect Result_AutoClave']*df_test['3rd Pressure Unit Time_AutoClave']*df_test['Chamber Temp. Collect Result_AutoClave']

In [426]:
## train
# Stage별 토출량, 토출 속도, 토출 소요시간 데이터
df_train['RESIN Predicted_Volume Stage1 Dam'] = df_train['DISCHARGED SPEED OF RESIN Collect Result_Dam'] * df_train['DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam']
df_train['RESIN Predicted_Volume Stage2 Dam'] = df_train['DISCHARGED SPEED OF RESIN Collect Result_Dam'] * df_train['DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam']
df_train['RESIN Predicted_Volume Stage3 Dam'] = df_train['DISCHARGED SPEED OF RESIN Collect Result_Dam'] * df_train['DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam']

# Stage별 실제 토출량과 예측 토출량의 비율(조정 계수) 계산
df_train['Stage1 Scaling_Factor'] = df_train['Dispense Volume(Stage1) Collect Result_Dam'] / df_train['RESIN Predicted_Volume Stage1 Dam']
df_train['Stage2 Scaling_Factor'] = df_train['Dispense Volume(Stage2) Collect Result_Dam'] / df_train['RESIN Predicted_Volume Stage2 Dam']
df_train['Stage3 Scaling_Factor'] = df_train['Dispense Volume(Stage3) Collect Result_Dam'] / df_train['RESIN Predicted_Volume Stage3 Dam']

# Stage별 조정된 예측 토출량 계산
df_train['RESIN Adjusted_Predicted_Volume Stage1 Dam'] = df_train['RESIN Predicted_Volume Stage1 Dam'] * df_train['Stage1 Scaling_Factor'].mean()
df_train['RESIN Adjusted_Predicted_Volume Stage2 Dam'] = df_train['RESIN Predicted_Volume Stage2 Dam'] * df_train['Stage2 Scaling_Factor'].mean()
df_train['RESIN Adjusted_Predicted_Volume Stage3 Dam'] = df_train['RESIN Predicted_Volume Stage3 Dam'] * df_train['Stage3 Scaling_Factor'].mean()

## test
# Stage별 토출량, 토출 속도, 토출 소요시간 데이터
df_test['RESIN Predicted_Volume Stage1 Dam'] = df_test['DISCHARGED SPEED OF RESIN Collect Result_Dam'] * df_test['DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam']
df_test['RESIN Predicted_Volume Stage2 Dam'] = df_test['DISCHARGED SPEED OF RESIN Collect Result_Dam'] * df_test['DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam']
df_test['RESIN Predicted_Volume Stage3 Dam'] = df_test['DISCHARGED SPEED OF RESIN Collect Result_Dam'] * df_test['DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam']

# Stage별 실제 토출량과 예측 토출량의 비율(조정 계수) 계산
df_test['Stage1 Scaling_Factor'] = df_test['Dispense Volume(Stage1) Collect Result_Dam'] / df_test['RESIN Predicted_Volume Stage1 Dam']
df_test['Stage2 Scaling_Factor'] = df_test['Dispense Volume(Stage2) Collect Result_Dam'] / df_test['RESIN Predicted_Volume Stage2 Dam']
df_test['Stage3 Scaling_Factor'] = df_test['Dispense Volume(Stage3) Collect Result_Dam'] / df_test['RESIN Predicted_Volume Stage3 Dam']

# Stage별 조정된 예측 토출량 계산
df_test['RESIN Adjusted_Predicted_Volume Stage1 Dam'] = df_test['RESIN Predicted_Volume Stage1 Dam'] * df_test['Stage1 Scaling_Factor'].mean()
df_test['RESIN Adjusted_Predicted_Volume Stage2 Dam'] = df_test['RESIN Predicted_Volume Stage2 Dam'] * df_test['Stage2 Scaling_Factor'].mean()
df_test['RESIN Adjusted_Predicted_Volume Stage3 Dam'] = df_test['RESIN Predicted_Volume Stage3 Dam'] * df_test['Stage3 Scaling_Factor'].mean()


## train
# Stage별 토출량, 토출 속도, 토출 소요시간 데이터
df_train['RESIN Predicted_Volume Stage1 Fill1'] = df_train['DISCHARGED SPEED OF RESIN Collect Result_Fill1'] * df_train['DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1']
df_train['RESIN Predicted_Volume Stage2 Fill1'] = df_train['DISCHARGED SPEED OF RESIN Collect Result_Fill1'] * df_train['DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1']
df_train['RESIN Predicted_Volume Stage3 Fill1'] = df_train['DISCHARGED SPEED OF RESIN Collect Result_Fill1'] * df_train['DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1']

# Stage별 실제 토출량과 예측 토출량의 비율(조정 계수) 계산
df_train['Stage1 Scaling_Factor'] = df_train['Dispense Volume(Stage1) Collect Result_Fill1'] / df_train['RESIN Predicted_Volume Stage1 Fill1']
df_train['Stage2 Scaling_Factor'] = df_train['Dispense Volume(Stage2) Collect Result_Fill1'] / df_train['RESIN Predicted_Volume Stage2 Fill1']
df_train['Stage3 Scaling_Factor'] = df_train['Dispense Volume(Stage3) Collect Result_Fill1'] / df_train['RESIN Predicted_Volume Stage3 Fill1']

# Stage별 조정된 예측 토출량 계산
df_train['RESIN Adjusted_Predicted_Volume Stage1 Fill1'] = df_train['RESIN Predicted_Volume Stage1 Fill1'] * df_train['Stage1 Scaling_Factor'].mean()
df_train['RESIN Adjusted_Predicted_Volume Stage2 Fill1'] = df_train['RESIN Predicted_Volume Stage2 Fill1'] * df_train['Stage2 Scaling_Factor'].mean()
df_train['RESIN Adjusted_Predicted_Volume Stage3 Fill1'] = df_train['RESIN Predicted_Volume Stage3 Fill1'] * df_train['Stage3 Scaling_Factor'].mean()

## test
# Stage별 토출량, 토출 속도, 토출 소요시간 데이터
df_test['RESIN Predicted_Volume Stage1 Fill1'] = df_test['DISCHARGED SPEED OF RESIN Collect Result_Fill1'] * df_test['DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1']
df_test['RESIN Predicted_Volume Stage2 Fill1'] = df_test['DISCHARGED SPEED OF RESIN Collect Result_Fill1'] * df_test['DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1']
df_test['RESIN Predicted_Volume Stage3 Fill1'] = df_test['DISCHARGED SPEED OF RESIN Collect Result_Fill1'] * df_test['DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1']

# Stage별 실제 토출량과 예측 토출량의 비율(조정 계수) 계산
df_test['Stage1 Scaling_Factor'] = df_test['Dispense Volume(Stage1) Collect Result_Fill1'] / df_test['RESIN Predicted_Volume Stage1 Fill1']
df_test['Stage2 Scaling_Factor'] = df_test['Dispense Volume(Stage2) Collect Result_Fill1'] / df_test['RESIN Predicted_Volume Stage2 Fill1']
df_test['Stage3 Scaling_Factor'] = df_test['Dispense Volume(Stage3) Collect Result_Fill1'] / df_test['RESIN Predicted_Volume Stage3 Fill1']

# Stage별 조정된 예측 토출량 계산
df_test['RESIN Adjusted_Predicted_Volume Stage1 Fill1'] = df_test['RESIN Predicted_Volume Stage1 Fill1'] * df_test['Stage1 Scaling_Factor'].mean()
df_test['RESIN Adjusted_Predicted_Volume Stage2 Fill1'] = df_test['RESIN Predicted_Volume Stage2 Fill1'] * df_test['Stage2 Scaling_Factor'].mean()
df_test['RESIN Adjusted_Predicted_Volume Stage3 Fill1'] = df_test['RESIN Predicted_Volume Stage3 Fill1'] * df_test['Stage3 Scaling_Factor'].mean()

In [427]:
# workorder 6자만 분리
df_train['workorder_for'] = df_train['Workorder_Dam'].str.slice(0, 6)
df_test['workorder_for'] = df_test['Workorder_Dam'].str.slice(0, 6)

In [428]:
# tact time 합산
df_train['All Tact Time'] = df_train['Machine Tact time Collect Result_Dam'] + df_train['Chamber Temp. Unit Time_AutoClave'] + df_train['Machine Tact time Collect Result_Fill1'] + df_train['Machine Tact time Collect Result_Fill2']
df_test['All Tact Time'] = df_test['Machine Tact time Collect Result_Dam'] + df_test['Chamber Temp. Unit Time_AutoClave'] + df_test['Machine Tact time Collect Result_Fill1'] + df_test['Machine Tact time Collect Result_Fill2']

df_train['Fill Time'] = df_train['Machine Tact time Collect Result_Fill1'] + df_train['Machine Tact time Collect Result_Fill2']
df_test['Fill Time'] = df_test['Machine Tact time Collect Result_Fill1'] + df_test['Machine Tact time Collect Result_Fill2']

df_train['Dam Fill1 Time'] = df_train['Machine Tact time Collect Result_Dam'] + df_train['Machine Tact time Collect Result_Fill1']
df_test['Dam Fill1 Time'] = df_train['Machine Tact time Collect Result_Dam'] + df_test['Machine Tact time Collect Result_Fill1']

df_train['Dam Fill2 Time'] = df_train['Machine Tact time Collect Result_Dam'] + df_train['Machine Tact time Collect Result_Fill2']
df_test['Dam Fill2 Time'] = df_train['Machine Tact time Collect Result_Dam'] + df_test['Machine Tact time Collect Result_Fill2']

In [429]:
for column in ['Collect Date_Dam', 'Collect Date_Fill1', 'Collect Date_Fill2', 'Collect Date_AutoClave']:
    df_train[column] = pd.to_datetime(df_train[column])

for column in ['Collect Date_Dam', 'Collect Date_Fill1', 'Collect Date_Fill2', 'Collect Date_AutoClave']:
    df_test[column] = pd.to_datetime(df_test[column])
    
df_train['Collect Date'] = pd.to_datetime(df_train['Collect Date_Dam'].dt.date)
df_test['Collect Date'] = pd.to_datetime(df_test['Collect Date_Dam'].dt.date)

In [430]:
df_train['Year'] = df_train['Collect Date'].dt.year
df_train['Month'] = df_train['Collect Date'].dt.month
df_train['Day'] = df_train['Collect Date'].dt.day

df_train['Hour_Dam'] = df_train['Collect Date_Dam'].dt.hour
df_train['Minute_Dam'] = df_train['Collect Date_Dam'].dt.minute

df_train['Hour_Fill1'] = df_train['Collect Date_Fill1'].dt.hour
df_train['Minute_Fill1'] = df_train['Collect Date_Fill1'].dt.minute

df_train['Hour_Fill2'] = df_train['Collect Date_Fill2'].dt.hour
df_train['Minute_Fill2'] = df_train['Collect Date_Fill2'].dt.minute

df_train['Hour_AutoClave'] = df_train['Collect Date_AutoClave'].dt.hour
df_train['Minute_AutoClave'] = df_train['Collect Date_AutoClave'].dt.minute

df_test['Year'] = df_test['Collect Date'].dt.year
df_test['Month'] = df_test['Collect Date'].dt.month
df_test['Day'] = df_test['Collect Date'].dt.day

df_test['Hour_Dam'] = df_test['Collect Date_Dam'].dt.hour
df_test['Minute_Dam'] = df_test['Collect Date_Dam'].dt.minute

df_test['Hour_Fill1'] = df_test['Collect Date_Fill1'].dt.hour
df_test['Minute_Fill1'] = df_test['Collect Date_Fill1'].dt.minute

df_test['Hour_Fill2'] = df_test['Collect Date_Fill2'].dt.hour
df_test['Minute_Fill2'] = df_test['Collect Date_Fill2'].dt.minute

df_test['Hour_AutoClave'] = df_test['Collect Date_AutoClave'].dt.hour
df_test['Minute_AutoClave'] = df_test['Collect Date_AutoClave'].dt.minute

  df_train['Hour_Dam'] = df_train['Collect Date_Dam'].dt.hour
  df_train['Minute_Dam'] = df_train['Collect Date_Dam'].dt.minute
  df_train['Hour_Fill1'] = df_train['Collect Date_Fill1'].dt.hour
  df_train['Minute_Fill1'] = df_train['Collect Date_Fill1'].dt.minute
  df_train['Hour_Fill2'] = df_train['Collect Date_Fill2'].dt.hour
  df_train['Minute_Fill2'] = df_train['Collect Date_Fill2'].dt.minute
  df_train['Hour_AutoClave'] = df_train['Collect Date_AutoClave'].dt.hour
  df_train['Minute_AutoClave'] = df_train['Collect Date_AutoClave'].dt.minute
  df_test['Hour_Dam'] = df_test['Collect Date_Dam'].dt.hour
  df_test['Minute_Dam'] = df_test['Collect Date_Dam'].dt.minute
  df_test['Hour_Fill1'] = df_test['Collect Date_Fill1'].dt.hour
  df_test['Minute_Fill1'] = df_test['Collect Date_Fill1'].dt.minute
  df_test['Hour_Fill2'] = df_test['Collect Date_Fill2'].dt.hour
  df_test['Minute_Fill2'] = df_test['Collect Date_Fill2'].dt.minute
  df_test['Hour_AutoClave'] = df_test['Collect Date_AutoClav

In [431]:
c = ['Year',
'Month',
'Day',
'Hour_Dam',
'Minute_Dam',
'Hour_Fill1',
'Minute_Fill1',
'Hour_Fill2',
'Minute_Fill2',
'Hour_AutoClave',
'Minute_AutoClave']

for i in c:
    df_train[i] = df_train[i].astype(float)
    df_test[i] = df_test[i].astype(float)

In [432]:
df_train['timediff_autoclave_fill2'] = (df_train['Collect Date_AutoClave'] - df_train['Collect Date_Fill2']).dt.total_seconds() // 60
df_test['timediff_autoclave_fill2'] = (df_test['Collect Date_AutoClave'] - df_test['Collect Date_Fill2']).dt.total_seconds() // 60

  df_train['timediff_autoclave_fill2'] = (df_train['Collect Date_AutoClave'] - df_train['Collect Date_Fill2']).dt.total_seconds() // 60
  df_test['timediff_autoclave_fill2'] = (df_test['Collect Date_AutoClave'] - df_test['Collect Date_Fill2']).dt.total_seconds() // 60


In [433]:
# 기상 데이터 가져오기 (여러 번 요청)
from datetime import datetime

df_train['DateTime'] = pd.to_datetime(df_train['Collect Date_Dam'].apply(lambda x: x.replace(minute=0)))
start_date = datetime.strptime('2023-05-04', '%Y-%m-%d').date()
end_date = datetime.strptime('2024-04-28', '%Y-%m-%d').date()

weather_data = fetch_weather_data(LATITUDE, LONGITUDE, start_date, end_date)
print(weather_data)

if weather_data is not None:

    # 병합
    df_train = pd.merge(df_train, weather_data, on='DateTime', how='left')

    # 결과 확인
    print(df_train)
    
    
df_test['DateTime'] = pd.to_datetime(df_test['Collect Date_Dam'].apply(lambda x: x.replace(minute=0)))
start_date = datetime.strptime('2023-05-04', '%Y-%m-%d').date()
end_date = datetime.strptime('2024-04-28', '%Y-%m-%d').date()

weather_data = fetch_weather_data(LATITUDE, LONGITUDE, start_date, end_date)
print(weather_data)

if weather_data is not None:

    # 병합
    df_test = pd.merge(df_test, weather_data, on='DateTime', how='left')

    # 결과 확인
    print(df_test)

  df_train['DateTime'] = pd.to_datetime(df_train['Collect Date_Dam'].apply(lambda x: x.replace(minute=0)))


                DateTime  Temperature  Humidity
0    2023-05-04 00:00:00         25.2        96
1    2023-05-04 01:00:00         25.1        96
2    2023-05-04 02:00:00         25.1        96
3    2023-05-04 03:00:00         25.1        96
4    2023-05-04 04:00:00         25.3        95
...                  ...          ...       ...
8659 2024-04-28 19:00:00         29.1        81
8660 2024-04-28 20:00:00         28.1        87
8661 2024-04-28 21:00:00         27.8        88
8662 2024-04-28 22:00:00         27.6        88
8663 2024-04-28 23:00:00         27.5        88

[8664 rows x 3 columns]
      Wip Line_Dam Process Desc._Dam Model.Suffix_Dam Workorder_Dam  \
0          IVI-OB6     Dam Dispenser      AJX75334505    4F1XA938-1   
1          IVI-OB6     Dam Dispenser      AJX75334505    3KPM0016-2   
2          IVI-OB6     Dam Dispenser      AJX75334501    4E1X9167-1   
3          IVI-OB6     Dam Dispenser      AJX75334501    3K1X0057-1   
4          IVI-OB6     Dam Dispenser      AJ

  df_test['DateTime'] = pd.to_datetime(df_test['Collect Date_Dam'].apply(lambda x: x.replace(minute=0)))


                DateTime  Temperature  Humidity
0    2023-05-04 00:00:00         25.2        96
1    2023-05-04 01:00:00         25.1        96
2    2023-05-04 02:00:00         25.1        96
3    2023-05-04 03:00:00         25.1        96
4    2023-05-04 04:00:00         25.3        95
...                  ...          ...       ...
8659 2024-04-28 19:00:00         29.1        81
8660 2024-04-28 20:00:00         28.1        87
8661 2024-04-28 21:00:00         27.8        88
8662 2024-04-28 22:00:00         27.6        88
8663 2024-04-28 23:00:00         27.5        88

[8664 rows x 3 columns]
      Wip Line_Dam Process Desc._Dam Model.Suffix_Dam Workorder_Dam  \
0          IVI-OB6     Dam Dispenser      AJX75334501    3J1XF767-1   
1          IVI-OB6     Dam Dispenser      AJX75334501    4B1XD472-2   
2          IVI-OB6     Dam Dispenser      AJX75334501    3H1XE355-1   
3          IVI-OB6     Dam Dispenser      AJX75334501    3L1XA128-1   
4          IVI-OB6     Dam Dispenser      AJ

- 칼럼명 변경 및 필요없는 데이터 제거

In [434]:
df_test = df_test.drop(columns= [ 
 'Stage1 Circle2 Distance Speed Collect Result_Dam',
 'Stage1 Circle3 Distance Speed Collect Result_Dam',
 'Stage1 Circle4 Distance Speed Collect Result_Dam', 
 'Stage2 Circle2 Distance Speed Collect Result_Dam',
 'Stage2 Circle3 Distance Speed Collect Result_Dam',
 'Stage2 Circle4 Distance Speed Collect Result_Dam', 
 'Stage3 Circle2 Distance Speed Collect Result_Dam',
 'Stage3 Circle3 Distance Speed Collect Result_Dam',
 'Stage3 Circle4 Distance Speed Collect Result_Dam'] )

df_test = df_test.rename(columns={'Stage1 Circle1 Distance Speed Collect Result_Dam': 'Stage1 Circle Distance Speed_Dam', 
                                    'Stage2 Circle1 Distance Speed Collect Result_Dam': 'Stage2 Circle Distance Speed_Dam',
                                    'Stage3 Circle1 Distance Speed Collect Result_Dam': 'Stage3 Circle Distance Speed_Dam'})

# Dam, Fill2의 경우 Z값이 서로 같다. -> 그렇다면 Fill1은 높이값에서 흔들린 경우가 있다는 것을 의미한다.
df_test = df_test.drop(columns= [
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2'
])

df_test = df_test.rename(columns={'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2': 'HEAD NORMAL COORDINATE Z AXIS_Fill2', 
                                    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam': 'HEAD NORMAL COORDINATE Z AXIS_Dam',
                                    })

# Model.Suffix, Workorder이 같다.
df_test = df_test.drop(columns=['Model.Suffix_Fill1', 'Model.Suffix_Fill2', 'Model.Suffix_AutoClave'])
df_test = df_test.drop(columns=['Workorder_Fill1', 'Workorder_Fill2', 'Workorder_AutoClave'])
df_test = df_test.rename(columns={'Workorder_Dam': 'Workorder', 'Model.Suffix_Dam': 'Model.Suffix'})

# 의미를 찾을 수 없는 컬럼들 제거
df_test = df_test.drop(columns=['Wip Line_Fill1', 
                                  'Process Desc._Fill1', 
                                  'Insp. Seq No._Fill1', 
                                  'Insp Judge Code_Fill1', 
                                  'Equipment_AutoClave',
                                  'Process Desc._AutoClave', 
                                  'Wip Line_AutoClave', 
                                  'Insp Judge Code_AutoClave',
                                  'Insp. Seq No._AutoClave',
                                  '1st Pressure Judge Value_AutoClave', 
                                  '2nd Pressure Judge Value_AutoClave', 
                                  '3rd Pressure Judge Value_AutoClave', 
                                  'GMES_ORIGIN_INSP_JUDGE_CODE Collect Result_AutoClave',
                                  'GMES_ORIGIN_INSP_JUDGE_CODE Judge Value_AutoClave',
                                  'GMES_ORIGIN_INSP_JUDGE_CODE Unit Time_AutoClave',
                                  'Wip Line_Fill2', 
                                  'Process Desc._Fill2', 
                                  'Insp. Seq No._Fill2', 
                                  'Insp Judge Code_Fill2', 
                                  'Wip Line_Dam', 
                                  'Process Desc._Dam', 
                                  'Insp. Seq No._Dam', 
                                  'Insp Judge Code_Dam',
                                  'CURE END POSITION X Collect Result_Dam',
                                  'CURE END POSITION Z Collect Result_Dam',
                                  'CURE END POSITION Θ Collect Result_Dam',
                                  'CURE STANDBY POSITION X Collect Result_Dam',
                                  'CURE STANDBY POSITION Z Collect Result_Dam',
                                  'CURE STANDBY POSITION Θ Collect Result_Dam',
                                  ])  

# Fill2는 레진을 살포하지 않는다. UV만 진행하는 과정이므로 싹 삭제해 준다.          
df_test = df_test.drop(columns=['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Z AXIS_Fill2',
                                'HEAD Standby Position X Collect Result_Fill2',
                                'HEAD Standby Position Y Collect Result_Fill2',
                                'HEAD Standby Position Z Collect Result_Fill2',
                                'Head Clean Position X Collect Result_Fill2',
                                'Head Clean Position Y Collect Result_Fill2',
                                'Head Clean Position Z Collect Result_Fill2',
                                'Head Purge Position X Collect Result_Fill2',
                                'Head Purge Position Y Collect Result_Fill2',
                                'Head Purge Position Z Collect Result_Fill2',
                                'DISCHARGED SPEED OF RESIN Collect Result_Fill2',
                                'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill2',
                                'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill2',
                                'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill2',
                                'Dispense Volume(Stage1) Collect Result_Fill2',
                                'Dispense Volume(Stage2) Collect Result_Fill2',
                                'Dispense Volume(Stage3) Collect Result_Fill2',])  

# # 라인별로 속도가 같아야 정상이다.
df_test['Stage1 Line diffent Distance Speed_Dam'] = ((df_test['Stage1 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage1 Line2 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage1 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage1 Line3 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage1 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage1 Line4 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage1 Line3 Distance Speed Collect Result_Dam'] != df_test['Stage1 Line4 Distance Speed Collect Result_Dam'])).astype(int)

df_test['Stage2 Line diffent Distance Speed_Dam'] = ((df_test['Stage2 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage2 Line2 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage2 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage2 Line3 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage2 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage2 Line4 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage2 Line3 Distance Speed Collect Result_Dam'] != df_test['Stage2 Line4 Distance Speed Collect Result_Dam'])).astype(int)

df_test['Stage3 Line diffent Distance Speed_Dam'] = ((df_test['Stage3 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage3 Line2 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage3 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage3 Line3 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage3 Line1 Distance Speed Collect Result_Dam'] != df_test['Stage3 Line4 Distance Speed Collect Result_Dam']) |
                                                  (df_test['Stage3 Line3 Distance Speed Collect Result_Dam'] != df_test['Stage3 Line4 Distance Speed Collect Result_Dam'])).astype(int)


# 단일값이 하나인 컬럼들, 의미를 찾고싶다면 주석처리 해야하는 것들
df_test = df_test.drop(columns=['CURE START POSITION X Collect Result_Dam', # Equipment에 따라서 정해지며, 하나로 책정됨.
                                'CURE START POSITION Z Collect Result_Dam', # START POSITION
                                'CURE START POSITION Θ Collect Result_Dam', # Equipment에 따라서 정해지며, 하나로 책정됨.
                                'HEAD Standby Position X Collect Result_Dam',
                                'HEAD Standby Position Y Collect Result_Dam',
                                'HEAD Standby Position Z Collect Result_Dam',
                                'Head Clean Position X Collect Result_Dam',
                                'Head Clean Position Y Collect Result_Dam', # 흔들림에 따라 Z
                                'Head Purge Position X Collect Result_Dam',
                                'Head Purge Position Y Collect Result_Dam',
                                'Head Zero Position X Collect Result_Dam',
                                'HEAD Standby Position X Collect Result_Fill1',
                                'HEAD Standby Position Y Collect Result_Fill1',
                                'HEAD Standby Position Z Collect Result_Fill1',
                                'Head Clean Position X Collect Result_Fill1',
                                'Head Clean Position Y Collect Result_Fill1',
                                'Head Clean Position Z Collect Result_Fill1',
                                'Head Purge Position X Collect Result_Fill1',
                                'Head Purge Position Y Collect Result_Fill1',
                                'CURE END POSITION X Collect Result_Fill2',
                                'CURE END POSITION Θ Collect Result_Fill2',
                                'CURE STANDBY POSITION X Collect Result_Fill2',
                                'CURE STANDBY POSITION Z Collect Result_Fill2',
                                'CURE STANDBY POSITION Θ Collect Result_Fill2',
                                'CURE START POSITION X Collect Result_Fill2',
                                'CURE START POSITION Θ Collect Result_Fill2',
                                ])


# df_test = df_test.drop(columns = [ 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1',
#                                 ])

In [435]:
# 값들이 같은 컬럼 하나로 합치는 과정
# 같은 Stage에 Circle 값들끼리 같다.
df_train = df_train.drop(columns= [ 
 'Stage1 Circle2 Distance Speed Collect Result_Dam',
 'Stage1 Circle3 Distance Speed Collect Result_Dam',
 'Stage1 Circle4 Distance Speed Collect Result_Dam', 
 'Stage2 Circle2 Distance Speed Collect Result_Dam',
 'Stage2 Circle3 Distance Speed Collect Result_Dam',
 'Stage2 Circle4 Distance Speed Collect Result_Dam', 
 'Stage3 Circle2 Distance Speed Collect Result_Dam',
 'Stage3 Circle3 Distance Speed Collect Result_Dam',
 'Stage3 Circle4 Distance Speed Collect Result_Dam'] )

df_train = df_train.rename(columns={'Stage1 Circle1 Distance Speed Collect Result_Dam': 'Stage1 Circle Distance Speed_Dam', 
                                    'Stage2 Circle1 Distance Speed Collect Result_Dam': 'Stage2 Circle Distance Speed_Dam',
                                    'Stage3 Circle1 Distance Speed Collect Result_Dam': 'Stage3 Circle Distance Speed_Dam'})

# Dam, Fill2의 경우 Z값이 서로 같다. -> 그렇다면 Fill1은 높이값에서 흔들린 경우가 있다는 것을 의미한다.
df_train = df_train.drop(columns= [
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2'
])

df_train = df_train.rename(columns={'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2': 'HEAD NORMAL COORDINATE Z AXIS_Fill2', 
                                    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam': 'HEAD NORMAL COORDINATE Z AXIS_Dam',
                                    })

# Model.Suffix, Workorder이 같다.
df_train = df_train.drop(columns=['Model.Suffix_Fill1', 'Model.Suffix_Fill2', 'Model.Suffix_AutoClave'])
df_train = df_train.drop(columns=['Workorder_Fill1', 'Workorder_Fill2', 'Workorder_AutoClave'])
df_train = df_train.rename(columns={'Workorder_Dam': 'Workorder', 'Model.Suffix_Dam': 'Model.Suffix'})


# 의미를 찾을 수 없는 컬럼들 제거
df_train = df_train.drop(columns=['Wip Line_Fill1', 
                                  'Process Desc._Fill1', 
                                  'Insp. Seq No._Fill1', 
                                  'Insp Judge Code_Fill1', 
                                  'Equipment_AutoClave',
                                  'Process Desc._AutoClave', 
                                  'Wip Line_AutoClave', 
                                  'Insp Judge Code_AutoClave',
                                  'Insp. Seq No._AutoClave',
                                  '1st Pressure Judge Value_AutoClave', 
                                  '2nd Pressure Judge Value_AutoClave', 
                                  '3rd Pressure Judge Value_AutoClave', 
                                  'GMES_ORIGIN_INSP_JUDGE_CODE Collect Result_AutoClave',
                                  'GMES_ORIGIN_INSP_JUDGE_CODE Judge Value_AutoClave',
                                  'GMES_ORIGIN_INSP_JUDGE_CODE Unit Time_AutoClave',
                                  'Wip Line_Fill2', 
                                  'Process Desc._Fill2', 
                                  'Insp. Seq No._Fill2', 
                                  'Insp Judge Code_Fill2', 
                                  'Wip Line_Dam', 
                                  'Process Desc._Dam', 
                                  'Insp. Seq No._Dam', 
                                  'Insp Judge Code_Dam',
                                  ])  

# Fill2는 레진을 살포하지 않는다. UV만 진행하는 과정이므로 싹 삭제해 준다.          
df_train = df_train.drop(columns=['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2',
                                'HEAD NORMAL COORDINATE Z AXIS_Fill2',
                                'HEAD Standby Position X Collect Result_Fill2',
                                'HEAD Standby Position Y Collect Result_Fill2',
                                'HEAD Standby Position Z Collect Result_Fill2',
                                'Head Clean Position X Collect Result_Fill2',
                                'Head Clean Position Y Collect Result_Fill2',
                                'Head Clean Position Z Collect Result_Fill2',
                                'Head Purge Position X Collect Result_Fill2',
                                'Head Purge Position Y Collect Result_Fill2',
                                'Head Purge Position Z Collect Result_Fill2',
                                'DISCHARGED SPEED OF RESIN Collect Result_Fill2',
                                'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill2',
                                'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill2',
                                'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill2',
                                'Dispense Volume(Stage1) Collect Result_Fill2',
                                'Dispense Volume(Stage2) Collect Result_Fill2',
                                'Dispense Volume(Stage3) Collect Result_Fill2',])  

# 라인별로 속도가 같은 경우, 다른 경우가 있다.
df_train['Stage1 Line diffent Distance Speed_Dam'] = ((df_train['Stage1 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage1 Line2 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage1 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage1 Line3 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage1 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage1 Line4 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage1 Line3 Distance Speed Collect Result_Dam'] != df_train['Stage1 Line4 Distance Speed Collect Result_Dam'])).astype(int)

df_train['Stage2 Line diffent Distance Speed_Dam'] = ((df_train['Stage2 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage2 Line2 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage2 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage2 Line3 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage2 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage2 Line4 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage2 Line3 Distance Speed Collect Result_Dam'] != df_train['Stage2 Line4 Distance Speed Collect Result_Dam'])).astype(int)

df_train['Stage3 Line diffent Distance Speed_Dam'] = ((df_train['Stage3 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage3 Line2 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage3 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage3 Line3 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage3 Line1 Distance Speed Collect Result_Dam'] != df_train['Stage3 Line4 Distance Speed Collect Result_Dam']) |
                                                  (df_train['Stage3 Line3 Distance Speed Collect Result_Dam'] != df_train['Stage3 Line4 Distance Speed Collect Result_Dam'])).astype(int)

# 단일값이 하나인 컬럼들, 의미를 찾고싶다면 주석처리 해야하는 것들
df_train = df_train.drop(columns=['CURE START POSITION X Collect Result_Dam', 
                                'CURE START POSITION Z Collect Result_Dam', 
                                'CURE START POSITION Θ Collect Result_Dam',
                                'HEAD Standby Position X Collect Result_Dam',
                                'HEAD Standby Position Y Collect Result_Dam',
                                'HEAD Standby Position Z Collect Result_Dam',
                                'Head Clean Position X Collect Result_Dam',
                                'Head Clean Position Y Collect Result_Dam',
                                'Head Purge Position X Collect Result_Dam',
                                'Head Purge Position Y Collect Result_Dam',
                                'Head Zero Position X Collect Result_Dam',
                                'HEAD Standby Position X Collect Result_Fill1',
                                'HEAD Standby Position Y Collect Result_Fill1',
                                'HEAD Standby Position Z Collect Result_Fill1',
                                'Head Clean Position X Collect Result_Fill1',
                                'Head Clean Position Y Collect Result_Fill1',
                                'Head Clean Position Z Collect Result_Fill1',
                                'Head Purge Position X Collect Result_Fill1',
                                'Head Purge Position Y Collect Result_Fill1',
                                'CURE END POSITION X Collect Result_Fill2',
                                'CURE END POSITION Θ Collect Result_Fill2',
                                'CURE STANDBY POSITION X Collect Result_Fill2',
                                'CURE STANDBY POSITION Z Collect Result_Fill2',
                                'CURE STANDBY POSITION Θ Collect Result_Fill2',
                                'CURE START POSITION X Collect Result_Fill2',
                                'CURE START POSITION Θ Collect Result_Fill2',
                                'CURE END POSITION X Collect Result_Dam',
                                'CURE END POSITION Z Collect Result_Dam',
                                'CURE END POSITION Θ Collect Result_Dam',
                                'CURE STANDBY POSITION X Collect Result_Dam',
                                'CURE STANDBY POSITION Z Collect Result_Dam',
                                'CURE STANDBY POSITION Θ Collect Result_Dam',
                                ])

# df_train = df_train.drop(columns = ['HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1',
#                                    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1',
#                                    ])


In [436]:
# 레이블 인코딩
label_encoders = {}
categorical_features = ['Model.Suffix','workorder_for', 'Chamber Temp. Judge Value_AutoClave']

for feature in categorical_features:
    le = LabelEncoder()
    df_train[feature] = le.fit_transform(df_train[feature])

    df_test[feature] = le.transform(df_test[feature])
    label_encoders[feature] = le

## 5. Modeling

### 선택할 칼럼 재 지정하기

In [437]:
column = \
[
#     'Temperature',
#     'Humidity',
'timediff_autoclave_fill2',
# 'Year',
'Month',
# 'Day',
'Hour_Dam',
# 'Minute_Dam',
'Hour_Fill1',
# 'Minute_Fill1',
'Hour_Fill2',
# 'Minute_Fill2',
'Hour_AutoClave',
# 'Minute_AutoClave',   
#  'Workorder',
 'Production Qty',
 'Receip No',  
 'Model.Suffix',
 'inconsistant',
 'Equipment',
 'PalletID',
#  'workorder_first',
#  'workorder_third',
    'workorder_for',
 
 'CURE SPEED Collect Result_Dam',
 'DISCHARGED SPEED OF RESIN Collect Result_Dam', # 범주 가능
 
 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam', # 10 미만이냐 아니냐.
 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam', # 4 미만이냐 아니냐.
 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam', # 10 미만이냐 아니냐
 'Dispense Volume(Stage1) Collect Result_Dam',
 'Dispense Volume(Stage2) Collect Result_Dam', # 0.3미만이냐 아니냐
 'Dispense Volume(Stage3) Collect Result_Dam',
 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam',
 'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam',
 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Z AXIS_Dam',
 'Head Clean Position Z Collect Result_Dam',
 'Head Purge Position Z Collect Result_Dam',
 'Head Zero Position Y Collect Result_Dam',
 'Head Zero Position Z Collect Result_Dam',
 'Machine Tact time Collect Result_Dam',
 'Stage1 Circle Distance Speed_Dam',
#  'Stage1 Line1 Distance Speed Collect Result_Dam',
#  'Stage1 Line2 Distance Speed Collect Result_Dam',
#  'Stage1 Line3 Distance Speed Collect Result_Dam',
#  'Stage1 Line4 Distance Speed Collect Result_Dam',
 'Stage2 Circle Distance Speed_Dam',
#  'Stage2 Line1 Distance Speed Collect Result_Dam',
#  'Stage2 Line2 Distance Speed Collect Result_Dam',
#  'Stage2 Line3 Distance Speed Collect Result_Dam',
#  'Stage2 Line4 Distance Speed Collect Result_Dam',
 'Stage3 Circle Distance Speed_Dam',
#  'Stage3 Line1 Distance Speed Collect Result_Dam',
#  'Stage3 Line2 Distance Speed Collect Result_Dam',
#  'Stage3 Line3 Distance Speed Collect Result_Dam',
#  'Stage3 Line4 Distance Speed Collect Result_Dam',
 'THICKNESS 1 Collect Result_Dam',
 'THICKNESS 2 Collect Result_Dam',
 'THICKNESS 3 Collect Result_Dam',
 
 'DISCHARGED SPEED OF RESIN Collect Result_Fill1', # 범주 가능
 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1', # 17.4 이상이냐 아니냐
 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1', # 5 초과냐 아니냐
 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1', # 17 이상이냐 아니냐
 'Dispense Volume(Stage1) Collect Result_Fill1', # 12 미만이냐 아니냐
 'Dispense Volume(Stage2) Collect Result_Fill1', # 4.5 초과냐 아니냐
 'Dispense Volume(Stage3) Collect Result_Fill1', # 12 미만이냐 아니냐
 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1',
 'Head Purge Position Z Collect Result_Fill1',
 'Machine Tact time Collect Result_Fill1',
 
 'CURE END POSITION Z Collect Result_Fill2',
 'CURE SPEED Collect Result_Fill2',
 'CURE START POSITION Z Collect Result_Fill2',
 'Machine Tact time Collect Result_Fill2',
 '1st Pressure Collect Result_AutoClave',
 '1st Pressure 1st Pressure Unit Time_AutoClave',
 '2nd Pressure Collect Result_AutoClave',
 '2nd Pressure Unit Time_AutoClave',
 '3rd Pressure Collect Result_AutoClave',
 '3rd Pressure Unit Time_AutoClave',
 'Chamber Temp. Collect Result_AutoClave',
 'Chamber Temp. Unit Time_AutoClave',
 'Chamber Temp. Judge Value_AutoClave',
#  '1st_Pressure_PCA',
#  '2nd_Pressure_PCA',
#  '3rd_Pressure_PCA',
 
#  'Minus1_Dam',
#  'Minus2_Dam',
 'Minus1Y_Dam',
 'Minus2Y_Dam',
#  'HEAD NORMAL COORDINATE Error (Stage1)_Fill1',
#  'HEAD NORMAL COORDINATE Error (Stage2)_Fill1',
#  'HEAD NORMAL COORDINATE Error (Stage3)_Fill1',
#  'HEAD NORMAL COORDINATE Error (Stage1)_Dam',
#  'HEAD NORMAL COORDINATE Error (Stage2)_Dam',
#  'HEAD NORMAL COORDINATE Error (Stage3)_Dam',
#  '1st Pressure Power_AutoClave',
#  '2nd Pressure Power_AutoClave',
#  '3rd Pressure Power_AutoClave',
#  '1st Power x Temp_AutoCLave',
#  '2nd Power x Temp_AutoCLave',
#  '3rd Power x Temp_AutoCLave',
 'Stage1 Line Sum Speed_Dam',
 'Stage2 Line Sum Speed_Dam',
 'Stage3 Line Sum Speed_Dam',
#  '1st Pressure x Time x Temp AutoClave',
#  '2nd Pressure x Time x Temp AutoClave',
#  '3rd Pressure x Time x Temp AutoClave',
#  'RESIN Predicted_Volume Stage1 Dam',
#  'RESIN Predicted_Volume Stage2 Dam',
#  'RESIN Predicted_Volume Stage3 Dam',
#  'Stage1 Scaling_Factor',
#  'Stage2 Scaling_Factor',
#  'Stage3 Scaling_Factor',
#  'RESIN Adjusted_Predicted_Volume Stage1 Dam',
#  'RESIN Adjusted_Predicted_Volume Stage2 Dam',
#  'RESIN Adjusted_Predicted_Volume Stage3 Dam',
#  'RESIN Predicted_Volume Stage1 Fill1',
#  'RESIN Predicted_Volume Stage2 Fill1',
#  'RESIN Predicted_Volume Stage3 Fill1',
#  'RESIN Adjusted_Predicted_Volume Stage1 Fill1',
#  'RESIN Adjusted_Predicted_Volume Stage2 Fill1',
#  'RESIN Adjusted_Predicted_Volume Stage3 Fill1',
 'Stage1 Line diffent Distance Speed_Dam',
 'Stage2 Line diffent Distance Speed_Dam',
 'Stage3 Line diffent Distance Speed_Dam',
#  'kmeans',
 
 'target']



column_t = [
    
#     'Temperature',
#     'Humidity',
    
'timediff_autoclave_fill2',
# 'Year',
'Month',
# 'Day',
'Hour_Dam',
# 'Minute_Dam',
'Hour_Fill1',
# 'Minute_Fill1',
'Hour_Fill2',
# 'Minute_Fill2',
'Hour_AutoClave',
# 'Minute_AutoClave',
#  'Workorder',

 'Production Qty',
 'Receip No',  
 'Model.Suffix',
 'inconsistant',
 'Equipment',
 'PalletID',
#  'workorder_first',
#  'workorder_third',
    'workorder_for',
 
 'CURE SPEED Collect Result_Dam',
 'DISCHARGED SPEED OF RESIN Collect Result_Dam',
 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam',
 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam',
 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam',
 'Dispense Volume(Stage1) Collect Result_Dam',
 'Dispense Volume(Stage2) Collect Result_Dam',
 'Dispense Volume(Stage3) Collect Result_Dam',
 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam',
 'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam',
 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam',
 'HEAD NORMAL COORDINATE Z AXIS_Dam',
 'Head Clean Position Z Collect Result_Dam',
 'Head Purge Position Z Collect Result_Dam',
 'Head Zero Position Y Collect Result_Dam',
 'Head Zero Position Z Collect Result_Dam',
 'Machine Tact time Collect Result_Dam',
 'Stage1 Circle Distance Speed_Dam',
#  'Stage1 Line1 Distance Speed Collect Result_Dam',
#  'Stage1 Line2 Distance Speed Collect Result_Dam',
#  'Stage1 Line3 Distance Speed Collect Result_Dam',
#  'Stage1 Line4 Distance Speed Collect Result_Dam',
 'Stage2 Circle Distance Speed_Dam',
#  'Stage2 Line1 Distance Speed Collect Result_Dam',
#  'Stage2 Line2 Distance Speed Collect Result_Dam',
#  'Stage2 Line3 Distance Speed Collect Result_Dam',
#  'Stage2 Line4 Distance Speed Collect Result_Dam',
 'Stage3 Circle Distance Speed_Dam',
#  'Stage3 Line1 Distance Speed Collect Result_Dam',
#  'Stage3 Line2 Distance Speed Collect Result_Dam',
#  'Stage3 Line3 Distance Speed Collect Result_Dam',
#  'Stage3 Line4 Distance Speed Collect Result_Dam',
 'THICKNESS 1 Collect Result_Dam',
 'THICKNESS 2 Collect Result_Dam',
 'THICKNESS 3 Collect Result_Dam',
 
 'DISCHARGED SPEED OF RESIN Collect Result_Fill1',
 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1',
 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1',
 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1',
 'Dispense Volume(Stage1) Collect Result_Fill1',
 'Dispense Volume(Stage2) Collect Result_Fill1',
 'Dispense Volume(Stage3) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1',
 'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1',
 'Head Purge Position Z Collect Result_Fill1',
 'Machine Tact time Collect Result_Fill1',
 
 'CURE END POSITION Z Collect Result_Fill2',
 'CURE SPEED Collect Result_Fill2',
 'CURE START POSITION Z Collect Result_Fill2',
 'Machine Tact time Collect Result_Fill2',
 '1st Pressure Collect Result_AutoClave',
 '1st Pressure 1st Pressure Unit Time_AutoClave',
 '2nd Pressure Collect Result_AutoClave',
 '2nd Pressure Unit Time_AutoClave',
 '3rd Pressure Collect Result_AutoClave',
 '3rd Pressure Unit Time_AutoClave',
 'Chamber Temp. Collect Result_AutoClave',
 'Chamber Temp. Unit Time_AutoClave',
 'Chamber Temp. Judge Value_AutoClave',
#  '1st_Pressure_PCA',
#  '2nd_Pressure_PCA',
#  '3rd_Pressure_PCA',
#  'Minus1_Dam',
#  'Minus2_Dam',
 'Minus1Y_Dam',
 'Minus2Y_Dam',
#  'HEAD NORMAL COORDINATE Error (Stage1)_Fill1',
#  'HEAD NORMAL COORDINATE Error (Stage2)_Fill1',
#  'HEAD NORMAL COORDINATE Error (Stage3)_Fill1',
#  'HEAD NORMAL COORDINATE Error (Stage1)_Dam',
#  'HEAD NORMAL COORDINATE Error (Stage2)_Dam',
#  'HEAD NORMAL COORDINATE Error (Stage3)_Dam',
#  '1st Pressure Power_AutoClave',
#  '2nd Pressure Power_AutoClave',
#  '3rd Pressure Power_AutoClave',
#  '1st Power x Temp_AutoCLave',
#  '2nd Power x Temp_AutoCLave',
#  '3rd Power x Temp_AutoCLave',
 'Stage1 Line Sum Speed_Dam',
 'Stage2 Line Sum Speed_Dam',
 'Stage3 Line Sum Speed_Dam',
#  '1st Pressure x Time x Temp AutoClave',
#  '2nd Pressure x Time x Temp AutoClave',
#  '3rd Pressure x Time x Temp AutoClave',
#  'RESIN Predicted_Volume Stage1 Dam',
#  'RESIN Predicted_Volume Stage2 Dam',
#  'RESIN Predicted_Volume Stage3 Dam',
#  'Stage1 Scaling_Factor',
#  'Stage2 Scaling_Factor',
#  'Stage3 Scaling_Factor',
#  'RESIN Adjusted_Predicted_Volume Stage1 Dam',
#  'RESIN Adjusted_Predicted_Volume Stage2 Dam',
#  'RESIN Adjusted_Predicted_Volume Stage3 Dam',
#  'RESIN Predicted_Volume Stage1 Fill1',
#  'RESIN Predicted_Volume Stage2 Fill1',
#  'RESIN Predicted_Volume Stage3 Fill1',
#  'RESIN Adjusted_Predicted_Volume Stage1 Fill1',
#  'RESIN Adjusted_Predicted_Volume Stage2 Fill1',
#  'RESIN Adjusted_Predicted_Volume Stage3 Fill1',
 'Stage1 Line diffent Distance Speed_Dam',
 'Stage2 Line diffent Distance Speed_Dam',
 'Stage3 Line diffent Distance Speed_Dam',
#             'kmeans',
]



### 각 모델별 선정한 칼럼 데이터 만들기

In [438]:
cat_train = df_train[column].copy()
cat_test = df_test[column_t].copy()

In [439]:
lgbm_train = df_train[column].copy()
lgbm_test = df_test[column_t].copy()

In [440]:
xgb_train = df_train[column].copy()
xgb_test = df_test[column_t].copy()

### 범주형 변수 선택 및 전환

In [441]:

columns_to_convert = ['Equipment', 'PalletID', 'Model.Suffix', 'Receip No']# 'Workorder']
columns_to = [
    
# 'Year',
'Month',
# 'Day',
'Hour_Dam',
# 'Minute_Dam',
'Hour_Fill1',
# 'Minute_Fill1',
'Hour_Fill2',
# 'Minute_Fill2',
'Hour_AutoClave',
# 'Minute_AutoClave',
              'Head Zero Position Y Collect Result_Dam',
              'Head Zero Position Z Collect Result_Dam',
              'Head Clean Position Z Collect Result_Dam',
              'Head Purge Position Z Collect Result_Dam',
              'Head Purge Position Z Collect Result_Fill1',
              'CURE START POSITION Z Collect Result_Fill2',
              'CURE END POSITION Z Collect Result_Fill2',
              'CURE SPEED Collect Result_Fill2',
              'Stage1 Circle Distance Speed_Dam',
              'Stage2 Circle Distance Speed_Dam',
              'Stage3 Circle Distance Speed_Dam',
              'Stage1 Line diffent Distance Speed_Dam',
              'Stage2 Line diffent Distance Speed_Dam',
              'Stage3 Line diffent Distance Speed_Dam',
              'Minus1Y_Dam', 
              'Minus2Y_Dam', 
              'inconsistant',
              'HEAD NORMAL COORDINATE Z AXIS_Dam',
              '1st Pressure 1st Pressure Unit Time_AutoClave',
              '2nd Pressure Unit Time_AutoClave',
              '3rd Pressure Unit Time_AutoClave',
              'Chamber Temp. Judge Value_AutoClave',
              'workorder_for'
             ]

dtype = 'string'  # 원하는 데이터 타입
for column in columns_to_convert + columns_to:
    cat_train[column] = cat_train[column].astype(dtype)
    cat_test[column] = cat_test[column].astype(dtype)
    
dtype = 'category'  # 원하는 데이터 타입
for column in columns_to_convert + columns_to:
    cat_train[column] = cat_train[column].astype(dtype)
    cat_test[column] = cat_test[column].astype(dtype)
    
dtype = 'float'  # 원하는 데이터 타입
for column in columns_to_convert + columns_to:
    lgbm_train[column] = lgbm_train[column].astype(dtype)
    lgbm_test[column] = lgbm_test[column].astype(dtype)
    
dtype = 'category'  # 원하는 데이터 타입
for column in columns_to_convert + columns_to:
    lgbm_train[column] = lgbm_train[column].astype(dtype)
    lgbm_test[column] = lgbm_test[column].astype(dtype)
    
dtype = 'float'  # 원하는 데이터 타입
for column in columns_to_convert + columns_to:
    xgb_train[column] = xgb_train[column].astype(dtype)
    xgb_test[column] = xgb_test[column].astype(dtype)

### 모델링 추정

#### Catboost

In [442]:
X_train = cat_train.drop(columns=['target'])
y_train = cat_train['target'].apply(lambda x: True if x == 'AbNormal' else False)

cat_features_indices = columns_to_convert + columns_to

# 최적의 하이퍼파라미터로 모델 재학습
cat_best_params = {'iterations': 246, 'depth': 9, 'learning_rate': 0.025081156860452335} # study.best_trial.params
cat_best_params["random_seed"] = 42
cat_best_params['verbose'] = 0
cat_best_model = CatBoostClassifier(**cat_best_params)
cat_best_model.fit(X_train, y_train, cat_features=cat_features_indices)

<catboost.core.CatBoostClassifier at 0x7f68a16eaa10>

In [443]:
# predict proba 구하기
X_valid = cat_test
y_pred_proba = cat_best_model.predict_proba(X_valid)[:, 1]
proba1 = y_pred_proba

#### LightGBM

In [444]:
# optuna 최적 파라미터로 학습
X_train = lgbm_train.drop(columns=['target'])
y_train = lgbm_train['target'].apply(lambda x: True if x == 'AbNormal' else False)

lgbm_best_params = {'n_estimators': 904, 'max_depth': 31, 'learning_rate': 0.04149455426040617}
lgbm_best_params["random_seed"] = 42
lgbm_best_params["random_state"] = 42
lgbm_best_params['verbose'] = -1

lgbm_best_model = LGBMClassifier(**lgbm_best_params)
lgbm_best_model.fit(X_train, y_train, categorical_feature=cat_features_indices)

In [445]:
# proba 
X_valid = lgbm_test
y_pred_proba = lgbm_best_model.predict_proba(X_valid)[:, 1]
proba2 = y_pred_proba

#### XGboost

In [446]:
# 최적 파라미터로 학습
X_train = xgb_train.drop(columns=['target'])
y_train = xgb_train['target'].apply(lambda x: True if x == 'AbNormal' else False)

xgb_best_params =  {'n_estimators': 365, 'max_depth': 10, 'learning_rate': 0.008215675096193411} # {'n_estimators': 365, 'max_depth': 10, 'learning_rate': 0.008215675096193411} # study.best_trial.params
xgb_best_params["random_state"] = 42
xgb_best_params["seed"] = 42

xgb_best_model = xgb.XGBClassifier(**xgb_best_params)
xgb_best_model.fit(X_train, y_train)

In [447]:
X_valid = xgb_test
y_pred_proba = xgb_best_model.predict_proba(X_valid)[:, 1]
proba3 = y_pred_proba

In [448]:
# grid search로 찾은 weight 대입하여 threshold 적용
y_best = (14 * proba1 + 2 * proba2 + 3 * proba3) / (19)
y_pred_custom_threshold = (y_best >= 0.1339928075297637).astype(int)

In [449]:
# 최종 이상치 예측 개수 확인
sum(y_pred_custom_threshold)

374

## 6. Inference & Submission

### 제출코드

In [450]:
# 민감성 코드 - 하나라도 이상치라고 반영하면 무조건 이상치로 만들기
y_pred = np.where((y_pred_custom_threshold) == 0, "Normal", "AbNormal")

In [451]:
# submission
df_sub = pd.read_csv('submission.csv')
df_sub["target"] = y_pred

# 제출 파일 저장
df_sub.to_csv("submission.csv", index=False)