In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pycaret.anomaly import *
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

pd.options.display.float_format = '{:.5f}'.format

In [7]:
# Load the outlier-filtered dataset; the first CSV column becomes the index.
data = pd.read_csv(
    './remove_outlier_dataset.csv',
    index_col=0,
    encoding='utf-8',
)

# Parse the timestamp column and encode the operating state as 1/0
# ("가동" = running -> 1, "정지" = stopped -> 0).
data["registration_time"] = pd.to_datetime(data["registration_time"])
data["working"] = data["working"].map({"가동": 1, "정지": 0})

# Features: every column except the label and the two "*_cate" columns.
x = data.drop(columns=["passorfail", "molten_temp_cate", "biscuit_thickness_cate"])
y = data["passorfail"]

# 70/30 split, shuffled with a fixed seed and stratified on the label.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    shuffle=True,
    stratify=y,
    random_state=42,
)

In [8]:
# Disabled helper call: uncomment to show the anomaly-detector IDs that
# pycaret.anomaly exposes (presumably the table the IDs used below come from).
# models()

In [10]:
class PycaretAD:
    """Small wrapper around the pycaret.anomaly functional API.

    Constructing an instance runs ``setup`` on the training features;
    ``train`` then fits one detector, predicts on the test features and
    prints the accuracy of the predicted ``Anomaly`` flag against the labels.

    Attributes:
        x_train: Features passed to ``setup``.
        x_test: Features passed to ``predict_model`` in ``train``.
        y_test: Optional ground-truth labels for ``x_test``.
        anomalySetup: Return value of the ``setup`` call.
        modelResults: ``{modelType: {"model", "assigned", "pred", "accuracy"}}``.
        modelTypes: Model IDs trained on this instance, in first-trained order.
        preds: ``{modelType: pred}`` shortcut to the latest predictions.

    NOTE(review): ``setup``/``create_model`` come from pycaret's functional
    API, which presumably keeps a single active experiment per process; if so,
    creating a second ``PycaretAD`` redirects later ``train`` calls on older
    instances to the newest setup — confirm before mixing instances.
    """

    def __init__(self, x_train, x_test, y_test=None):
        """Store the data and initialise the pycaret anomaly experiment.

        Args:
            x_train: Training features handed to ``setup``.
            x_test: Held-out features scored by ``train``.
            y_test: Optional labels aligned with ``x_test``. When omitted,
                ``train`` falls back to the module-level ``y_test`` name,
                which is what the original implementation always used.
        """
        self.x_train = x_train
        self.x_test = x_test
        self.y_test = y_test

        # These containers used to be class attributes, i.e. a single dict/list
        # shared by every instance, so results of one instance silently
        # overwrote another's. They are now created per instance.
        self.preds = {}
        self.modelTypes = []
        self.modelResults = {}

        self.anomalySetup = setup(
            data=x_train,
            verbose=0,
            session_id=123,
            normalize=True,
            normalize_method="robust",
            use_gpu=True,
        )

    def train(self, modelType):
        """Fit ``modelType``, score ``x_test`` and print the accuracy.

        Args:
            modelType: Model ID string understood by ``create_model``.

        Returns:
            None. Results are stored in ``self.modelResults[modelType]`` and
            the accuracy is printed.

        Raises:
            NameError: If no ``y_test`` was given to the constructor and no
                module-level ``y_test`` exists either.
        """
        model = create_model(modelType)

        # Previously computed and thrown away; kept so it can be inspected.
        assigned = assign_model(model)

        pred = predict_model(model, self.x_test)['Anomaly']

        # Prefer the labels bound to this instance; fall back to the global
        # for callers that still construct PycaretAD(x_train, x_test).
        truth = self.y_test if self.y_test is not None else y_test
        accuracy = accuracy_score(truth, pred)

        self.modelResults[modelType] = {
            "model": model,
            "assigned": assigned,
            "pred": pred,
            "accuracy": accuracy,
        }
        self.preds[modelType] = pred
        if modelType not in self.modelTypes:
            self.modelTypes.append(modelType)

        print(accuracy)


In [10]:
# Build the wrapper on the full feature set; the constructor runs pycaret
# setup() on x_train (robust normalisation, session_id=123, use_gpu=True).
pycaretAD = PycaretAD(x_train, x_test)

In [12]:
# "cluster" = Clustering-Based Local Outlier in pycaret's model table.
# train() prints accuracy_score(y_test, predicted 'Anomaly' flag on x_test).
pycaretAD.train("cluster")

0.9327455552317109


In [15]:
# "iforest" = Isolation Forest; prints accuracy of the 'Anomaly' flag vs. y_test.
pycaretAD.train("iforest")

0.9596691926552026


In [16]:
# "histogram" = Histogram-based Outlier Detection; prints accuracy vs. y_test.
pycaretAD.train("histogram")

0.9345671815797144


In [18]:
# "lof" = Local Outlier Factor; prints accuracy vs. y_test.
pycaretAD.train("lof")

0.9305231710871466


In [20]:
# "pca" = Principal Component Analysis detector; prints accuracy vs. y_test.
pycaretAD.train("pca")

0.9397041678810842


In [21]:
# "mcd" = Minimum Covariance Determinant; prints accuracy vs. y_test.
pycaretAD.train("mcd")

0.9471728359078986


In [24]:
# IDs of the detectors trained so far; the order defines the column order
# when the predictions are collected and exported.
modelList = [
    "cluster",
    "iforest",
    "histogram",
    "lof",
    "pca",
    "mcd",
]

In [25]:
# Collect each detector's test-set 'Anomaly' predictions, in modelList order.
preds = [pycaretAD.modelResults[modelName]["pred"] for modelName in modelList]

In [35]:
# Export every detector's test-set prediction next to the ground-truth label.
# `keys` is what labels the concatenated Series as columns; `names` only names
# the levels of a hierarchical index, so the previous call wrote six columns
# that were all called "Anomaly". Rows are aligned on the index of x_test/y_test.
pd.concat(preds + [y_test], axis=1, keys=modelList + ["passorfail"]).to_csv("./pred_result.csv")

In [None]:
# Side-by-side preview of all predictions. `keys` gives each column its
# detector ID instead of repeating the Series name "Anomaly" for every column.
pd.concat(preds, axis=1, keys=modelList)

In [None]:
# Ad-hoc run of the "sod" detector (Subspace Outlier Detection in pycaret's
# model table) outside the PycaretAD wrapper, so nothing is stored in
# pycaretAD.modelResults.
# NOTE(review): this relies on whichever pycaret setup() is currently active
# and on the notebook-level x_test / y_test — confirm that is the intended data.
model_sod = create_model("sod")
# Result is not used further in this cell.
fitting = assign_model(model_sod)
# Keep only the predicted 'Anomaly' flag for the held-out rows.
pred = predict_model(model_sod, x_test)['Anomaly']
print(accuracy_score(y_test, pred))

In [28]:
# Inspect the available feature names before picking a subset.
x_train.columns

Index(['count', 'working', 'molten_temp', 'facility_operation_cycleTime',
       'production_cycletime', 'low_section_speed', 'high_section_speed',
       'molten_volume', 'cast_pressure', 'biscuit_thickness',
       'upper_mold_temp1', 'upper_mold_temp2', 'upper_mold_temp3',
       'lower_mold_temp1', 'lower_mold_temp2', 'lower_mold_temp3',
       'sleeve_temperature', 'physical_strength', 'Coolant_temperature',
       'EMS_operation_time', 'registration_time', 'tryshot_signal',
       'mold_code', 'heating_furnace'],
      dtype='object')

In [32]:
# Four-feature training subset used for the second experiment.
x_train2 = x_train.loc[
    :,
    [
        "cast_pressure",
        "biscuit_thickness",
        "lower_mold_temp2",
        "sleeve_temperature",
    ],
]

In [33]:
# Matching four-feature subset of the held-out rows.
x_test2 = x_test.loc[
    :,
    [
        "cast_pressure",
        "biscuit_thickness",
        "lower_mold_temp2",
        "sleeve_temperature",
    ],
]

In [26]:
# Second wrapper restricted to the four selected features; constructing it
# runs pycaret setup() again, this time on x_train2.
# NOTE(review): the saved execution counter of this cell (26) is lower than
# those of the cells defining x_train2/x_test2 (32/33) — re-run the notebook
# top to bottom to make sure the result below matches the code shown.
pycaretAD2 = PycaretAD(x_train2, x_test2)

In [27]:
# Isolation Forest on the four-feature subset; prints accuracy of the
# predicted 'Anomaly' flag on x_test2 against y_test.
pycaretAD2.train("iforest")

0.9669921305741767


In [35]:
# Column means of the four selected features on the held-out rows.
x_test2.mean()

cast_pressure        324.98437
biscuit_thickness     50.18238
lower_mold_temp2     199.77845
sleeve_temperature   417.64511
dtype: float64

In [38]:
# Four selected features taken from the full dataset.
# The column list previously ended with an empty-string label (""), which is
# not a column of `data` and made this selection raise KeyError.
# TODO(review): a fifth column (possibly the "passorfail" label) may have been
# intended where the empty string was — add it here once confirmed.
posiData = data[["cast_pressure", "biscuit_thickness", "lower_mold_temp2", "sleeve_temperature"]]

Unnamed: 0,cast_pressure,biscuit_thickness,lower_mold_temp2,sleeve_temperature
90523,333.00000,54.00000,219.00000,296.00000
81539,332.00000,49.00000,243.00000,102.00000
56775,329.00000,53.00000,181.00000,429.00000
12274,333.00000,53.00000,161.00000,504.00000
37784,331.00000,45.00000,256.00000,486.00000
...,...,...,...,...
10562,333.00000,54.00000,140.00000,474.00000
39428,331.00000,42.00000,180.00000,475.00000
6885,333.00000,45.00000,137.00000,498.00000
13704,331.00000,52.00000,199.00000,477.00000
