In [None]:
from pycaret.classification import *
from sklearn.model_selection import train_test_split


import pandas as pd
import os

In [None]:
# --- Configuration ---------------------------------------------------------
ROOT_DIR = "../data"
RANDOM_STATE = 200

# --- Load the labelled training set ----------------------------------------
train_data = pd.read_csv(os.path.join(ROOT_DIR, "train.csv"))

# --- Separate the two classes ----------------------------------------------
df_normal = train_data.loc[train_data["target"] == "Normal"]
df_abnormal = train_data.loc[train_data["target"] == "AbNormal"]

# Number of "Normal" rows to keep per "AbNormal" row (1.0 -> balanced classes).
normal_ratio = 1.0

# Class sizes before any down-sampling.
num_normal = len(df_normal)
num_abnormal = len(df_abnormal)

# --- Down-sample the "Normal" class and rebuild one frame --------------------
df_normal = df_normal.sample(
    n=int(num_abnormal * normal_ratio),
    replace=False,
    random_state=RANDOM_STATE,
)
df_concat = pd.concat([df_normal, df_abnormal], ignore_index=True)

# --- Stratified 70/30 train/validation split of the balanced frame ----------
df_train, df_val = train_test_split(
    df_concat,
    test_size=0.3,
    random_state=RANDOM_STATE,
    stratify=df_concat["target"],
)

In [None]:
# Initialise the PyCaret classification experiment on the balanced frame.
# NOTE(review): this passes df_concat with its own train_size/session_id, so
# the manual df_train/df_val split made earlier is not what PyCaret uses here.
setup_clf = setup(
    data=df_concat,
    target="target",
    train_size=0.7,
    session_id=777,
)

In [None]:
# Show the estimators PyCaret has available for this experiment.
# The return value is only displayed, not stored.
models()

In [None]:
# Compare candidate estimators with 5-fold cross-validation, rank them by F1
# and keep the top five (`model` is iterated as a collection further down).
model = compare_models(
    n_select=5,
    fold=5,
    sort="F1",
)

In [None]:
# Tune each selected estimator with tune_model's default settings,
# keeping the same order as `model`.
tuned_model = list(map(tune_model, model))

In [None]:
# Blend the first two tuned estimators with soft voting, using 5-fold
# cross-validation and F1 as the metric to optimise.
blended_soft = blend_models(
    estimator_list=tuned_model[:2],
    method="soft",
    optimize="F1",
    fold=5,
)

In [None]:
# Finalise the blended model, then open PyCaret's evaluation view for it.
# NOTE(review): finalize_model presumably refits on the full setup data
# (hold-out included), so the evaluation shown below may be optimistic —
# confirm against the PyCaret docs, or evaluate `blended_soft` before finalising.
final_model = finalize_model(blended_soft)
evaluate_model(final_model)

In [37]:
# Prepare training data
# Cast every column of df_train that converts cleanly to int (in place) and
# record the names of those columns in `features`. Columns that cannot be
# cast are left untouched and skipped.
features = []

for col in df_train.columns:
    try:
        df_train[col] = df_train[col].astype(int)
    except (ValueError, TypeError, OverflowError):
        # Only conversion failures are skipped (non-numeric text, NaN/inf,
        # out-of-range values). A bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit and hide unrelated bugs.
        continue
    features.append(col)

# Load the test set and keep the same columns as the validation split
# (this still includes "target" at this point).
test_data = pd.read_csv(os.path.join(ROOT_DIR, "test.csv"))
df_test_x = test_data[df_val.columns]

In [44]:
# Remove the label column so only feature columns are passed to the model.
# errors="ignore" keeps this cell idempotent: it rebinds df_test_x to its own
# output, so a second run would otherwise raise KeyError ("target" already gone).
df_test_x = df_test_x.drop(columns=["target"], errors="ignore")

In [45]:
# Score the test features with the finalised model. The returned frame
# carries `prediction_label` and `prediction_score` columns (see output below).
test_pred = predict_model(
    final_model,
    data=df_test_x,
)


In [48]:
# Display the prediction frame (features plus prediction_label / prediction_score).
test_pred

Unnamed: 0,Wip Line_Dam,Process Desc._Dam,Equipment_Dam,Model.Suffix_Dam,Workorder_Dam,Insp. Seq No._Dam,Insp Judge Code_Dam,CURE END POSITION X Collect Result_Dam,CURE END POSITION X Unit Time_Dam,CURE END POSITION X Judge Value_Dam,...,Production Qty Unit Time_Fill2,Production Qty Judge Value_Fill2,Receip No Collect Result_Fill2,Receip No Unit Time_Fill2,Receip No Judge Value_Fill2,WorkMode Collect Result_Fill2,WorkMode Unit Time_Fill2,WorkMode Judge Value_Fill2,prediction_label,prediction_score
0,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3J1XF767-1,1,OK,1000.0,,,...,,,1,,,0,,,AbNormal,0.6890
1,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,4B1XD472-2,1,OK,1000.0,,,...,,,256,,,1,,,Normal,0.7094
2,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3H1XE355-1,1,OK,240.0,,,...,,,1,,,0,,,AbNormal,0.5753
3,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3L1XA128-1,1,OK,1000.0,,,...,,,0,,,1,,,Normal,0.6373
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,4A1XA639-1,1,OK,240.0,,,...,,,215,,,1,,,AbNormal,0.5442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17356,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3K1XB597-1,1,OK,1000.0,,,...,,,131,,,1,,,AbNormal,0.7099
17357,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,4A1XB974-1,1,OK,1000.0,,,...,,,279,,,1,,,Normal,0.6198
17358,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3L1XA998-1,1,OK,240.0,,,...,,,66,,,1,,,AbNormal,0.6654
17359,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1XC376-1,1,OK,240.0,,,...,,,1,,,0,,,AbNormal,0.7474


In [49]:
# Build the submission: start from the submission template and fill its
# "target" column with the predicted labels.
df_sub = pd.read_csv(os.path.join(ROOT_DIR, "submission.csv"))
# NOTE(review): this assignment aligns on index; it assumes df_sub and
# test_pred share the same default row index and order — confirm.
df_sub["target"] = test_pred["prediction_label"]

# DataFrame.to_csv does not create missing parent directories, so make sure
# the run-specific output folder exists before writing.
submission_dir = os.path.join(ROOT_DIR, "automl-1")
os.makedirs(submission_dir, exist_ok=True)
df_sub.to_csv(os.path.join(submission_dir, "submission.csv"), index=False)