In [1]:
from pycaret.classification import *
from sklearn.model_selection import train_test_split

import pandas as pd
import os

In [2]:
ROOT_DIR = "../data"
RANDOM_STATE = 200

train_data = pd.read_csv(os.path.join(ROOT_DIR, "pca_train_.csv"))
train_data.rename(columns={'12': 'target'}, inplace=True)

df_normal = train_data[train_data["target"] == "Normal"]
df_abnormal = train_data[train_data["target"] == "AbNormal"]

normal_ratio = 0.5

num_normal = len(df_normal)
num_abnormal = len(df_abnormal)

## under sampling
df_normal = df_normal.sample(n=int(num_abnormal * normal_ratio), replace=False, random_state=RANDOM_STATE)
df_concat = pd.concat([df_normal, df_abnormal], axis=0).reset_index(drop=True)

train_data = df_concat

# df_train, df_val = train_test_split(
#     df_concat,
#     test_size=0.3,
#     stratify=df_concat["target"],
#     random_state=RANDOM_STATE,
# )

In [3]:
setup_clf = setup(data=train_data, target="target", train_size=0.7, session_id=333, fold_shuffle=True)

Unnamed: 0,Description,Value
0,Session id,333
1,Target,target
2,Target type,Binary
3,Target mapping,"AbNormal: 0, Normal: 1"
4,Original data shape,"(3525, 16)"
5,Transformed data shape,"(3525, 40)"
6,Transformed train set shape,"(2467, 40)"
7,Transformed test set shape,"(1058, 40)"
8,Numeric features,3
9,Categorical features,12


In [4]:
model = compare_models(sort='F1', n_select=10)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.6676,0.6246,0.6676,0.6483,0.652,0.1978,0.203,0.021
ridge,Ridge Classifier,0.6721,0.6254,0.6721,0.6493,0.6515,0.1944,0.2024,0.02
lr,Logistic Regression,0.6761,0.627,0.6761,0.6509,0.65,0.1898,0.2016,0.222
gbc,Gradient Boosting Classifier,0.666,0.6129,0.666,0.6444,0.6483,0.188,0.1938,0.092
ada,Ada Boost Classifier,0.6526,0.6071,0.6526,0.6374,0.6419,0.1789,0.1814,0.039
lightgbm,Light Gradient Boosting Machine,0.6417,0.6032,0.6417,0.6268,0.6317,0.1563,0.1582,0.273
rf,Random Forest Classifier,0.6312,0.6032,0.6312,0.6242,0.6267,0.1527,0.1535,0.061
et,Extra Trees Classifier,0.6198,0.5933,0.6198,0.6184,0.6186,0.1405,0.1408,0.059
knn,K Neighbors Classifier,0.6393,0.5798,0.6393,0.6115,0.6176,0.1165,0.1209,0.022
dt,Decision Tree Classifier,0.6141,0.5624,0.6141,0.6117,0.6123,0.1254,0.1258,0.02


In [5]:
tuned_model = [tune_model(i) for i in model[:9]]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.664,0.6152,0.664,0.4409,0.5299,0.0,0.0
1,0.664,0.5738,0.664,0.4409,0.5299,0.0,0.0
2,0.668,0.5273,0.668,0.4462,0.5351,0.0,0.0
3,0.668,0.5816,0.668,0.4462,0.5351,0.0,0.0
4,0.668,0.6061,0.668,0.4462,0.5351,0.0,0.0
5,0.668,0.5152,0.668,0.4462,0.5351,0.0,0.0
6,0.668,0.5289,0.668,0.4462,0.5351,0.0,0.0
7,0.6667,0.5858,0.6667,0.4444,0.5333,0.0,0.0
8,0.6667,0.5863,0.6667,0.4444,0.5333,0.0,0.0
9,0.6667,0.5599,0.6667,0.4444,0.5333,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6964,0.6656,0.6964,0.678,0.6766,0.2553,0.2666
1,0.6842,0.6051,0.6842,0.6595,0.6512,0.1967,0.2159
2,0.6599,0.6536,0.6599,0.6241,0.6235,0.126,0.1386
3,0.668,0.6476,0.668,0.6379,0.6377,0.1586,0.1707
4,0.668,0.5959,0.668,0.6451,0.6489,0.1867,0.1932
5,0.6478,0.5511,0.6478,0.6146,0.6194,0.1164,0.1236
6,0.6721,0.6418,0.6721,0.6414,0.6382,0.1602,0.1751
7,0.6829,0.633,0.6829,0.6587,0.6564,0.2041,0.2176
8,0.6789,0.6213,0.6789,0.6574,0.6594,0.2126,0.2208
9,0.7236,0.6875,0.7236,0.7102,0.7004,0.3061,0.3264


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6923,0.657,0.6923,0.6728,0.6713,0.2429,0.2545
1,0.6802,0.5964,0.6802,0.6527,0.64,0.1724,0.1955
2,0.6721,0.6608,0.6721,0.6402,0.6355,0.1543,0.1707
3,0.6761,0.6468,0.6761,0.6508,0.6512,0.1903,0.2011
4,0.6923,0.6041,0.6923,0.6701,0.6664,0.2255,0.2404
5,0.6437,0.5452,0.6437,0.6085,0.6138,0.1032,0.11
6,0.664,0.6347,0.664,0.6289,0.6265,0.1334,0.1476
7,0.6829,0.6365,0.6829,0.6578,0.654,0.1986,0.2138
8,0.6829,0.6239,0.6829,0.6628,0.6647,0.2252,0.2331
9,0.7154,0.6936,0.7154,0.7001,0.6895,0.2808,0.3023


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6842,0.6469,0.6842,0.6621,0.6605,0.2178,0.23
1,0.664,0.6048,0.664,0.6325,0.6302,0.1481,0.1616
2,0.668,0.6354,0.668,0.6339,0.6296,0.1408,0.1568
3,0.664,0.6248,0.664,0.6349,0.6369,0.1571,0.1667
4,0.664,0.5622,0.664,0.638,0.6414,0.1684,0.176
5,0.6275,0.5357,0.6275,0.598,0.606,0.0875,0.0905
6,0.6599,0.6357,0.6599,0.6374,0.6423,0.1723,0.1774
7,0.6789,0.6175,0.6789,0.6628,0.6666,0.233,0.2371
8,0.6667,0.6051,0.6667,0.6439,0.6475,0.1854,0.1919
9,0.6951,0.684,0.6951,0.6784,0.6802,0.2623,0.2693


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6802,0.6522,0.6802,0.6567,0.6551,0.2052,0.2175
1,0.6802,0.6056,0.6802,0.6531,0.6428,0.1781,0.1991
2,0.6721,0.6418,0.6721,0.6391,0.6327,0.1483,0.1663
3,0.668,0.6284,0.668,0.6379,0.6377,0.1586,0.1707
4,0.668,0.5728,0.668,0.6422,0.6447,0.1757,0.1842
5,0.6478,0.5331,0.6478,0.6146,0.6194,0.1164,0.1236
6,0.664,0.6363,0.664,0.6289,0.6265,0.1334,0.1476
7,0.6911,0.6145,0.6911,0.6676,0.658,0.2083,0.2292
8,0.6667,0.6088,0.6667,0.6439,0.6475,0.1854,0.1919
9,0.7114,0.6915,0.7114,0.695,0.684,0.268,0.29


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6802,0.6415,0.6802,0.6621,0.6649,0.2306,0.2366
1,0.6842,0.6085,0.6842,0.665,0.6665,0.233,0.241
2,0.6559,0.6079,0.6559,0.6337,0.639,0.1652,0.1696
3,0.668,0.6157,0.668,0.6451,0.6489,0.1867,0.1932
4,0.6761,0.5684,0.6761,0.6532,0.6554,0.2012,0.2095
5,0.6275,0.5381,0.6275,0.598,0.606,0.0875,0.0905
6,0.6721,0.6457,0.6721,0.6478,0.6501,0.1885,0.197
7,0.687,0.6118,0.687,0.6681,0.6699,0.2376,0.2453
8,0.6545,0.5925,0.6545,0.6397,0.6446,0.1853,0.1872
9,0.6911,0.6818,0.6911,0.6744,0.6768,0.2549,0.261


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6842,0.6572,0.6842,0.6621,0.6605,0.2178,0.23
1,0.6802,0.6159,0.6802,0.6531,0.6428,0.1781,0.1991
2,0.6761,0.5975,0.6761,0.6446,0.6358,0.1558,0.176
3,0.668,0.6192,0.668,0.6379,0.6377,0.1586,0.1707
4,0.6721,0.5622,0.6721,0.6478,0.6501,0.1885,0.197
5,0.6478,0.5422,0.6478,0.6146,0.6194,0.1164,0.1236
6,0.664,0.6292,0.664,0.6289,0.6265,0.1334,0.1476
7,0.687,0.6156,0.687,0.6618,0.6522,0.1951,0.216
8,0.6667,0.5967,0.6667,0.6439,0.6475,0.1854,0.1919
9,0.7114,0.6707,0.7114,0.695,0.684,0.268,0.29


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6802,0.6494,0.6802,0.6567,0.6551,0.2052,0.2175
1,0.6802,0.5987,0.6802,0.6531,0.6428,0.1781,0.1991
2,0.664,0.6399,0.664,0.6304,0.6293,0.1394,0.1525
3,0.6761,0.6261,0.6761,0.6496,0.6489,0.1848,0.197
4,0.668,0.5676,0.668,0.6422,0.6447,0.1757,0.1842
5,0.6478,0.5363,0.6478,0.6146,0.6194,0.1164,0.1236
6,0.668,0.651,0.668,0.6352,0.6324,0.1468,0.1615
7,0.7033,0.62,0.7033,0.6843,0.6773,0.2526,0.2705
8,0.6667,0.6018,0.6667,0.6439,0.6475,0.1854,0.1919
9,0.7154,0.6897,0.7154,0.7001,0.6895,0.2808,0.3023


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6518,0.5764,0.6518,0.5906,0.5757,0.0432,0.0597
1,0.6599,0.6148,0.6599,0.6145,0.5982,0.0855,0.1079
2,0.6437,0.5491,0.6437,0.5781,0.5772,0.0302,0.0383
3,0.668,0.6058,0.668,0.619,0.5788,0.0543,0.0875
4,0.6802,0.6045,0.6802,0.6542,0.6015,0.0993,0.1479
5,0.6397,0.5329,0.6397,0.5191,0.54,-0.0305,-0.0507
6,0.6802,0.5868,0.6802,0.6508,0.6104,0.1129,0.155
7,0.6911,0.664,0.6911,0.6842,0.6174,0.1364,0.1987
8,0.6667,0.5834,0.6667,0.6199,0.5872,0.0682,0.0993
9,0.6626,0.5463,0.6626,0.6081,0.5797,0.0532,0.0793


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [6]:
blended_hard = blend_models(estimator_list=tuned_model,
                       fold=10,
                       method="hard",
                       optimize="F1")

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6923,0.0,0.6923,0.6728,0.6713,0.2429,0.2545
1,0.6761,0.0,0.6761,0.6477,0.6396,0.1705,0.1894
2,0.668,0.0,0.668,0.6339,0.6296,0.1408,0.1568
3,0.6721,0.0,0.6721,0.6438,0.6433,0.1717,0.1839
4,0.6721,0.0,0.6721,0.6478,0.6501,0.1885,0.197
5,0.6437,0.0,0.6437,0.6106,0.6163,0.1093,0.1156
6,0.668,0.0,0.668,0.6366,0.6351,0.1528,0.1661
7,0.687,0.0,0.687,0.6635,0.6596,0.2116,0.2267
8,0.6667,0.0,0.6667,0.6439,0.6475,0.1854,0.1919
9,0.7154,0.0,0.7154,0.7001,0.6895,0.2808,0.3023


In [7]:
final_model = finalize_model(blended_hard)
evaluate_model(final_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [8]:
final_model

In [9]:
test_data = pd.read_csv(os.path.join(ROOT_DIR, "pca_test_.csv"))

train_columns = train_data.columns
#test_data = test_data[train_columns]

df_test_x = test_data#.drop(columns=['target'])

In [10]:
test_pred = predict_model(final_model, data=test_data)
test_pred



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,prediction_label
0,Dam dispenser #2,AJX75334501,3J1XF767-1,AJX75334501,3J1XF767-1,OK,Fill1 dispenser #2,AJX75334501,3J1XF767-1,Fill2 dispenser #2,AJX75334501,3J1XF767-1,,1537.304077,-8587.737305,-1290.009155,AbNormal
1,Dam dispenser #2,AJX75334501,4B1XD472-2,AJX75334501,4B1XD472-2,OK,Fill1 dispenser #2,AJX75334501,4B1XD472-2,Fill2 dispenser #2,AJX75334501,4B1XD472-2,,-5808.053711,2417.458252,-613.402466,Normal
2,Dam dispenser #1,AJX75334501,3H1XE355-1,AJX75334501,3H1XE355-1,OK,Fill1 dispenser #1,AJX75334501,3H1XE355-1,Fill2 dispenser #1,AJX75334501,3H1XE355-1,,12760.987305,2190.877686,3088.680664,AbNormal
3,Dam dispenser #2,AJX75334501,3L1XA128-1,AJX75334501,3L1XA128-1,OK,Fill1 dispenser #2,AJX75334501,3L1XA128-1,Fill2 dispenser #2,AJX75334501,3L1XA128-1,,-12905.684570,-2384.216797,2237.460693,Normal
4,Dam dispenser #1,AJX75334501,4A1XA639-1,AJX75334501,4A1XA639-1,OK,Fill1 dispenser #1,AJX75334501,4A1XA639-1,Fill2 dispenser #1,AJX75334501,4A1XA639-1,,-3197.746094,3308.233398,-1520.130127,AbNormal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17356,Dam dispenser #2,AJX75334501,3K1XB597-1,AJX75334501,3K1XB597-1,OK,Fill1 dispenser #2,AJX75334501,3K1XB597-1,Fill2 dispenser #2,AJX75334501,3K1XB597-1,,-12908.366211,-2379.953125,2232.846924,AbNormal
17357,Dam dispenser #2,AJX75334501,4A1XB974-1,AJX75334501,4A1XB974-1,OK,Fill1 dispenser #2,AJX75334501,4A1XB974-1,Fill2 dispenser #2,AJX75334501,4A1XB974-1,,-3385.885986,3359.517090,-1899.730835,Normal
17358,Dam dispenser #1,AJX75334501,3L1XA998-1,AJX75334501,3L1XA998-1,OK,Fill1 dispenser #1,AJX75334501,3L1XA998-1,Fill2 dispenser #1,AJX75334501,3L1XA998-1,,-2732.416504,2977.588379,-1937.937500,AbNormal
17359,Dam dispenser #1,AJX75334501,3F1XC376-1,AJX75334501,3F1XC376-1,OK,Fill1 dispenser #1,AJX75334501,3F1XC376-1,Fill2 dispenser #1,AJX75334501,3F1XC376-1,,18232.107422,-1331.921143,-2312.770508,AbNormal


In [11]:
df_sub = pd.read_csv("../data/submission.csv")
df_sub["target"] = test_pred["prediction_label"]
df_sub.to_csv("../data/automl/submission-18.csv", index=False)