In [1]:
from skorch.helper import DataFrameTransformer
from skorch import NeuralNetClassifier
from sklearn.pipeline import Pipeline
from pycaret.datasets import get_data
from pycaret.classification import *
from sklearn.metrics import *
import torch.optim as optim
import torch.nn as nn
import numpy as np
import pycaret

In [2]:
data  = get_data('electrical_grid')

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,unstable


In [3]:
target = 'stabf'
data[target].value_counts()

unstable    6380
stable      3620
Name: stabf, dtype: int64

In [4]:
data[target] = data[target].replace({'unstable' : 1, 
                                     'stable' : 0}).astype(np.int64)

train        = data.sample(frac = 0.95, random_state = 99)
test         = data.drop(train.index)

train.reset_index(inplace = True, drop = True)
test.reset_index(inplace  = True, drop = True)

clfl         = setup(data       = train, target = target,
                     train_size =  0.8, fold   =      5,
                     session_id =  999, log_experiment = False,
                     experiment_name = 'electrical_Grid_ml')

Unnamed: 0,Description,Value
0,Session id,999
1,Target,stabf
2,Target type,Binary
3,Original data shape,"(9500, 13)"
4,Transformed data shape,"(9500, 13)"
5,Transformed train set shape,"(7600, 13)"
6,Transformed test set shape,"(1900, 13)"
7,Numeric features,12
8,Preprocess,True
9,Imputation type,simple


In [5]:
best_model  = compare_models(sort = 'AUC')
tuned_model = create_model('catboost')
tuned_model = tune_model(tuned_model)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
catboost,CatBoost Classifier,0.9482,0.9913,0.9709,0.949,0.9598,0.8868,0.8873,0.376
lightgbm,Light Gradient Boosting Machine,0.9347,0.9857,0.9577,0.941,0.9493,0.8578,0.858,0.33
et,Extra Trees Classifier,0.9191,0.9818,0.9715,0.9081,0.9387,0.82,0.8241,0.384
rf,Random Forest Classifier,0.9187,0.977,0.9532,0.922,0.9373,0.8217,0.8226,0.346
gbc,Gradient Boosting Classifier,0.9121,0.9762,0.9575,0.9095,0.9329,0.8058,0.8081,0.324
qda,Quadratic Discriminant Analysis,0.7721,0.9435,0.8592,0.8342,0.8202,0.4814,0.5457,0.324
ada,Ada Boost Classifier,0.843,0.9271,0.8998,0.8606,0.8797,0.6542,0.6557,0.336
nb,Naive Bayes,0.8353,0.9174,0.9266,0.8338,0.8777,0.6275,0.6358,0.308
lda,Linear Discriminant Analysis,0.8174,0.8917,0.8789,0.8418,0.8599,0.5979,0.5991,0.308
lr,Logistic Regression,0.8188,0.8916,0.8814,0.842,0.8612,0.6007,0.602,0.938


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9428,0.9907,0.9691,0.9428,0.9558,0.8747,0.8754
1,0.9447,0.9898,0.9722,0.943,0.9574,0.8789,0.8797
2,0.952,0.9922,0.9691,0.9563,0.9626,0.8955,0.8956
3,0.9493,0.9916,0.9701,0.9514,0.9607,0.8896,0.8899
4,0.952,0.9919,0.9742,0.9516,0.9628,0.8951,0.8956
Mean,0.9482,0.9913,0.9709,0.949,0.9598,0.8868,0.8873
Std,0.0038,0.0009,0.002,0.0053,0.0028,0.0085,0.0083


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9401,0.9897,0.9639,0.9435,0.9536,0.8693,0.8697
1,0.9428,0.989,0.9639,0.9473,0.9555,0.8752,0.8755
2,0.9559,0.9919,0.9722,0.9593,0.9657,0.9041,0.9042
3,0.9539,0.9918,0.9742,0.9545,0.9642,0.8996,0.8999
4,0.9539,0.992,0.9742,0.9545,0.9642,0.8996,0.8999
Mean,0.9493,0.9909,0.9697,0.9518,0.9607,0.8895,0.8899
Std,0.0065,0.0013,0.0048,0.0057,0.005,0.0143,0.0143


Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [6]:
tuned_model = finalize_model(tuned_model)
prediction  = predict_model(tuned_model, test)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,CatBoost Classifier,0.964,0.996,0.9688,0.9748,0.9718,0.9221,0.9221


In [7]:
preds = prediction['prediction_label']
gt    = test[target]

print(classification_report(preds, gt))

              precision    recall  f1-score   support

           0       0.96      0.95      0.95       182
           1       0.97      0.97      0.97       318

    accuracy                           0.96       500
   macro avg       0.96      0.96      0.96       500
weighted avg       0.96      0.96      0.96       500



In [8]:
class Net(nn.Module):
    
    def __init__(self, num_inputs = 12, num_units_d1 = 200, num_units_d2 = 100):
        
        super(Net, self).__init__()
        self.dense0  = nn.Linear(num_inputs, num_units_d1)
        self.nonlin  = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        
        self.dense1  = nn.Linear(num_units_d1, num_units_d2)
        self.output  = nn.Linear(num_units_d2, 2)
        
        self.softmax = nn.Softmax(dim = -1)
        
    
    def forward(self, X, **kwargs):
        
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        
        X = self.nonlin(self.dense1(X))
        X = self.softmax(self.output(X))
        
        return X

In [9]:
class customNLLLoss(nn.Module):
    
    criterion = nn.NLLLoss()
    
    def __init__(self):
        super().__init__()
        
    def forward(self, logits, target):
        return self.criterion(logits, target.long())

In [10]:
net = NeuralNetClassifier(
        module = Net, criterion = customNLLLoss,
        max_epochs = 30, lr = 0.1, batch_size = 32, train_split = None
    )

nn_pipe = Pipeline([
            ("transform", DataFrameTransformer()),
            ("net", net),
        ])

In [11]:
skorch_model = create_model(nn_pipe) 
best_model   = compare_models(include = [skorch_model, tuned_model], sort = "AUC")

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8053,0.8745,0.8258,0.8631,0.844,0.5852,0.5862
1,0.8197,0.8861,0.8536,0.8625,0.858,0.6112,0.6113
2,0.8132,0.8802,0.8784,0.8369,0.8571,0.5876,0.5891
3,0.8171,0.8926,0.838,0.8703,0.8538,0.6098,0.6106
4,0.8007,0.8705,0.8916,0.8136,0.8508,0.5524,0.5577
Mean,0.8112,0.8808,0.8575,0.8493,0.8528,0.5892,0.591
Std,0.0072,0.0079,0.0245,0.0212,0.0051,0.0214,0.0196


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
1,CatBoost Classifier,0.9493,0.9909,0.9697,0.9518,0.9607,0.8895,0.8899,0.39
0,NeuralNetClassifier,0.8112,0.8808,0.8575,0.8493,0.8528,0.5892,0.591,0.638


In [12]:
custom_grid = {
                'net__max_epochs' : [  20, 30],
                'net__lr'         : [0.01, 0.05, 0.1],
                'net__module__num_units_d1' : [50, 100, 150],
                'net__module__num_units_d2' : [50, 100, 150],
                'net__optimizer'            : [optim.Adam, optim.SGD, optim.RMSprop]
            }

tuned_skorch_model = tune_model(skorch_model, custom_grid = custom_grid)
best_model         = compare_models(include = [tuned_skorch_model, skorch_model, tuned_model], sort = "AUC")

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7947,0.8391,0.8866,0.8098,0.8465,0.5388,0.5439
1,0.7993,0.8677,0.8928,0.8116,0.8503,0.5483,0.554
2,0.8079,0.871,0.8773,0.8311,0.8536,0.575,0.5768
3,0.8191,0.8721,0.8937,0.8343,0.863,0.5977,0.6008
4,0.7974,0.843,0.9174,0.7959,0.8523,0.5346,0.5485
Mean,0.8037,0.8586,0.8936,0.8165,0.8531,0.5589,0.5648
Std,0.0089,0.0144,0.0133,0.0143,0.0055,0.024,0.0212


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
2,CatBoost Classifier,0.9493,0.9909,0.9697,0.9518,0.9607,0.8895,0.8899,0.376
0,NeuralNetClassifier,0.8112,0.8808,0.8575,0.8493,0.8528,0.5892,0.591,0.672
1,NeuralNetClassifier,0.8112,0.8808,0.8575,0.8493,0.8528,0.5892,0.591,0.662
