In [232]:
import pandas as pd
import numpy as np
import sklearn.utils

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from numpy import mean, std
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import StandardScaler

In [233]:
df = pd.read_csv('stage_c_data.csv')
df.head(5)

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [234]:
df.isna().sum()

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stab     0
stabf    0
dtype: int64

In [235]:
df = df.drop('stab', axis = 1)
df.head(5)

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,unstable


In [236]:
X = df.drop(columns = 'stabf')
X.head(5)

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923


In [237]:
y = df['stabf']
y.head(5)

0    unstable
1      stable
2    unstable
3    unstable
4    unstable
Name: stabf, dtype: object

In [238]:
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size = 0.2, random_state = 1)

In [239]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_train.head(5)

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
2694,6.255995,2.542401,7.024714,9.476518,3.529888,-1.224881,-0.688228,-1.61678,0.568221,0.618403,0.685739,0.660088
5140,5.070581,5.490253,8.075688,0.761075,4.220888,-1.280596,-1.902185,-1.038107,0.443515,0.097244,0.916955,0.129254
2568,1.220072,8.804028,3.874283,8.433949,3.614027,-1.039236,-0.953566,-1.621224,0.908353,0.923594,0.238881,0.660156
3671,7.498402,6.697603,8.798626,2.126236,3.134585,-1.581906,-0.589386,-0.963293,0.260826,0.899003,0.964752,0.600598
7427,7.074006,1.337511,6.100756,7.759156,2.526922,-0.92254,-0.6326,-0.971782,0.98458,0.716082,0.836928,0.165162


In [240]:
y_train = y_train.replace({'stable': 1, 'unstable': 0})
y_train.head(5)

2694    0
5140    0
2568    0
3671    0
7427    0
Name: stabf, dtype: int64

In [241]:
y_test = y_test.replace({'stable': 1, 'unstable': 0})
y_test.head(5)

9953    0
3850    0
4962    1
3886    1
5437    0
Name: stabf, dtype: int64

In [242]:
X_test_scaled = scaler.fit_transform(X_test)
X_test.head(5)

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
9953,6.877876,4.11382,9.356768,8.299753,4.056779,-1.89747,-1.590581,-0.568728,0.276567,0.845536,0.11244,0.822562
3850,5.802841,6.271371,4.73154,3.819867,3.579569,-1.70948,-1.067511,-0.802579,0.077527,0.416478,0.912846,0.861306
4962,2.286998,4.385142,2.830232,5.29388,3.035814,-1.202764,-0.902011,-0.931039,0.924216,0.130186,0.703887,0.063811
3886,5.01992,2.209962,6.26608,0.578901,4.322584,-1.960207,-1.074561,-1.287815,0.54691,0.065992,0.427349,0.814648
5437,7.646145,9.187896,5.484219,9.934313,3.634226,-1.254541,-1.335366,-1.044319,0.561528,0.121611,0.787318,0.300314


# RANDOM FOREST

In [243]:
#train

model = RandomForestClassifier(random_state = 1)
model.fit(X_train, y_train)

RandomForestClassifier(random_state=1)

In [244]:
new_predictions = model.predict(X_test)

cnf_mat = confusion_matrix(y_test, new_predictions)
cnf_mat

array([[1238,   50],
       [  91,  621]])

In [245]:
accuracy = accuracy_score(new_predictions, y_test)
print( 'Accuracy: {}' .format(round(accuracy*100 ), 4))

Accuracy: 93


EVALUATION

In [246]:
score = model.score(X_test, y_test)
score

0.9295

In [247]:
#CROSS-VALIDATION

cv_RF = cross_val_score(model, X_train, y_train, cv= 5 , scoring= 'f1_macro' )
cv_RF

array([0.91114291, 0.91121721, 0.91251512, 0.89775316, 0.91389817])

In [248]:
average_score_cv_RF = cv_RF.mean() * 100
average_score_cv_RF

90.9305315608359

In [249]:
#K-FOLD VALIDATION

k_fold_RF = KFold(n_splits = 5, random_state = None)
k_fold_RF = cross_val_score(model, X_train, y_train, cv= k_fold_RF, scoring = 'f1_macro')
k_fold_RF

array([0.89689287, 0.91360336, 0.9087038 , 0.89908125, 0.91593395])

In [250]:
average_score_kfold_RF = k_fold_RF.mean() * 100
average_score_kfold_RF

90.68430434415845

In [251]:
#Stratified K-FOLD VALIDATION

S_k_fold_RF = StratifiedKFold(n_splits = 5, random_state = None)
S_k_fold_RF = cross_val_score(model, X_train, y_train, cv= S_k_fold_RF, scoring = 'f1_macro')
S_k_fold_RF

array([0.91114291, 0.91121721, 0.91251512, 0.89775316, 0.91389817])

In [252]:
average_score_skfold_RF = S_k_fold_RF.mean() * 100
average_score_skfold_RF

90.9305315608359

In [253]:
# #lEAVE ONE OUT VALIDATION

# loo_RF = LeaveOneOut()
# loo_RF = cross_val_score(model, X_train, y_train, cv= loo_RF, scoring = 'f1_macro')

# average_score_RF = loo_RF.mean() * 100
# average_score_RF

# EXTRA TREES CLASSIFER

In [254]:
model_params = {'n_estimators' : 100, 'min_samples_split' : 2, 'min_samples_leaf' : 8, 'max_features' : None}

In [255]:
rf_model = RandomizedSearchCV
clf = RandomizedSearchCV(rf_model, model_params, cv=5, n_iter=10, scoring = 'accuracy', n_jobs = -1, verbose = 1)

#rf = clf.fit(X_train, y_train)

In [256]:
#train


model_EXT = ExtraTreesClassifier(random_state = 1)
model_EXT.fit(X_train, y_train)

ExtraTreesClassifier(random_state=1)

EVALUATIONS

In [257]:
score_EXT = model_EXT.score(X_test, y_test)
score_EXT

0.9285

In [258]:
#CROSS-VALIDATION

cv_EXT = cross_val_score(model_EXT, X_train, y_train, cv= 5 , scoring= 'f1_macro' )
cv_EXT

array([0.90125198, 0.90338773, 0.91047057, 0.90799104, 0.91886396])

In [259]:
average_score_cv_EXT = cv_EXT.mean() * 100
average_score_cv_EXT

90.83930561479285

In [260]:
#K-FOLD VALIDATION

k_fold_EXT = KFold(n_splits = 5, random_state = None)
k_fold_EXT = cross_val_score(model_EXT, X_train, y_train, cv= k_fold_EXT, scoring = 'f1_macro')
k_fold_EXT

array([0.89993313, 0.91754002, 0.91930084, 0.90313636, 0.90914017])

In [261]:
average_score_kfold_EXT = k_fold_EXT.mean() * 100
average_score_kfold_EXT

90.98101046883862

In [262]:
#Stratified K-FOLD VALIDATION

S_k_fold_EXT = StratifiedKFold(n_splits = 5, random_state = None)
S_k_fold_EXT = cross_val_score(model_EXT, X_train, y_train, cv= S_k_fold_EXT, scoring = 'f1_macro')
S_k_fold_EXT

array([0.90125198, 0.90338773, 0.91047057, 0.90799104, 0.91886396])

In [263]:
average_score_skfold_EXT = S_k_fold_EXT.mean() * 100
average_score_skfold_EXT

90.83930561479285

In [264]:
# #lEAVE ONE OUT VALIDATION

# loo_EXT = LeaveOneOut()
# loo_EXT = cross_val_score(model_EXT, X_train, y_train, cv= loo_EXT, scoring = 'f1_macro')

# average_score_EXT = loo_EXT.mean() * 100
# average_score_EXT

# XGBOOST CLASSIFIER

In [265]:
#train

model_XGB = XGBClassifier(random_state = 1)
model_XGB.fit(X_train, y_train)

XGBClassifier(random_state=1)

In [266]:
new_predictions = model_XGB.predict(X_test)

cnf_mat = confusion_matrix(y_test, new_predictions)
cnf_mat

array([[1236,   52],
       [ 109,  603]])

In [267]:
accuracy = accuracy_score(new_predictions, y_test)
print( 'Accuracy: {}' .format(round(accuracy*100 ), 4))

Accuracy: 92


EVALUATIONS

In [268]:
score_XGB = model_XGB.score(X_test, y_test)
score_XGB

0.9195

In [269]:
#CROSS-VALIDATION

cv_XGB = cross_val_score(model_XGB, X_train, y_train, cv= 5 , scoring= 'f1_macro' )
cv_XGB

array([0.89416221, 0.91816775, 0.91843641, 0.9083106 , 0.92672083])

In [270]:
average_score_cv_XGB = cv_XGB.mean() * 100
average_score_cv_XGB

91.3159560444606

In [271]:
#K-FOLD VALIDATION

k_fold_XGB = KFold(n_splits = 5, random_state = None)
k_fold_XGB = cross_val_score(model_XGB, X_train, y_train, cv= k_fold_XGB, scoring = 'f1_macro')
k_fold_XGB

array([0.89065725, 0.91569478, 0.91459966, 0.90555408, 0.92110713])

In [272]:
average_score_kfold_XGB = k_fold_XGB.mean() * 100
average_score_kfold_XGB

90.95225794624282

In [273]:
#Stratified K-FOLD VALIDATION

S_k_fold_XGB = StratifiedKFold(n_splits = 5, random_state = None)
S_k_fold_XGB = cross_val_score(model_XGB, X_train, y_train, cv= S_k_fold_XGB, scoring = 'f1_macro')
S_k_fold_XGB

array([0.89416221, 0.91816775, 0.91843641, 0.9083106 , 0.92672083])

In [274]:
average_score_skfold_XGB = S_k_fold_XGB.mean() * 100
average_score_skfold_XGB

91.3159560444606

In [275]:
# #lEAVE ONE OUT VALIDATION

# loo_XGB = LeaveOneOut()
# loo_XGB = cross_val_score(model_XGB, X_train, y_train, cv= loo_XGB, scoring = 'f1_macro')

# average_score_XGB = loo_XGB.mean() * 100
# average_score_XGB

# LIGHTGBM CLASSIFIER

In [276]:
#train

model_LGB = LGBMClassifier(random_state = 1)
model_LGB.fit(X_train, y_train)

LGBMClassifier(random_state=1)

EVALUATION

In [277]:
score_LGB = model_LGB.score(X_test, y_test)
score_LGB

0.9375

In [278]:
#CROSS-VALIDATION

cv_LGB = cross_val_score(model_LGB, X_train, y_train, cv= 5 , scoring= 'f1_macro' )
cv_LGB

array([0.92843386, 0.94817937, 0.92658942, 0.92809053, 0.94694022])

In [279]:
average_score_cv_LGB = cv_LGB.mean() * 100
average_score_cv_LGB

93.56466815809814

In [280]:
#K-FOLD VALIDATION

k_fold_LGB = KFold(n_splits = 5, random_state = None)
k_fold_LGB = cross_val_score(model_LGB, X_train, y_train, cv= k_fold_LGB, scoring = 'f1_macro')
k_fold_LGB

array([0.93006871, 0.94233376, 0.93178646, 0.93148702, 0.94739339])

In [281]:
average_score_kfold_LGB = k_fold_LGB.mean() * 100
average_score_kfold_LGB

93.66138685894839

In [282]:
#Stratified K-FOLD VALIDATION

S_k_fold_LGB = StratifiedKFold(n_splits = 5, random_state = None)
S_k_fold_LGB = cross_val_score(model_LGB, X_train, y_train, cv= S_k_fold_LGB, scoring = 'f1_macro')
S_k_fold_LGB

array([0.92843386, 0.94817937, 0.92658942, 0.92809053, 0.94694022])

In [283]:
average_score_skfold_LGB = S_k_fold_LGB.mean() * 100
average_score_skfold_LGB

93.56466815809814

In [284]:
# #lEAVE ONE OUT VALIDATION

# loo_LGB = LeaveOneOut()
# loo_LGB = cross_val_score(model_LGB, X_train, y_train, cv= loo_LGB, scoring = 'f1_macro')

# average_score_LGB = loo_LGB.mean() * 100
# average_score_LGB

In [285]:
new_predictions = model_LGB.predict(X_test)

cnf_mat = confusion_matrix(y_test, new_predictions)
cnf_mat


array([[1239,   49],
       [  76,  636]])

In [286]:
accuracy = accuracy_score(new_predictions, y_test)
print( 'Accuracy: {}' .format(round(accuracy*100 ), 4))

Accuracy: 94
