In [1]:
import numpy as np
import pandas as pd
import sklearn.utils
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, LeaveOneOut
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, confusion_matrix, f1_score

In [2]:
uci_data = pd.read_csv('Data_for_UCI_named.csv')

In [3]:
uci_data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [4]:
uci_data.nunique()

tau1     10000
tau2     10000
tau3     10000
tau4     10000
p1       10000
p2       10000
p3       10000
p4       10000
g1       10000
g2       10000
g3       10000
g4       10000
stab     10000
stabf        2
dtype: int64

In [5]:
uci_data.isna().sum()

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stab     0
stabf    0
dtype: int64

In [6]:
uci_data['stabf'].value_counts()

unstable    6380
stable      3620
Name: stabf, dtype: int64

In [7]:
uci_data_stab = uci_data[uci_data.stabf=='stable']
uci_data_unstab = uci_data[uci_data.stabf=='unstable']
uci_data_stab = uci_data_stab.replace('stable', '1')
uci_data_unstab = uci_data_unstab.replace('unstable', '0')
uci_data_df  =uci_data_stab.append(uci_data_unstab)
uci_data_df

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,1
5,6.999209,9.109247,3.784066,4.267788,4.429669,-1.857139,-0.670397,-1.902133,0.261793,0.077930,0.542884,0.469931,-0.017385,1
8,4.689852,4.007747,1.478573,3.733787,4.041300,-1.410344,-1.238204,-1.392751,0.269708,0.250364,0.164941,0.482439,-0.038677,1
10,5.930110,6.730873,6.245138,0.533288,2.327092,-0.702501,-1.116920,-0.507671,0.239816,0.563110,0.164461,0.753701,-0.028411,1
12,1.616787,2.939228,0.819791,4.191804,3.752282,-1.484885,-1.280581,-0.986816,0.899698,0.866546,0.303921,0.077610,-0.048617,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9993,5.754191,3.032743,5.084803,4.633624,5.199250,-1.717030,-1.713212,-1.769009,0.157284,0.975921,0.511555,0.696591,0.050212,0
9994,2.042954,8.514335,8.173809,5.466635,3.783797,-1.639912,-0.662469,-1.481417,0.154129,0.944486,0.053225,0.499109,0.026311,0
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,0
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,0


In [8]:
uci_data_df = sklearn.utils.shuffle(uci_data_df) 
uci_data_df = uci_data_df.reset_index(drop= True )
uci_data_df = uci_data_df.drop(columns= 'stab') 

In [9]:
X = uci_data_df.drop(columns = 'stabf')
y = uci_data_df['stabf']
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
y_train.value_counts()


0    5105
1    2895
Name: stabf, dtype: int64

In [10]:
import imblearn 
from imblearn.over_sampling import SMOTE 
smote = SMOTE(random_state= 1 ) 
x_train_balanced, y_balanced = smote.fit_sample(x_train, y_train) 
x_train_balanced
y_balanced.value_counts()

0    5105
1    5105
Name: stabf, dtype: int64

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled = scaler.fit_transform(x_train_balanced)
scaled = pd.DataFrame(scaled, columns= x_train_balanced.columns)
scaled


Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
0,0.667540,0.704540,1.063040,-1.305353,0.468442,0.313951,0.526565,-1.652310,1.269056,-0.419287,-1.624862,-1.502837
1,-0.170446,1.307030,-1.550199,-0.484353,0.328037,0.024158,-0.877454,0.282407,-0.084591,1.107038,0.362963,-0.257787
2,-0.663297,0.139517,0.392101,-0.658457,-0.622439,0.126449,1.486105,-0.530151,-1.548722,-0.884869,1.491793,-0.341744
3,-0.300262,-0.878570,1.507492,0.782168,0.907971,-1.147244,-0.335929,-0.089136,0.249573,-0.427661,0.064706,-1.091765
4,0.394334,-0.354701,-0.032095,-0.989175,2.128772,-0.755373,-1.442975,-1.494274,0.242156,0.087037,1.680202,-1.374661
...,...,...,...,...,...,...,...,...,...,...,...,...
10205,1.710275,-1.416848,0.836872,-0.651312,0.283692,-0.080905,-0.384992,-0.026699,0.005368,-0.361786,-1.631257,1.496884
10206,0.944990,1.396690,-0.826752,-0.345465,-1.136772,0.149164,1.172461,0.651809,-0.997891,-1.271241,0.591015,-0.561509
10207,-1.512282,0.509049,-1.273473,1.745995,0.105707,-0.465294,0.874852,-0.589670,-0.084454,-0.805457,0.655373,0.723838
10208,1.248740,0.512465,-1.267502,-1.509960,-0.706047,1.333661,-1.204744,1.089232,-1.554851,0.849056,1.353464,-0.271933


In [12]:
x_test = x_test.reset_index(drop= True)
scaled_test = scaler.transform(x_test)
scaled_test = pd.DataFrame(scaled_test, columns= x_test.columns)
scaled_test

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
0,1.743601,-1.101725,1.507908,-0.830962,0.014372,-0.344090,-1.330687,1.647615,-0.338300,-0.664699,1.679250,-1.091143
1,1.400184,0.754133,1.340943,-0.350381,-0.862930,1.535435,0.150972,-0.193582,-0.652707,0.803836,0.085017,1.294098
2,0.123248,-0.081548,0.042485,1.238228,1.357881,-1.601251,-1.565097,0.812193,-0.102321,0.204782,1.091763,-0.495402
3,0.815058,1.170833,0.024813,-0.812279,-1.130813,1.343148,1.745224,-1.126884,-0.254104,-0.636915,1.625880,0.982479
4,0.102958,-0.280354,0.410052,0.157172,1.525309,-0.885826,-1.114189,-0.645163,-0.356243,-0.250221,-0.901981,0.143982
...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1.021621,-0.796788,0.723847,-0.691404,1.230159,-1.737470,0.998446,-1.387382,-1.001473,-1.298267,1.518191,-1.569596
1996,1.135975,-1.019779,1.590712,0.677604,0.643133,-1.270775,0.391671,-0.232002,-1.031633,0.547753,-0.288379,-0.462269
1997,1.536517,-0.692705,-0.737808,-0.064188,0.383095,0.167262,0.508577,-1.339329,1.189353,-0.463300,-1.348430,-0.169622
1998,0.016517,-0.885259,0.005064,-1.047512,-1.256972,1.661476,0.575597,-0.060333,1.751939,1.750837,1.650384,-0.318283


In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(x_train_balanced, y_balanced)
pred_rfc = rfc.predict(x_test)
print(classification_report(y_test, pred_rfc))
pred_rfc

              precision    recall  f1-score   support

           0       0.93      0.93      0.93      1275
           1       0.87      0.88      0.88       725

    accuracy                           0.91      2000
   macro avg       0.90      0.91      0.90      2000
weighted avg       0.91      0.91      0.91      2000



array(['0', '0', '0', ..., '0', '0', '0'], dtype=object)

In [14]:
from sklearn.ensemble import ExtraTreesClassifier
etc = ExtraTreesClassifier(n_estimators=100)
etc.fit(x_train_balanced, y_balanced)
pred_etc = etc.predict(x_test)
print(classification_report(y_test, pred_etc))

              precision    recall  f1-score   support

           0       0.93      0.95      0.94      1275
           1       0.90      0.88      0.89       725

    accuracy                           0.92      2000
   macro avg       0.92      0.91      0.92      2000
weighted avg       0.92      0.92      0.92      2000



In [15]:
from xgboost import XGBClassifier 
xgb = XGBClassifier(random_state = 1)
xgb.fit(x_train_balanced, y_balanced)
xgb_pred = xgb.predict(x_test)
print(classification_report(y_test, xgb_pred))



              precision    recall  f1-score   support

           0       0.96      0.95      0.96      1275
           1       0.92      0.94      0.93       725

    accuracy                           0.95      2000
   macro avg       0.94      0.94      0.94      2000
weighted avg       0.95      0.95      0.95      2000



In [17]:
from lightgbm import LGBMClassifier
lgbm = LGBMClassifier(random_state = 1)
lgbm.fit(x_train_balanced, y_balanced)
lgbm_pred = lgbm.predict(x_test)
print(classification_report(y_test, lgbm_pred))

              precision    recall  f1-score   support

           0       0.96      0.94      0.95      1275
           1       0.90      0.94      0.92       725

    accuracy                           0.94      2000
   macro avg       0.93      0.94      0.94      2000
weighted avg       0.94      0.94      0.94      2000

