In [1]:
# Import numpy, pandas, matplotlib, seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#loading the data
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00471/Data_for_UCI_named.csv')
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [3]:
data.shape

(10000, 14)

In [4]:
data = data.drop(columns ='stab')

In [5]:
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,unstable


In [6]:
data.isna().sum()

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stabf    0
dtype: int64

In [7]:
# Splitting the dataset
X = data.drop(['stabf'],axis = 1)
y = data['stabf']

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [9]:
y_train.value_counts()

unstable    5092
stable      2908
Name: stabf, dtype: int64

In [10]:
y_test.value_counts()

unstable    1288
stable       712
Name: stabf, dtype: int64

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
#importing required libraries 
import lightgbm as lgbm
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV 
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier 
from sklearn.metrics import recall_score, accuracy_score, confusion_matrix, classification_report


Random Forest

In [13]:
Random_Forest = RandomForestClassifier(random_state=1)
Random_Forest.fit(X_train_scaled ,y_train)

RandomForestClassifier(random_state=1)

In [15]:
rf_predict = Random_Forest.predict(X_test_scaled)
accuracy_score(rf_predict, y_test)

0.929

In [17]:
confusion_matrix(rf_predict, y_test)

array([[ 625,   55],
       [  87, 1233]])

In [25]:
print(classification_report(rf_predict, y_test))


              precision    recall  f1-score   support

      stable       0.88      0.92      0.90       680
    unstable       0.96      0.93      0.95      1320

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000



Extra Trees

In [29]:
Tree_Class = ExtraTreesClassifier (random_state = 1)  
Tree_Class.fit(X_train_scaled,y_train)
et_predict = Tree_Class.predict(X_test_scaled)
accuracy_score(y_test, et_predict)

0.928

In [28]:
print(confusion_matrix(et_predict, y_test))

[[ 606   38]
 [ 106 1250]]


In [32]:
print(classification_report(et_predict, y_test))

              precision    recall  f1-score   support

      stable       0.85      0.94      0.89       644
    unstable       0.97      0.92      0.95      1356

    accuracy                           0.93      2000
   macro avg       0.91      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000



In [33]:
n_estimators = [100, 300, 500, 1000]    
min_samples_split = [7, 5, 2, 2]
min_samples_leaf = [4, 6, 8, 8]
max_features = [None,'auto','log2',None] 
hyperparameter_grid = {'n_estimators': n_estimators,  'min_samples_split': min_samples_split, 'min_samples_leaf': min_samples_leaf,'max_features': max_features}

In [34]:
Rand_search = RandomizedSearchCV(estimator = Tree_Class, param_distributions= hyperparameter_grid, random_state=1,cv = 5, n_iter=10,scoring='accuracy',n_jobs=-1, verbose=1)
search = Rand_search.fit(X_train_scaled,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [35]:
#checking for the best parameter for the model
search.best_params_

{'max_features': None,
 'min_samples_leaf': 6,
 'min_samples_split': 2,
 'n_estimators': 1000}

In [36]:
feature_imp = search.best_estimator_.feature_importances_

In [41]:
print('Most important features:',max(zip(feature_imp,X)))
print('Least important features:',min(zip(feature_imp,X)))

Most important features: (0.13842145507674694, 'tau2')
Least important features: (0.005354328485013283, 'p1')


Extra Trees 2

In [65]:
ET_2 = ExtraTreesClassifier(n_estimators=1000, min_samples_split=2, min_samples_leaf=8, max_features=None, random_state = 1)
ET_2.fit(X_train_scaled, y_train)
ET_2= ET_2.predict(X_test_scaled)
print("Accuracy is:",accuracy_score(y_test, ET_2))

Accuracy is: 0.927


In [45]:
print(confusion_matrix(ET_2, y_test))

[[ 627   51]
 [  85 1237]]


In [46]:
print(classification_report(ET_2, y_test))

              precision    recall  f1-score   support

      stable       0.88      0.92      0.90       678
    unstable       0.96      0.94      0.95      1322

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000



XGBoost

In [61]:
xg_boost = XGBClassifier(random_state =1)
xg_boost.fit(X_train_scaled, y_train)
xg_predict = xg_boost.predict(X_test_scaled)
accuracy_score(y_test, xg_predict)

0.9195

In [64]:
print(confusion_matrix(xg_predict, y_test))

[[ 603   52]
 [ 109 1236]]


In [63]:
print(classification_report(xg_predict, y_test))

              precision    recall  f1-score   support

      stable       0.85      0.92      0.88       655
    unstable       0.96      0.92      0.94      1345

    accuracy                           0.92      2000
   macro avg       0.90      0.92      0.91      2000
weighted avg       0.92      0.92      0.92      2000



LGBM

In [55]:
import lightgbm as lgbm
from lightgbm import LGBMClassifier
lgbm = lgbm.LGBMClassifier(random_state=1)
lgbm.fit(X_train_scaled,y_train)
lgbm_predict  = lgbm.predict(X_test_scaled)
accuracy_score(y_test, lgbm_predict)

0.9375

In [56]:
print(confusion_matrix(y_test, lgbm_predict))

[[ 635   77]
 [  48 1240]]


In [57]:
print(classification_report(y_test, lgbm_predict))

              precision    recall  f1-score   support

      stable       0.93      0.89      0.91       712
    unstable       0.94      0.96      0.95      1288

    accuracy                           0.94      2000
   macro avg       0.94      0.93      0.93      2000
weighted avg       0.94      0.94      0.94      2000

