## Stage C Classification - Quiz Code

In [1]:
#importing libraries
import numpy as np
import pandas as pd

In [2]:
#load dataset
data = pd.read_csv('Data_for_UCI_named.csv')
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [3]:
print(data.shape)
data['stabf'].value_counts()

(10000, 14)


unstable    6380
stable      3620
Name: stabf, dtype: int64

In [4]:
#define predictors and response variable
x = data.drop(['stabf', 'stab'], axis=1)
y = data['stabf']

In [5]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
y_test.value_counts()

unstable    1288
stable       712
Name: stabf, dtype: int64

In [6]:
#Standardize training set
from sklearn.preprocessing import StandardScaler
stan = StandardScaler()
x_train_transformed = stan.fit_transform(x_train)
x_test_transformed = stan.transform(x_test)

### Train Models

In [45]:
#Train RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(random_state=1)
rfc.fit(x_train_transformed, y_train)
rfc_predict = rfc.predict(x_test_transformed)

In [8]:
#Train with ExtraTreesClassifier
from sklearn.ensemble import ExtraTreesClassifier
ex_trees= ExtraTreesClassifier(random_state=1)
ex_trees.fit(x_train_transformed, y_train)
ex_trees_predict = ex_trees.predict(x_test_transformed)

In [10]:
#Hyperparameter Tuning Using RandomizedSearchCV on ExtraTreesClassifier
from sklearn.model_selection import RandomizedSearchCV
n_estimators = [50, 100, 300, 500, 1000]
min_samples_split = [2, 3, 5, 7, 9]
min_samples_leaf = [1, 2, 4, 6, 8]
max_features = ['auto', 'sqrt', 'log2', None]
hyperparameter_grid = {'n_estimators': n_estimators,
'min_samples_leaf': min_samples_leaf,
'min_samples_split': min_samples_split,
'max_features': max_features}
clf1 = RandomizedSearchCV(ExtraTreesClassifier(random_state=1), hyperparameter_grid, random_state=1)
search = clf1.fit(x_train_transformed, y_train)
search_predict = clf1.predict(x_test_transformed)

In [28]:
search.best_params_.values()

dict_values([1000, 2, 8, None])

In [43]:
#Train XGBClassifier
from xgboost import XGBClassifier
xgb = XGBClassifier(random_state=1)
xgb.fit(x_train_transformed, y_train)
xgb_predict = xgb.predict(x_test_transformed)

In [15]:
#Train LGBMClassifier
from lightgbm import LGBMClassifier
gboost = LGBMClassifier(random_state=1)
gboost.fit(x_train_transformed, y_train)
gboost_predict = gboost.predict(x_test_transformed)

### Evaluation of Models on Test Set

#### Evaluating RandomForestClassifier

In [16]:
#import classification prediction metrics
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, recall_score, f1_score

#Confusion Matrix
rfc_cfn_mat = confusion_matrix(y_true=y_test, y_pred=rfc_predict, labels=['stable', 'unstable'])
rfc_cfn_mat

#Accuracy
rfc_accuracy = accuracy_score(y_true=y_test, y_pred=rfc_predict)
print('RandomForestClassifier_Accuracy: {}'.format(round(rfc_accuracy*100), 2))

#Precision
rfc_precision = precision_score(y_true=y_test, y_pred=rfc_predict, pos_label='stable')
print('RandomForestClassifier_Precision: {}'.format(round(rfc_precision*100), 2))

#Recall
rfc_recall = recall_score(y_true=y_test, y_pred=rfc_predict, pos_label='stable')
print('RandomForestClassifier_Recall: {}'.format(round(rfc_recall*100), 2))

#F1 Score
rfc_f1 = f1_score(y_true=y_test, y_pred=rfc_predict, pos_label='stable')
print('RandomForestClassifier_F1 Score: {}'.format(round(rfc_f1*100), 2))

RandomForestClassifier_Accuracy: 93.0
RandomForestClassifier_Precision: 92.0
RandomForestClassifier_Recall: 88.0
RandomForestClassifier_F1 Score: 90.0


In [40]:
rfc_accuracy = accuracy_score(y_true=y_test, y_pred=rfc_predict)
print('RandomForestClassifier_Accuracy: {}', rfc_accuracy.round(4))

RandomForestClassifier_Accuracy: {} 0.929


#### Evaluating ExtraTreesClassifier

In [17]:
#Confusion Matrix
ex_trees_cfn_mat = confusion_matrix(y_true=y_test, y_pred=ex_trees_predict, labels=['stable', 'unstable'])
ex_trees_cfn_mat

#Accuracy
ex_trees_accuracy = accuracy_score(y_true=y_test, y_pred=ex_trees_predict)
print('RandomForestClassifier_Accuracy: {}'.format(round(ex_trees_accuracy*100), 2))

#Precision
ex_trees_precision = precision_score(y_true=y_test, y_pred=ex_trees_predict, pos_label='stable')
print('RandomForestClassifier_Precision: {}'.format(round(ex_trees_precision*100), 2))

#Recall
ex_trees_recall = recall_score(y_true=y_test, y_pred=ex_trees_predict, pos_label='stable')
print('RandomForestClassifier_Recall: {}'.format(round(ex_trees_recall*100), 2))

#F1 Score
ex_trees_f1 = f1_score(y_true=y_test, y_pred=ex_trees_predict, pos_label='stable')
print('RandomForestClassifier_F1 Score: {}'.format(round(ex_trees_f1*100), 2))

RandomForestClassifier_Accuracy: 93.0
RandomForestClassifier_Precision: 94.0
RandomForestClassifier_Recall: 85.0
RandomForestClassifier_F1 Score: 89.0


#### Evaluating Improved ExtraTreesClassifier (RandomizedSearchCV)

In [18]:
#Confusion Matrix
search_cfn_mat = confusion_matrix(y_true=y_test, y_pred=search_predict, labels=['stable', 'unstable'])
search_cfn_mat

#Accuracy
search_accuracy = accuracy_score(y_true=y_test, y_pred=search_predict)
print('ExtraTreesClassifier(RandomizedSearchCV)_Accuracy: {}'.format(round(search_accuracy*100), 2))

#Precision
search_precision = precision_score(y_true=y_test, y_pred=search_predict, pos_label='stable')
print('ExtraTreesClassifier(RandomizedSearchCV)_Precision: {}'.format(round(search_precision*100), 2))

#Recall
search_recall = recall_score(y_true=y_test, y_pred=search_predict, pos_label='stable')
print('ExtraTreesClassifier(RandomizedSearchCV)_Recall: {}'.format(round(search_recall*100), 2))

#F1 Score
search_f1 = f1_score(y_true=y_test, y_pred=search_predict, pos_label='stable')
print('ExtraTreesClassifier(RandomizedSearchCV)_F1 Score: {}'.format(round(search_f1*100), 2))

ExtraTreesClassifier(RandomizedSearchCV)_Accuracy: 93.0
ExtraTreesClassifier(RandomizedSearchCV)_Precision: 92.0
ExtraTreesClassifier(RandomizedSearchCV)_Recall: 87.0
ExtraTreesClassifier(RandomizedSearchCV)_F1 Score: 89.0


#### Evaluating XGBClassifier

In [19]:
#Confusion Matrix
xgb_cfn_mat = confusion_matrix(y_true=y_test, y_pred=xgb_predict, labels=['stable', 'unstable'])
xgb_cfn_mat

#Accuracy
xgb_accuracy = accuracy_score(y_true=y_test, y_pred=xgb_predict)
print('XGBClassifier_Accuracy: {}'.format(round(xgb_accuracy*100), 2))

#Precision
xgb_precision = precision_score(y_true=y_test, y_pred=xgb_predict, pos_label='stable')
print('XGBClassifier_Precision: {}'.format(round(xgb_precision*100), 2))

#Recall
xgb_recall = recall_score(y_true=y_test, y_pred=xgb_predict, pos_label='stable')
print('XGBClassifier_Recall: {}'.format(round(xgb_recall*100), 2))

#F1 Score
xgb_f1 = f1_score(y_true=y_test, y_pred=xgb_predict, pos_label='stable')
print('XGBClassifier_F1 Score: {}'.format(round(xgb_f1*100), 2))

XGBClassifier_Accuracy: 95.0
XGBClassifier_Precision: 94.0
XGBClassifier_Recall: 91.0
XGBClassifier_F1 Score: 92.0


In [44]:
#Accuracy
xgb_accuracy = accuracy_score(y_true=y_test, y_pred=xgb_predict)
print('XGBClassifier_Accuracy: {}', xgb_accuracy.round(4))

XGBClassifier_Accuracy: {} 0.9455


#### Evaluating LGBMClassifier

In [20]:
#Confusion Matrix
gboost_cfn_mat = confusion_matrix(y_true=y_test, y_pred=gboost_predict, labels=['stable', 'unstable'])
xgb_cfn_mat

#Accuracy
gboost_accuracy = accuracy_score(y_true=y_test, y_pred=gboost_predict)
print('LGBMClassifier_Accuracy: {}'.format(round(gboost_accuracy*100), 2))

#Precision
gboost_precision = precision_score(y_true=y_test, y_pred=gboost_predict, pos_label='stable')
print('LGBMClassifier_Precision: {}'.format(round(gboost_precision*100), 2))

#Recall
gboost_recall = recall_score(y_true=y_test, y_pred=gboost_predict, pos_label='stable')
print('LGBMClassifier_Recall: {}'.format(round(gboost_recall*100), 2))

#F1 Score
gboost_f1 = f1_score(y_true=y_test, y_pred=gboost_predict, pos_label='stable')
print('LGBMClassifier_F1 Score: {}'.format(round(gboost_f1*100), 2))

LGBMClassifier_Accuracy: 94.0
LGBMClassifier_Precision: 93.0
LGBMClassifier_Recall: 89.0
LGBMClassifier_F1 Score: 91.0


In [27]:
#Accuracy
gboost_accuracy = accuracy_score(y_true=y_test, y_pred=gboost_predict)
print('LGBMClassifier_Accuracy: {}', gboost_accuracy)

LGBMClassifier_Accuracy: {} 0.9375
