## AMADI CHIMEREMMA SANDRA

### 1572c1a4b801f000

# Machine Learning: Classification - Electrical Grid Stability Simulated dataset.







In [39]:
import matplotlib.pyplot as plt 
import pandas as pd
import seaborn as sns
import numpy as np

from sklearn import tree
from sklearn.metrics import (precision_score, accuracy_score, 
                             recall_score, f1_score, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier


%matplotlib inline
import warnings

In [31]:
grid = pd.read_csv('Data_for_UCI_named.csv', delimiter = ',')
grid

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,unstable


### Looking at the dataframe above, we can observe some relationship between stab and stabf columns.

Whenever stab <= 0, the corresponding value in stabf will be "stable"; if otherwise, stabf will be "unstable".

Because of this relationship, we are going to drop the stab column in the dataframe. This will essentially make stabf the sole dependent variable.

In [None]:
#dropping the stab column
grid.drop("stab", axis=1, inplace=True)

In [10]:
#check for null values
print (grid.isna().sum())

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stabf    0
dtype: int64


In [16]:
#distribution of the values in the stabf column
grid['stabf'].value_counts()

unstable    6380
stable      3620
Name: stabf, dtype: int64

In [17]:
#splitting the data into training and testing sets.
X= grid.drop(['stabf'], axis=1)
y = grid['stabf']

In [18]:
#data is considerably clean so we can proceed with building our classification model
#Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [23]:
# Apply StandardScaler to the training and test sets
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [24]:
#Train a Random Forest Classifier
rf_classifier = RandomForestClassifier(random_state=1)
rf_classifier.fit(x_train, y_train)
rf_predictions = rf_classifier.predict(x_test)

In [47]:
metric_scores = [precision_score, recall_score, f1_score, accuracy_score]
for score in metric_scores:
    if score != accuracy_score:
        print(score.__name__.replace('_', ' ').title()+':', (score(y_test, rf_predictions, pos_label='stable')))
    else:
        print(score.__name__.replace('_', ' ').title()+':', (score(y_test, rf_predictions)))

Precision Score: 0.9191176470588235
Recall Score: 0.8778089887640449
F1 Score: 0.8979885057471264
Accuracy Score: 0.929


In [41]:
# Print confusion matrix setting the label parameter as ['unstable', 'stable']
cnf_mat = confusion_matrix(y_test, rf_predictions, labels=['unstable', 'stable'])
print (cnf_mat)

[[1233   55]
 [  87  625]]


In [25]:
#Train on Extra Trees Classifier
et_classifier = ExtraTreesClassifier(random_state=1)
et_classifier.fit(x_train, y_train)
et_predictions = et_classifier.predict(x_test)


In [27]:
#Train an XGBoost Classifier
xgb_classifier = XGBClassifier(random_state=1)
xgb_classifier.fit(x_train, y_train)
xgb_predictions = xgb_classifier.predict(x_test)

NameError: name 'XGBClassifier' is not defined

In [None]:
#Train a LightGBM Classifier
lgb_classifier = lgb.LGBMClassifier(random_state=1)
lgb_classifier.fit(x_train, y_train)
lgb_predictions = lgb_classifier.predict(x_test)

In [None]:
#Evaluate the models' performance on the test set
# Random Forest
rf_accuracy = accuracy_score(y_test, rf_predictions)

# Extra Trees
et_accuracy = accuracy_score(y_test, et_predictions)

# XGBoost
xgb_accuracy = accuracy_score(y_test, xgb_predictions)

# LightGBM
lgb_accuracy = accuracy_score(y_test, lgb_predictions)

In [22]:
rf_accuracy

NameError: name 'rf_accuracy' is not defined