# __Electric Grid Stability__
____
Electrical grids require a balance between electricity supply and demand in order to be stable. Conventional systems achieve this balance through demand-driven electricity production. For future grids with a high share of inflexible (i.e., renewable) energy source, the concept of demand response is a promising solution. This implies changes in electricity consumption in relation to electricity price changes. In this work, I’ll build a binary classification model to predict if a grid is stable or unstable using the UCI Electrical Grid Stability Simulated dataset.

The dataset can be found [here.](https://archive.ics.uci.edu/ml/datasets/Electrical+Grid+Stability+Simulated+Data+)

In [1]:
# import the necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv('electrical_grid_stability.csv')
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [3]:
# check for null values
data.isnull().sum()

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stab     0
stabf    0
dtype: int64

## Split the Dataset

In [4]:
from sklearn.model_selection import train_test_split

x = data.drop(['stab', 'stabf'], axis=1)
y = data['stabf']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

print('Value counts for y_train:')
print(y_train.value_counts())

Value counts for y_train:
unstable    5092
stable      2908
Name: stabf, dtype: int64


## Oversampling for the Minority Class
I will use this later to compare prediction results with the unbalanced train set.

In [5]:
from imblearn.over_sampling import SMOTE

smote = SMOTE()
x_train_balanced, y_train_balanced = smote.fit_sample(x_train, y_train)

print('Value counts for y_train_balanced:')
print(y_train_balanced.value_counts())

Value counts for y_train_balanced:
unstable    5092
stable      5092
Name: stabf, dtype: int64


## Standardize the Datasets

In [6]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
standardized_x_train = pd.DataFrame(sc.fit_transform(x_train), columns=x_train.columns)
standardized_x_test = pd.DataFrame(sc.transform(x_test), columns=x_test.columns)

 ## Random Forest Classifier

In [7]:
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(random_state=1)
rfc.fit(standardized_x_train, y_train)

rfc_pred = rfc.predict(standardized_x_test)

In [8]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
accuracy_score(rfc_pred, y_test)

0.929

In [9]:
confusion_matrix(rfc_pred, y_test)

array([[ 625,   55],
       [  87, 1233]], dtype=int64)

In [10]:
print(classification_report(rfc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.88      0.92      0.90       680
    unstable       0.96      0.93      0.95      1320

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000



## Extra Trees Classifier

In [11]:
from sklearn.ensemble import ExtraTreesClassifier

etc = ExtraTreesClassifier(random_state=1)
etc.fit(standardized_x_train, y_train)

etc_pred = etc.predict(standardized_x_test)

In [12]:
accuracy_score(etc_pred, y_test)

0.928

In [13]:
confusion_matrix(etc_pred, y_test)

array([[ 606,   38],
       [ 106, 1250]], dtype=int64)

In [14]:
print(classification_report(etc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.85      0.94      0.89       644
    unstable       0.97      0.92      0.95      1356

    accuracy                           0.93      2000
   macro avg       0.91      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000



## XGBoost

In [15]:
from xgboost import XGBClassifier

xgbc = XGBClassifier(random_state=1)
xgbc.fit(standardized_x_train, y_train)

xgbc_pred = xgbc.predict(standardized_x_test)

  data = yaml.load(f.read()) or {}
  defaults = yaml.load(f)


In [16]:
accuracy_score(xgbc_pred, y_test)

0.9455

In [17]:
confusion_matrix(xgbc_pred, y_test)

array([[ 648,   45],
       [  64, 1243]], dtype=int64)

In [18]:
print(classification_report(xgbc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.91      0.94      0.92       693
    unstable       0.97      0.95      0.96      1307

    accuracy                           0.95      2000
   macro avg       0.94      0.94      0.94      2000
weighted avg       0.95      0.95      0.95      2000



## LightGBM

In [19]:
from lightgbm import LGBMClassifier

lgbc = LGBMClassifier(random_state=1)
lgbc.fit(standardized_x_train, y_train)

lgbc_pred = lgbc.predict(standardized_x_test)

In [20]:
accuracy_score(lgbc_pred, y_test)

0.9375

In [21]:
confusion_matrix(lgbc_pred, y_test)

array([[ 635,   48],
       [  77, 1240]], dtype=int64)

In [22]:
print(classification_report(lgbc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.89      0.93      0.91       683
    unstable       0.96      0.94      0.95      1317

    accuracy                           0.94      2000
   macro avg       0.93      0.94      0.93      2000
weighted avg       0.94      0.94      0.94      2000



## Improve Extra Trees Classifier

In [23]:
from sklearn.model_selection import RandomizedSearchCV

n_estimators = [50, 100, 300, 500, 1000]
min_samples_split = [2, 3, 5, 7, 9]
min_samples_leaf = [1, 2, 4, 6, 8]
max_features = ['auto', 'sqrt', 'log2', None]

hyperparameter_grid = {'n_estimators': n_estimators, 'min_samples_leaf': min_samples_leaf, 
                       'min_samples_split': min_samples_split, 'max_features': max_features}

etc = ExtraTreesClassifier(random_state=1)
grid_search = RandomizedSearchCV(estimator=etc, param_distributions=hyperparameter_grid)

In [24]:
grid_search.fit(standardized_x_train, y_train)

RandomizedSearchCV(estimator=ExtraTreesClassifier(random_state=1),
                   param_distributions={'max_features': ['auto', 'sqrt', 'log2',
                                                         None],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 3, 5, 7, 9],
                                        'n_estimators': [50, 100, 300, 500,
                                                         1000]})

In [25]:
grid_search.best_estimator_
grid_search.best_params_

{'n_estimators': 300,
 'min_samples_split': 7,
 'min_samples_leaf': 4,
 'max_features': None}

In [26]:
best_etc = grid_search.best_estimator_

best_etc.fit(standardized_x_train, y_train)
best_etc_pred = best_etc.predict(standardized_x_test)

In [27]:
best_etc.feature_importances_

array([0.13310693, 0.13520328, 0.12958449, 0.12994923, 0.00815238,
       0.0110013 , 0.01082487, 0.01022912, 0.10300776, 0.10789238,
       0.11225116, 0.10879709])

In [28]:
accuracy_score(best_etc_pred, y_test)

0.933

In [29]:
confusion_matrix(best_etc_pred, y_test)

array([[ 627,   49],
       [  85, 1239]], dtype=int64)

In [30]:
print(classification_report(best_etc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.88      0.93      0.90       676
    unstable       0.96      0.94      0.95      1324

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000



# __Using the Over-sampled Dataset__

In [31]:
from sklearn.preprocessing import StandardScaler

x_test.reset_index(inplace=True, drop=True)

sc = StandardScaler()
standardized_x_train = pd.DataFrame(sc.fit_transform(x_train_balanced), columns=x_train_balanced.columns)
standardized_x_test = pd.DataFrame(sc.transform(x_test), columns=x_test.columns)

In [32]:
x_train_balanced.shape

(10184, 12)

In [33]:
x_test.shape

(2000, 12)

## Random Forest Classifier

In [34]:
rfc = RandomForestClassifier(random_state=1)
rfc.fit(standardized_x_train, y_train_balanced)

rfc_pred = rfc.predict(standardized_x_test)

In [35]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
accuracy_score(rfc_pred, y_test)

0.9175

In [36]:
confusion_matrix(rfc_pred, y_test)

array([[ 646,   99],
       [  66, 1189]], dtype=int64)

In [37]:
print(classification_report(rfc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.91      0.87      0.89       745
    unstable       0.92      0.95      0.94      1255

    accuracy                           0.92      2000
   macro avg       0.92      0.91      0.91      2000
weighted avg       0.92      0.92      0.92      2000



## Extra Trees Classifier

In [38]:
etc = ExtraTreesClassifier(random_state=1)
etc.fit(standardized_x_train, y_train_balanced)

etc_pred = etc.predict(standardized_x_test)

In [39]:
accuracy_score(etc_pred, y_test)

0.9225

In [40]:
confusion_matrix(etc_pred, y_test)

array([[ 636,   79],
       [  76, 1209]], dtype=int64)

In [41]:
print(classification_report(etc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.89      0.89      0.89       715
    unstable       0.94      0.94      0.94      1285

    accuracy                           0.92      2000
   macro avg       0.92      0.92      0.92      2000
weighted avg       0.92      0.92      0.92      2000



## XGBoost

In [42]:
xgbc = XGBClassifier(random_state=1)
xgbc.fit(standardized_x_train, y_train_balanced)

xgbc_pred = xgbc.predict(standardized_x_test)

In [43]:
accuracy_score(xgbc_pred, y_test)

0.944

In [44]:
confusion_matrix(xgbc_pred, y_test)

array([[ 661,   61],
       [  51, 1227]], dtype=int64)

In [45]:
print(classification_report(xgbc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.93      0.92      0.92       722
    unstable       0.95      0.96      0.96      1278

    accuracy                           0.94      2000
   macro avg       0.94      0.94      0.94      2000
weighted avg       0.94      0.94      0.94      2000



## LightGBM

In [46]:
lgbc = LGBMClassifier(random_state=1)
lgbc.fit(standardized_x_train, y_train_balanced)

lgbc_pred = lgbc.predict(standardized_x_test)

In [47]:
accuracy_score(lgbc_pred, y_test)

0.9345

In [48]:
confusion_matrix(lgbc_pred, y_test)

array([[ 662,   81],
       [  50, 1207]], dtype=int64)

In [49]:
print(classification_report(lgbc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.93      0.89      0.91       743
    unstable       0.94      0.96      0.95      1257

    accuracy                           0.93      2000
   macro avg       0.93      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000



## Improve Extra Trees Classifier

In [50]:
grid_search.fit(standardized_x_train, y_train_balanced)

RandomizedSearchCV(estimator=ExtraTreesClassifier(random_state=1),
                   param_distributions={'max_features': ['auto', 'sqrt', 'log2',
                                                         None],
                                        'min_samples_leaf': [1, 2, 4, 6, 8],
                                        'min_samples_split': [2, 3, 5, 7, 9],
                                        'n_estimators': [50, 100, 300, 500,
                                                         1000]})

In [51]:
best_etc = grid_search.best_estimator_

best_etc.fit(standardized_x_train, y_train_balanced)
best_etc_pred = best_etc.predict(standardized_x_test)

In [52]:
accuracy_score(best_etc_pred, y_test)

0.9215

In [53]:
confusion_matrix(best_etc_pred, y_test)

array([[ 645,   90],
       [  67, 1198]], dtype=int64)

In [54]:
print(classification_report(best_etc_pred, y_test))

              precision    recall  f1-score   support

      stable       0.91      0.88      0.89       735
    unstable       0.93      0.95      0.94      1265

    accuracy                           0.92      2000
   macro avg       0.92      0.91      0.92      2000
weighted avg       0.92      0.92      0.92      2000



In [55]:
grid_search.best_params_

{'n_estimators': 500,
 'min_samples_split': 9,
 'min_samples_leaf': 1,
 'max_features': 'sqrt'}

In [56]:
best_etc.feature_importances_.sort_values

AttributeError: 'numpy.ndarray' object has no attribute 'sort_values'