# HDSC Winter 22
## Stage C Tang-Along Project
### Name: Ibraheem Kekere-Ekun

Predictive features:

'tau1' to 'tau4': the reaction time of each network participant, a real value within the range 0.5 to 10 ('tau1' corresponds to the supplier node, 'tau2' to 'tau4' to the consumer nodes);
'p1' to 'p4': nominal power produced (positive) or consumed (negative) by each network participant, a real value within the range -2.0 to -0.5 for consumers ('p2' to 'p4'). As the total power consumed equals the total power generated, p1 (supplier node) = - (p2 + p3 + p4);
'g1' to 'g4': price elasticity coefficient for each network participant, a real value within the range 0.05 to 1.00 ('g1' corresponds to the supplier node, 'g2' to 'g4' to the consumer nodes; 'g' stands for 'gamma');

Dependent variables:

'stab': the maximum real part of the characteristic differential equation root (if positive, the system is linearly unstable; if negative, linearly stable);
'stabf': a categorical (binary) label ('stable' or 'unstable').

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load dataset
path = r"https://archive.ics.uci.edu/ml/machine-learning-databases/00471/Data_for_UCI_named.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


#### Drop the 'stab' column

In [3]:
df.drop('stab', axis = 1, inplace = True)
df.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,unstable


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tau1    10000 non-null  float64
 1   tau2    10000 non-null  float64
 2   tau3    10000 non-null  float64
 3   tau4    10000 non-null  float64
 4   p1      10000 non-null  float64
 5   p2      10000 non-null  float64
 6   p3      10000 non-null  float64
 7   p4      10000 non-null  float64
 8   g1      10000 non-null  float64
 9   g2      10000 non-null  float64
 10  g3      10000 non-null  float64
 11  g4      10000 non-null  float64
 12  stabf   10000 non-null  object 
dtypes: float64(12), object(1)
memory usage: 1015.8+ KB


There are no missing values in the dataset

In [5]:
df.describe(include = 'all')

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000
unique,,,,,,,,,,,,,2
top,,,,,,,,,,,,,unstable
freq,,,,,,,,,,,,,6380
mean,5.25,5.250001,5.250004,5.249997,3.75,-1.25,-1.25,-1.25,0.525,0.525,0.525,0.525,
std,2.742548,2.742549,2.742549,2.742556,0.75216,0.433035,0.433035,0.433035,0.274256,0.274255,0.274255,0.274255,
min,0.500793,0.500141,0.500788,0.500473,1.58259,-1.999891,-1.999945,-1.999926,0.050009,0.050053,0.050054,0.050028,
25%,2.874892,2.87514,2.875522,2.87495,3.2183,-1.624901,-1.625025,-1.62496,0.287521,0.287552,0.287514,0.287494,
50%,5.250004,5.249981,5.249979,5.249734,3.751025,-1.249966,-1.249974,-1.250007,0.525009,0.525003,0.525015,0.525002,
75%,7.62469,7.624893,7.624948,7.624838,4.28242,-0.874977,-0.875043,-0.875065,0.762435,0.76249,0.76244,0.762433,


#### Split into features and target

In [6]:
X = df.drop('stabf', axis = 1)
y = df.stabf

#### Split into training and testing

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

#### Preprocessing

In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [9]:
Fit = sc.fit(X_train)
X_trainScaled = Fit.transform(X_train)
X_testScaled = Fit.transform(X_test)

#### Random Forests

In [10]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(random_state = 1)
rfc_model = rfc.fit(X_trainScaled, y_train)
y_pred_rfc = rfc_model.predict(X_testScaled)

In [11]:
from sklearn.metrics import classification_report
print(classification_report(y_pred_rfc, y_test))

              precision    recall  f1-score   support

      stable       0.88      0.92      0.90       680
    unstable       0.96      0.93      0.95      1320

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000



#### Extra Trees Classifier

In [12]:
from sklearn.ensemble import ExtraTreesClassifier
etc = ExtraTreesClassifier(random_state = 1)
etc_model = etc.fit(X_trainScaled, y_train)
y_pred_etc = etc_model.predict(X_testScaled)

#### LightGBM Classifier

In [13]:
import lightgbm
from lightgbm import LGBMClassifier
lgb = LGBMClassifier(random_state = 1)
lgb_model = lgb.fit(X_trainScaled, y_train)
y_pred_lgb = lgb.predict(X_testScaled)

#### XGBoost

In [14]:
from xgboost import XGBClassifier
xgb = XGBClassifier(random_state = 1)
xgb_model = xgb.fit(X_trainScaled, y_train)
y_pred_xgb = xgb_model.predict(X_testScaled)





#### Model Evaluation

In [15]:
from sklearn.metrics import confusion_matrix
def confusionMatrix(y_pred):
    print(confusion_matrix(y_test, y_pred))

In [16]:
def classReport(y_pred):
    print(classification_report(y_pred, y_test))

#### Confusion Matrix

In [17]:
print("Confusion Matrix for Random Forests Classifier", confusionMatrix(y_pred_rfc))
print("Confusion Matrix for Extra Trees Classifier", confusionMatrix(y_pred_etc))
print("Confusion Matrix for Lgbm Classifer", confusionMatrix(y_pred_lgb))
print("Confusion Matrix for Xgboost Classifer", confusionMatrix(y_pred_xgb))

[[ 625   87]
 [  55 1233]]
Confusion Matrix for Random Forests Classifier None
[[ 606  106]
 [  38 1250]]
Confusion Matrix for Extra Trees Classifier None
[[ 641   71]
 [  50 1238]]
Confusion Matrix for Lgbm Classifer None
[[ 648   64]
 [  45 1243]]
Confusion Matrix for Xgboost Classifer None


#### Classification report

In [18]:
print("Classification report for Random Forests Classifer", classReport(y_pred_rfc))
print("Classification report for Extra Trees Classifer", classReport(y_pred_etc))
print("Classification report for Lightgbm Classifer", classReport(y_pred_lgb))
print("Classification report for Xgboost Classifer", classReport(y_pred_xgb))

              precision    recall  f1-score   support

      stable       0.88      0.92      0.90       680
    unstable       0.96      0.93      0.95      1320

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000

Classification report for Random Forests Classifer None
              precision    recall  f1-score   support

      stable       0.85      0.94      0.89       644
    unstable       0.97      0.92      0.95      1356

    accuracy                           0.93      2000
   macro avg       0.91      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000

Classification report for Extra Trees Classifer None
              precision    recall  f1-score   support

      stable       0.90      0.93      0.91       691
    unstable       0.96      0.95      0.95      1309

    accuracy                           0.94      2000
   macro avg       

In [20]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred_lgb)

0.9395

In [29]:
accuracy_score(y_test, y_pred_rfc)

0.929

In [30]:
accuracy_score(y_test, y_pred_xgb)

0.9455

#### Randomized search

In [32]:
from sklearn.model_selection import RandomizedSearchCV

In [36]:
from scipy.stats import uniform as sp_randFloat
from scipy.stats import randint as sp_randInt
parameters = {'learning_rate': sp_randFloat(),
                'subsample'    : sp_randFloat(),
                'n_estimators' : sp_randInt(100, 1000),
                'max_depth'    : sp_randInt(4, 10) }

In [45]:
etc_model2 = ExtraTreesClassifier(random_state = 1)

ValueError: Invalid parameter learning_rate for estimator ExtraTreesClassifier(random_state=1). Check the list of available parameters with `estimator.get_params().keys()`.

In [53]:
n_estimators = [100, 300, 500, 1000]
max_features = ['auto', 'sqrt', 'log2', None]
max_depth = [int(x) for x in np.linspace(10, 120, num = 12)]
min_samples_split = [2, 5, 7]
min_samples_leaf = [4, 6, 8]

random_grid = {'n_estimators': n_estimators,

'max_features': max_features,

'max_depth': max_depth,

'min_samples_split': min_samples_split,

'min_samples_leaf': min_samples_leaf}


In [57]:
randm_src = RandomizedSearchCV(estimator = etc_model2, param_distributions = random_grid, n_iter = 10, cv = 5, verbose = 1, random_state= 1, n_jobs = -1)
randm_src.fit(X_trainScaled, y_train)

print ('Random grid: ', random_grid, '\n')

print ('Best Parameters: ', randm_src.best_params_, ' \n')

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Random grid:  {'n_estimators': [100, 300, 500, 1000], 'max_features': ['auto', 'sqrt', 'log2', None], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120], 'min_samples_split': [2, 5, 7], 'min_samples_leaf': [4, 6, 8]} 

Best Parameters:  {'n_estimators': 1000, 'min_samples_split': 5, 'min_samples_leaf': 6, 'max_features': None, 'max_depth': 60}  



In [58]:
randm_src.best_params_

{'n_estimators': 1000,
 'min_samples_split': 5,
 'min_samples_leaf': 6,
 'max_features': None,
 'max_depth': 60}

In [59]:
etc2 = ExtraTreesClassifier(n_estimators = 1000, min_samples_split = 5, min_samples_leaf = 6, max_features =  None, max_depth = 60)
etc2_model = etc2.fit(X_trainScaled, y_train)
y_pred_etc2 = etc2_model.predict(X_testScaled)

In [60]:
print("Classification report for Extra Trees Classifer 2", classReport(y_pred_etc2))


              precision    recall  f1-score   support

      stable       0.88      0.92      0.90       676
    unstable       0.96      0.93      0.95      1324

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.92      2000
weighted avg       0.93      0.93      0.93      2000

Classification report for Extra Trees Classifer 2 None
