## Model Training

In [4]:
import pandas as pd
import numpy as np

# Modelling
from sklearn.metrics import confusion_matrix, mean_absolute_error, r2_score, mean_squared_error, accuracy_score, precision_score, f1_score, recall_score
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.svm import SVR, SVC
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
import warnings

In [29]:
smart_grid = pd.read_csv("/Users/sanjaydevarajan/Desktop/Dynamic_Grid_Management/notebooks/data/Power_Consumption.csv")
smart_grid.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [30]:
smart_grid.columns = smart_grid.columns.str.replace("tau", "delay").str.replace("p", "power").str.replace("g", "adapt")
smart_grid.columns

Index(['delay1', 'delay2', 'delay3', 'delay4', 'power1', 'power2', 'power3',
       'power4', 'adapt1', 'adapt2', 'adapt3', 'adapt4', 'stab', 'stabf'],
      dtype='object')

## Preparing X and Y variables

In [52]:
dummies = pd.get_dummies(smart_grid["stabf"], dtype='int')
smart_grid_new = pd.concat([smart_grid, dummies], axis=1)
smart_grid_new.drop(["stabf", "unstable"],axis=1, inplace=True)
smart_grid_new.head()

Unnamed: 0,delay1,delay2,delay3,delay4,power1,power2,power3,power4,adapt1,adapt2,adapt3,adapt4,stab,stable
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,0
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,1
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,0
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,0
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,0


In [61]:
# Create Column Transformer with 3 types of transformers
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer

delay_columns = smart_grid_new.columns[smart_grid_new.columns.str.startswith("delay")]
adapt_columns = smart_grid_new.columns[smart_grid_new.columns.str.startswith("adapt")]
power_columns = smart_grid_new.columns[smart_grid_new.columns.str.startswith("power")]


#print(tau_columns)

numeric_transformer = MinMaxScaler()
preprocessor = ColumnTransformer([
    ("delay_scaler", numeric_transformer, delay_columns),
    ("adapt_scaler", numeric_transformer, adapt_columns),
    ("power_scaler", numeric_transformer, power_columns)
])

In [62]:
X_class = smart_grid_new.drop(["stab", "stable"],axis=1)
y_class = smart_grid_new["stable"]

In [63]:
X_class = preprocessor.fit_transform(X_class)
X_class.shape

(10000, 12)

In [64]:
X_train, X_test, y_train, y_test = train_test_split(X_class, y_class, test_size=0.2, random_state=10)

In [65]:
def evaluate_classification_models(true, predicted):
    f1 = f1_score(true, predicted)
    precision = precision_score(true, predicted)
    recall =recall_score(true, predicted)
    accuracy = accuracy_score(true, predicted)
    return f1, accuracy, recall, precision

In [66]:
classification_models = {
    "KNeighborsClassifier": KNeighborsClassifier(),
    "RandomForestClassifier": RandomForestClassifier(),
    "DecisionTreeClassifier": DecisionTreeClassifier(),
    "SVC": SVC(),
    "LogisticRegression": LogisticRegression()
}

classification_model_list = []
f1_list = []


for i in range(len(classification_models)):
    model = list(classification_models.values())[i]
    model.fit(X_train, y_train)

    
    Y_train_pred = model.predict(X_train)
    Y_test_pred = model.predict(X_test)

    # Evaluating the train and test dataset
    model_train_f1, model_train_accuracy, model_train_recall, model_train_precision = evaluate_classification_models(y_train, Y_train_pred)
    model_test_f1, model_test_accuracy, model_test_recall, model_test_precision = evaluate_classification_models(y_test, Y_test_pred)

    print(list(classification_models.keys())[i])
    classification_model_list.append(list(classification_models.keys())[i])


    print("Model performance for training set")
    print("- f1 score {:.4f}".format(model_train_f1))
    print("- accuracy score {:.4f}".format(model_train_accuracy))
    print("- precision score {:.4f}".format(model_train_precision))
    print("- recall score {:.4f}".format(model_train_recall))

    print("-" * 35)
    print("\n")

    print("Model Performance for the test set")
    print("- f1 score {:.4f}".format(model_test_f1))
    print("- accuracy score {:.4f}".format(model_test_accuracy))
    print("- precision score {:.4f}".format(model_test_precision))
    print("- recall score {:.4f}".format(model_test_recall))
    f1_list.append(model_test_f1)

    print("=" * 35)
    print("\n")





KNeighborsClassifier
Model performance for training set
- f1 score 0.8770
- accuracy score 0.9146
- precision score 0.9280
- recall score 0.8313
-----------------------------------


Model Performance for the test set
- f1 score 0.7844
- accuracy score 0.8615
- precision score 0.8485
- recall score 0.7294


RandomForestClassifier
Model performance for training set
- f1 score 1.0000
- accuracy score 1.0000
- precision score 1.0000
- recall score 1.0000
-----------------------------------


Model Performance for the test set
- f1 score 0.8795
- accuracy score 0.9190
- precision score 0.9051
- recall score 0.8553


DecisionTreeClassifier
Model performance for training set
- f1 score 1.0000
- accuracy score 1.0000
- precision score 1.0000
- recall score 1.0000
-----------------------------------


Model Performance for the test set
- f1 score 0.7650
- accuracy score 0.8335
- precision score 0.7466
- recall score 0.7844


SVC
Model performance for training set
- f1 score 0.9686
- accuracy s

# Classification Results

In [67]:
classification_results = pd.DataFrame(list(zip(classification_model_list, f1_list)), columns=["Model Name", "F1_Score"]).sort_values(by="F1_Score", ascending=False)
classification_results

Unnamed: 0,Model Name,F1_Score
3,SVC,0.947674
1,RandomForestClassifier,0.879464
0,KNeighborsClassifier,0.784436
2,DecisionTreeClassifier,0.764996
4,LogisticRegression,0.734328


In [68]:
X_reg = smart_grid_new.drop(["stab", "stable"],axis=1)
y_reg = smart_grid_new["stab"]

In [69]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_reg, y_reg, test_size=0.2, random_state=10)

In [70]:
def evaluate_regression_models(true, predicted):
    mse = mean_squared_error(true, predicted)
    mae = mean_absolute_error(true, predicted)
    r2_square= r2_score(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    return mae, r2_square, rmse

In [71]:
regression_models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "KNeighbors Regressor": KNeighborsRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "XGBRegressor": XGBRegressor(),
    "CatBoosting Regressor": CatBoostRegressor(verbose=False)
}

regression_model_list = []
r2_list = []

for i in range(len(regression_models)):
    model = list(regression_models.values())[i]
    model.fit(X_train, y_train)

    Y_train_pred = model.predict(X_train)
    Y_test_pred = model.predict(X_test)

    model_train_mae, model_train_r2_square, model_train_rmse = evaluate_regression_models(y_train, Y_train_pred)
    model_test_mae, model_test_r2_square, model_test_rmse = evaluate_regression_models(y_test, Y_test_pred)

    print(list(regression_models.keys())[i])
    regression_model_list.append(list(regression_models.keys())[i])

    print("="*35)

    print("Model Performance for Training Set")
    print("- Mean Absolute Error {:.4f}".format(model_train_mae))
    print("- R2 Score Squared Error {:.4f}".format(model_train_r2_square))
    print("- Root Mean Squared Error {:.4f}".format(model_train_rmse))

    print("-" * 35)
    print("\n")

    print("Model Performance for Test Set")
    print("- Mean Absolute Error {:.4f}".format(model_test_mae))
    print("- R2 Score Squared Error {:.4f}".format(model_test_r2_square))
    print("- Root Mean Squared Error {:.4f}".format(model_test_rmse))
    r2_list.append(model_test_r2_square)

    print("="*35)
    print("\n")

Linear Regression
Model Performance for Training Set
- Mean Absolute Error 0.0175
- R2 Score Squared Error 0.6450
- Root Mean Squared Error 0.0221
-----------------------------------


Model Performance for Test Set
- Mean Absolute Error 0.0170
- R2 Score Squared Error 0.6548
- Root Mean Squared Error 0.0214


Decision Tree Regressor
Model Performance for Training Set
- Mean Absolute Error 0.0000
- R2 Score Squared Error 1.0000
- Root Mean Squared Error 0.0000
-----------------------------------


Model Performance for Test Set
- Mean Absolute Error 0.0151
- R2 Score Squared Error 0.7089
- Root Mean Squared Error 0.0196


KNeighbors Regressor
Model Performance for Training Set
- Mean Absolute Error 0.0166
- R2 Score Squared Error 0.6827
- Root Mean Squared Error 0.0209
-----------------------------------


Model Performance for Test Set
- Mean Absolute Error 0.0196
- R2 Score Squared Error 0.5443
- Root Mean Squared Error 0.0246


Random Forest Regressor
Model Performance for Training 

# Results

In [72]:
regression_results = pd.DataFrame(list(zip(regression_model_list, r2_list)), columns = ['Model Name', 'R2_Score']).sort_values(by=["R2_Score"], ascending=False)
regression_results

Unnamed: 0,Model Name,R2_Score
7,CatBoosting Regressor,0.963195
6,XGBRegressor,0.925446
3,Random Forest Regressor,0.896073
1,Decision Tree Regressor,0.708857
5,Ridge,0.654812
0,Linear Regression,0.654782
2,KNeighbors Regressor,0.544295
4,Lasso,-0.000564
