In [None]:
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as a heat-map on matplotlib figure 1.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are drawn on the 'True label' axis and
        columns on the 'Predicted label' axis.
    classes : sequence
        Tick labels, one per class, used for both axes.
    normalize : bool, default False
        When True every row is divided by its row sum before the matrix is
        printed *and* plotted. Rows that sum to zero are left as zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap handed to ``plt.imshow``.

    Returns
    -------
    None
        The matrix is printed to stdout and drawn on figure 1; the caller
        decides when to show or save the figure.
    """
    # Imported locally so the function does not rely on a later notebook cell
    # having been executed first.
    import itertools
    import numpy as np

    cm = np.asarray(cm)

    # Normalise BEFORE drawing so that image, colour bar, text and threshold
    # all describe the same numbers.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.figure(1)
    # Figure 1 may still hold a previous matrix; start from a clean canvas so
    # images and colour bars do not stack up.
    plt.clf()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import random

from tensorflow.keras.utils import to_categorical

import keras
from keras.models import Sequential
from keras.layers import Dense

from sklearn.compose import ColumnTransformer

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score


from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

from sklearn.preprocessing import Binarizer

import category_encoders as ce

import itertools





from scipy import interp
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.metrics import roc_curve, auc





def ann_main(df, hl_count, hl_nodes, batch_size, epochs, season_flag, thres, noise_frac, noise_var, loc):
    """
    Train a dense binary classifier on one location's data and evaluate it.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. Must contain a 'Season' column and an 'outageValue'
        column; features and target are then picked by column position
        (see the NOTE in the body). The caller's frame is not modified.
    hl_count : int
        Number of ReLU layers added after the first ReLU layer, i.e. the
        network has ``hl_count + 1`` ReLU layers before the sigmoid output.
    hl_nodes : int
        Units in every ReLU layer.
    batch_size, epochs : int
        Passed to ``fit``.
    season_flag : bool
        True: one-hot encode 'Season'. False: drop 'Season'.
    thres : float
        Rows with ``outageValue > thres`` become class 1, all others class 0.
    noise_frac, noise_var :
        Accepted for interface compatibility; currently unused (the noise
        injection step is disabled).
    loc : str
        Location name used in plot titles and in the ROC image file name.

    Returns
    -------
    tuple
        ``(test_accuracy, train_accuracy, f1, precision, recall, auc)`` where
        f1/precision/recall use ``average='weighted'`` on the test split.
    """
    if season_flag:
        encoder = ce.OneHotEncoder(cols='Season', handle_unknown='return_nan', return_df=True, use_cat_names=True)
        df = encoder.fit_transform(df)
    else:
        df = df.drop(columns=['Season'])

    # Binarise the target. `assign` returns a new frame (the column keeps its
    # position), so the DataFrame owned by the caller is never touched.
    df = df.assign(outageValue=df['outageValue'].gt(thres).astype(int))

    # NOTE(review): features/target are selected by hard-coded position.
    # Index 18 vs 14 presumably both point at 'outageValue' (one-hot encoding
    # is assumed to add four season columns) -- confirm against the CSV schema.
    if season_flag:
        X = df.iloc[:, 1:10].values
        y = df.iloc[:, 18].values
    else:
        X = df.iloc[:, 1:6].values
        y = df.iloc[:, 14].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Fit the scaler on the training split only to avoid leaking test statistics.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    ann = keras.Sequential()
    ann.add(Dense(hl_nodes, activation='relu', input_dim=X_train.shape[1]))
    for _ in range(hl_count):
        ann.add(Dense(hl_nodes, activation='relu'))
    ann.add(Dense(1, activation='sigmoid'))

    ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    ann.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)

    # evaluate() returns [loss, accuracy] because 'accuracy' is the only metric.
    tr_scores = ann.evaluate(X_train, y_train, verbose=0)
    print("Acc on train data: {:.2%} \n error on train: {}".format(tr_scores[1], 1 - tr_scores[1]))

    te_scores = ann.evaluate(X_test, y_test, verbose=0)
    print("Acc on test data: {:.2%} \n error on test: {}".format(te_scores[1], 1 - te_scores[1]))

    # Flatten the (n, 1) sigmoid output to a 1-D score vector.
    te_prob = ann.predict(X_test).ravel()

    fpr_keras, tpr_keras, thresholds_keras = roc_curve(y_test, te_prob)
    auc_keras = auc(fpr_keras, tpr_keras)

    plt.figure(0)
    # Both curves carry a label so that the legend below has entries to show.
    plt.plot([0, 1], [0, 1], 'y--', label='Chance')
    plt.plot(fpr_keras, tpr_keras, marker=".", label='ANN (AUC = {:.3f})'.format(auc_keras))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title(loc + ' ROC curve')
    plt.legend(loc='best')
    plt.savefig("../Results/Graphs/" + loc + "_AUC_Curve.png")
    plt.show()

    # One hard-label vector (score > 0.5 -> class 1) feeds every metric below.
    te_label = (te_prob > 0.5).astype(int)

    # Force a 2x2 matrix even if one class is absent from this split, so it
    # always matches the two tick labels.
    cm = confusion_matrix(y_true=y_test, y_pred=te_label, labels=[0, 1])

    cm_plot_labels = ['No outage', 'Power Outage']
    plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title=loc + ' Confusion Matrix')

    f1score = f1_score(y_true=y_test, y_pred=te_label, average='weighted')
    print("f1Score: {}".format(f1score))
    precision = precision_score(y_true=y_test, y_pred=te_label, average='weighted')
    print("Precision: {}".format(precision))
    recall = recall_score(y_true=y_test, y_pred=te_label, average='weighted')
    print("Recall: {}".format(recall))

    return (te_scores[1], tr_scores[1], f1score, precision, recall, auc_keras)

print("done")

In [None]:
def ann_run(filename, th_min, th_max, th_step, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step,s_flag, noise_fr_min, noise_fr_max, noise_fr_step, noise_var, loc):
    """
    Grid-search ``ann_main`` over every hyper-parameter combination.

    Parameters
    ----------
    filename : str
        CSV file read with ``pd.read_csv``.
    th_min, th_max, th_step : int
        Inclusive threshold range in percent; each value is divided by 100
        before it is passed to ``ann_main`` as ``thres``.
    hl_count_min, hl_count_max : int
        Inclusive range for ``hl_count`` (step 1).
    hl_nodes_min, hl_nodes_max : int
        Inclusive range for ``hl_nodes`` (step 1).
    batch_min, batch_max : int
        Inclusive range for the batch size (step 1).
    ep_min, ep_max, ep_step : int
        Inclusive range and step for the number of epochs.
    s_flag : bool
        Forwarded to ``ann_main`` as ``season_flag``.
    noise_fr_min, noise_fr_max, noise_fr_step : int
        Inclusive noise-fraction range in percent; divided by 100 before it
        is forwarded as ``noise_frac``.
    noise_var : str
        Forwarded to ``ann_main`` and recorded in the results.
    loc : str
        Location name forwarded to ``ann_main``.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated combination with the settings and the metrics
        returned by ``ann_main``.
    """
    df = pd.read_csv(filename)

    # Column order matches what earlier result files contain.
    result_columns = ['threshold', 'hl_count', 'hl_nodes', 'batch_size', 'epochs',
                      'noise_variable', 'noise_fraction', 'f1_score',
                      'testing_accuracy', 'training_accuracy', 'AUC',
                      'precision', 'recall']

    # product() varies the last range fastest, i.e. the same visiting order as
    # six nested for-loops written in this order.
    grid = itertools.product(
        range(noise_fr_min, noise_fr_max + 1, noise_fr_step),
        range(th_min, th_max + 1, th_step),
        range(hl_count_min, hl_count_max + 1),
        range(hl_nodes_min, hl_nodes_max + 1),
        range(batch_min, batch_max + 1),
        range(ep_min, ep_max + 1, ep_step),
    )

    # Collect plain dicts and build the frame once: DataFrame.append no longer
    # exists in pandas 2.x and growing a frame row by row is quadratic.
    records = []
    for n_fr, threshold, hl_count, hl_nodes, batch, ep in grid:
        te_acc, tr_acc, f1Score, precision, recall, auc_score = ann_main(
            df=df, hl_count=hl_count, hl_nodes=hl_nodes, batch_size=batch,
            epochs=ep, season_flag=s_flag, thres=threshold / 100.0,
            noise_frac=n_fr / 100.0, noise_var=noise_var, loc=loc)
        records.append({'threshold': threshold, 'hl_count': hl_count, 'hl_nodes': hl_nodes,
                        'batch_size': batch, 'epochs': ep, 'noise_variable': noise_var,
                        'noise_fraction': n_fr, 'f1_score': f1Score,
                        'testing_accuracy': te_acc, 'training_accuracy': tr_acc,
                        'precision': precision, 'recall': recall, 'AUC': auc_score})

    df_res = pd.DataFrame(records, columns=result_columns)

    print("ANN run done")

    return df_res

print("done")

In [None]:
# Single-configuration run for Fresno: every min/max pair is equal, so the
# grid inside ann_run collapses to exactly one model.
# Thresholds are expressed in percent; round() avoids truncating float
# products such as 0.29 * 100 -> 28.999...
th_min = round(0.1 * 100)
th_max = round(0.1 * 100)
th_step = 1*100
hl_count_min = 1
hl_count_max = 1
hl_nodes_min = 10
hl_nodes_max= 10
batch_min= 10
batch_max=10
ep_min=10 
ep_max=10
ep_step = 5
s_flag = True
noise_var = 'temp'
noise_frac_min = 0
noise_frac_max = 0
noise_frac_step = 10
# ann_run requires the location name as its last argument (used for plot
# titles and the ROC image file name); omitting it raises TypeError.
df_res = ann_run("../Data/GeneratedData/Fresno_Weather_Outages.csv", th_min, th_max, th_step, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, s_flag, noise_frac_min, noise_frac_max, noise_frac_step, noise_var, 'Fresno')
df_res.to_csv("../Results/Fresno_Results_20230814.csv",index=False)
print(df_res)


In [None]:
# Locations to model, in processing order. Each name is spliced into the
# "<name>_Weather_Outages.csv" input path and the per-location result file.
locationList = [
    'San Francisco', 'Fresno', 'San Jose',
    'Bellevue', 'Eugene', 'Seattle',
    'Vancouver', 'Tacoma',
    'San Diego', 'Los Angeles',
]

In [None]:
import plotly.figure_factory as ff

# Hyper-parameters are identical for every location, so they are set once
# instead of being re-assigned on each loop iteration. All min/max pairs are
# equal: exactly one model is trained per location.
th_min = round(0.5 * 100)
th_max = round(0.5 * 100)
th_step = 1000
hl_count_min = 1
hl_count_max = 1
hl_nodes_min = 10
hl_nodes_max = 10
batch_min = 10
batch_max = 10
ep_min = 10
ep_max = 10
ep_step = 5
s_flag = True
noise_var = 'temp'
noise_frac_min = 0
noise_frac_max = 0
noise_frac_step = 10

# Rows are gathered in a list and turned into a frame once after the loop:
# DataFrame.append no longer exists in pandas 2.x.
summary_rows = []
for loc in locationList:
    print("-----------------" + loc + "----------------------")
    df_res = ann_run("../Data/GeneratedData/"+loc+"_Weather_Outages.csv", th_min, th_max, th_step, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, s_flag, noise_frac_min, noise_frac_max, noise_frac_step, noise_var, loc)
    # Only one configuration is run per location, so row 0 holds its metrics.
    new_row = {'location': loc, 'f1_score': df_res.loc[0, 'f1_score'], 'testing_accuracy':df_res.loc[0, 'testing_accuracy'], 'training_accuracy':df_res.loc[0, 'training_accuracy'], 'precision':df_res.loc[0, 'precision'], 'recall':df_res.loc[0, 'recall'], 'AUC':df_res.loc[0, 'AUC']}
    summary_rows.append(new_row)
    df_res.to_csv("../Results/ModelResults/"+loc+"_Results_20230903.csv",index=False)

# Column order of the combined CSV is kept as in earlier result files.
df_res_com = pd.DataFrame(summary_rows, columns=['location', 'f1_score', 'testing_accuracy', 'training_accuracy', 'AUC', 'precision', 'recall'])
df_res_com = df_res_com.round(4)
df_res_com.to_csv("../Results/ModelResults/Combined_20230903.csv", index=False)

# Re-order the columns for the rendered table only.
df_res_com = df_res_com[['location', 'f1_score', 'testing_accuracy', 'training_accuracy', 'precision', 'recall', 'AUC']]

fig = ff.create_table(df_res_com)
fig.update_layout(
    autosize=False,
    width=750,
    height=300,
)

fig.write_image("../Results/ModelResults/Combined_20230903.png", scale=2)