In [None]:
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are indexed as true labels and columns as
        predicted labels (see the axis labels set below).
    classes : sequence
        Tick labels, one per class, used on both axes.
    normalize : bool, default False
        When True every row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are shown as all zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None. The matrix is printed to stdout and drawn with pyplot.
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having been executed first.
    import itertools
    import numpy as np

    cm = np.asarray(cm)

    # Normalize BEFORE drawing so the colours agree with the cell annotations.
    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Two decimals keep normalized values readable; raw values are
        # passed through unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so the axis labels are taken into account by the layout.
    plt.tight_layout()

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import random

from tensorflow.keras.utils import to_categorical

import keras
from keras.models import Sequential
from keras.layers import Dense

from sklearn.compose import ColumnTransformer

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

import category_encoders as ce


import itertools


def ann_main(df, predays, hl_count, hl_nodes, batch_size, epochs, futdays, season_flag):
    """
    Train and evaluate one dense softmax classifier of the AQI category.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Season', 'aqi', 'omean', 'pmean', 'temp',
        'humidity', 'precip' and 'windspeed' (all are referenced below).
    predays : int
        Number of previous-row copies of the weather columns to append as
        extra features (suffix "_pr<k>"); leading rows are filled with 0.
    hl_count : int
        Number of Dense layers added after the first Dense layer.
    hl_nodes : int
        Units in every hidden Dense layer.
    batch_size, epochs : int
        Passed to `fit`.
    futdays : int
        The target is the encoded AQI category `futdays` rows ahead.
    season_flag : bool
        True one-hot encodes 'Season' and uses it as a feature; False drops it.

    Returns
    -------
    tuple
        (test accuracy, train accuracy, weighted F1 on the test split).
    """
    if season_flag:
        encoder = ce.OneHotEncoder(cols='Season', handle_unknown='return_nan', return_df=True, use_cat_names=True)
        df = encoder.fit_transform(df)
    else:
        df = df.drop(columns=['Season'])

    labels = ["good", "moderate", "unhealthy-sensitive", "unhealthy", "very-unhealthy", 'hazardous']
    bins = [0, 50, 100, 150, 200, 300, 500]
    df['aqi_categories'] = pd.cut(df['aqi'], bins, labels=labels, include_lowest=True)

    # NOTE(review): LabelEncoder numbers classes in sorted label order, not in
    # the severity order of `labels`; fine for classification, but the integer
    # codes are not ordinal.
    yle = LabelEncoder()
    df['aqi'] = yle.fit_transform(df['aqi_categories'])

    # Move the non-feature columns to the front so features can be sliced off
    # positionally; final order is aqi_categories, pmean, aqi, omean, ...
    for col in ("omean", "aqi", "pmean", "aqi_categories"):
        df.insert(0, col, df.pop(col))

    df['aqi'].plot.hist()

    # Columns that get lagged copies; the one-hot season columns join them
    # when seasons are used as features.
    lag_cols = ['temp', 'humidity', 'precip', 'windspeed']
    if season_flag:
        lag_cols = lag_cols + ["Season_0.0", "Season_1.0", "Season_2.0", "Season_3.0"]

    lagged = df[lag_cols].copy()
    for lag in range(1, predays + 1):
        # Each pass shifts one more row back, so pass k holds values k rows earlier.
        lagged = lagged.shift(periods=1, fill_value=0)
        df = df.join(lagged, rsuffix="_pr" + str(lag))

    # NOTE(review): skips the four columns moved to the front plus one more
    # leading column of the input (presumably a date/index column) - confirm
    # against the CSV schema.
    X = df.iloc[:, 5:].values

    # Target is the category `futdays` rows ahead; the trailing rows that have
    # no future value are forward-filled.
    df['aqi'] = df.aqi.shift(-1 * futdays)
    df = df.ffill()
    y = to_categorical(df['aqi'].values, num_classes=6)
    print(y.shape)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    ann_input_size = X.shape[1]

    ann = keras.Sequential()
    ann.add(Dense(hl_nodes, activation='relu', input_dim=ann_input_size))
    for _ in range(hl_count):
        ann.add(Dense(hl_nodes, activation='relu'))
    ann.add(Dense(6, activation='softmax'))

    ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    ann.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)

    tr_scores = ann.evaluate(X_train, y_train, verbose=0)
    print("Acc on train data: {}% \n error on train: {}".format(tr_scores[1], (1-tr_scores[1])))

    te_scores = ann.evaluate(X_test, y_test, verbose=0)
    print("Acc on test data: {}% \n error on test: {}".format(te_scores[1], 1-te_scores[1]))

    # Pick the most probable class per sample. Thresholding the softmax output
    # at 0.5 would leave rows with no predicted class whenever no probability
    # exceeds 0.5, and F1 would then be scored as a multilabel problem.
    te_pred = np.argmax(ann.predict(X_test), axis=1)
    te_true = np.argmax(y_test, axis=1)

    f1score = f1_score(y_true=te_true, y_pred=te_pred, average='weighted')
    print("f1Score: {}".format(f1score))

    return (te_scores[1], tr_scores[1], f1score)

   
    
def ann_run(filename, prevday_max, prevday_min, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, futdays,s_flag):
    """
    Grid-search `ann_main` over the given hyper-parameter ranges.

    Parameters
    ----------
    filename : str
        CSV file read with `pd.read_csv` and passed to every `ann_main` call.
    prevday_max, prevday_min : int
        Inclusive range of `predays` (note: max comes before min).
    hl_count_min, hl_count_max : int
        Inclusive range of `hl_count`.
    hl_nodes_min, hl_nodes_max : int
        Inclusive range of `hl_nodes`.
    batch_min, batch_max : int
        Inclusive range of `batch_size`.
    ep_min, ep_max, ep_step : int
        Epoch values are `range(ep_min, ep_max + 1, ep_step)`.
    futdays : int
        Every horizon from 0 to `futdays` inclusive is evaluated.
    s_flag : bool
        Forwarded to `ann_main` as `season_flag`.

    Returns
    -------
    pandas.DataFrame
        One row per configuration with the settings and the resulting
        F1 score, testing accuracy and training accuracy.
    """
    df = pd.read_csv(filename)

    result_columns = ['previous_days','hl_count', 'hl_nodes','batch_size', 'epochs', 'future_days', 'f1_score', 'testing_accuracy','training_accuracy']

    # Same iteration order as nested loops with the future horizon outermost
    # and the epoch count innermost.
    grid = itertools.product(
        range(futdays + 1),
        range(prevday_min, prevday_max + 1),
        range(hl_count_min, hl_count_max + 1),
        range(hl_nodes_min, hl_nodes_max + 1),
        range(batch_min, batch_max + 1),
        range(ep_min, ep_max + 1, ep_step),
    )

    # Rows are collected in a list and the frame is built once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []
    for futday, predays, hl_count, hl_nodes, batch, ep in grid:
        te_acc, tr_acc, f1Score = ann_main(df = df, predays = predays, hl_count = hl_count, hl_nodes = hl_nodes,batch_size = batch, epochs = ep, futdays = futday, season_flag=s_flag)
        records.append({'previous_days':predays, 'hl_count':hl_count, 'hl_nodes':hl_nodes, 'batch_size':batch, 'epochs':ep, 'future_days':futday, 'f1_score':f1Score, 'testing_accuracy':te_acc, 'training_accuracy':tr_acc})

    # Passing `columns` keeps the header even when the grid is empty.
    df_res = pd.DataFrame(records, columns=result_columns)

    print("ANN run done")

    return (df_res)
    

# Marks the end of this cell's execution (the definitions above have run).
print("done")

In [None]:
# Single-configuration ANN run on the Los Angeles data set.
prevday_min = 1
prevday_max = 1
# hl_count_min was never defined or passed, which shifted every later argument
# by one position and left `futdays` missing (TypeError). It is set equal to
# hl_count_max so the cell stays a single-configuration run.
hl_count_min = 1
hl_count_max = 1
hl_nodes_min = 10
hl_nodes_max= 10
batch_min= 10
batch_max=10
ep_min=10 
ep_max=10
ep_step = 5
futdays = 0
df_res = ann_run("../Data_Prep_LA/WeatherAndPollution_2011_2020.csv", prevday_max, prevday_min, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, futdays, s_flag = False)
df_res.to_csv("LA_Results_20230212.csv",index=False)
print(df_res)

In [None]:
# Single-configuration ANN run on the San Francisco data set.
# Every range collapses to one value, so exactly one model is trained.
prevday_min, prevday_max = 1, 1
hl_count_min, hl_count_max = 5, 5
hl_nodes_min, hl_nodes_max = 10, 10
batch_min, batch_max = 10, 10
ep_min, ep_max, ep_step = 10, 10, 5
futdays = 0

df_res = ann_run(
    "../Data_Prep_SF/WeatherAndPollution_2011_2020.csv",
    prevday_max=prevday_max,
    prevday_min=prevday_min,
    hl_count_min=hl_count_min,
    hl_count_max=hl_count_max,
    hl_nodes_min=hl_nodes_min,
    hl_nodes_max=hl_nodes_max,
    batch_min=batch_min,
    batch_max=batch_max,
    ep_min=ep_min,
    ep_max=ep_max,
    ep_step=ep_step,
    futdays=futdays,
    s_flag=False,
)
df_res.to_csv("SF_Results_20230212.csv", index=False)
print(df_res)

In [None]:
# Single-configuration ANN run on the Florida data set (no previous-day features).
prevday_min = 0
prevday_max = 0
# The original call omitted hl_count_min, ep_step and s_flag, so it raised a
# TypeError. hl_count_min mirrors hl_count_max to keep a single configuration.
hl_count_min = 1
hl_count_max = 1
hl_nodes_min = 10
hl_nodes_max= 10
batch_min= 10
batch_max=10
ep_min=10 
ep_max=10
# With ep_min == ep_max any positive step yields the single epoch value 10.
ep_step = 1
futdays = 0
df_res = ann_run("../Data_Prep_FL/WeatherAndPollution_2011_2020.csv", prevday_max, prevday_min, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, futdays, s_flag = False)
df_res.to_csv("FL_Results_20230212.csv",index=False)
print(df_res)


In [None]:
# Single-configuration ANN run on the New York data set (no previous-day features).
prevday_min = 0
prevday_max = 0
# The original call omitted hl_count_min, ep_step and s_flag, so it raised a
# TypeError. hl_count_min mirrors hl_count_max to keep a single configuration.
hl_count_min = 1
hl_count_max = 1
hl_nodes_min = 10
hl_nodes_max= 10
batch_min= 10
batch_max=10
ep_min=10 
ep_max=10
# With ep_min == ep_max any positive step yields the single epoch value 10.
ep_step = 1
futdays = 0
df_res = ann_run("../Data_Prep_NY/WeatherAndPollution_2011_2020.csv", prevday_max, prevday_min, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, futdays, s_flag = False)
df_res.to_csv("NY_Results_20230212.csv",index=False)
print(df_res)

In [None]:
# Single-configuration ANN run on the combined data set (no previous-day features).
prevday_min = 0
prevday_max = 0
# The original call omitted hl_count_min and s_flag, so it raised a TypeError.
# hl_count_min mirrors hl_count_max to keep a single configuration.
hl_count_min = 1
hl_count_max = 1
hl_nodes_min = 10
hl_nodes_max= 10
batch_min= 10
batch_max=10
ep_min=10 
ep_max=10
ep_step = 1
futdays = 0
df_res = ann_run("../Data_Prep_ALL/WeatherAndPollution_2011_2020.csv", prevday_max, prevday_min, hl_count_min, hl_count_max, hl_nodes_min, hl_nodes_max, batch_min, batch_max, ep_min, ep_max, ep_step, futdays, s_flag = False)
df_res.to_csv("ALL_Results_20230212.csv",index=False)
print(df_res)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Plot accuracy against hidden-layer count, one line per number of previous days.
filename = "../ANN/LA_Results_20230129.csv" #up to PyCode, down to ANN
df_results = pd.read_csv(filename)

# Drop the first column only when it is a leftover unnamed index column.
# Files written with index=False start with 'previous_days', which the
# filter below needs.
first_col = df_results.columns[0]
if str(first_col).startswith("Unnamed"):
    df_results = df_results.drop(columns=[first_col])

# `hl_nodes` was not defined anywhere in the notebook (NameError on a fresh
# kernel); 10 is the node count used by the single-configuration runs.
# NOTE(review): confirm this is the node count the plot is meant to show.
hl_nodes = 10

# Result files written by ann_run use 'testing_accuracy'; an 'accuracy'
# column is used when the file has one.
acc_col = 'accuracy' if 'accuracy' in df_results.columns else 'testing_accuracy'

for prev in range(0,6):
    label = "N"+str(prev)+"C"+str(hl_nodes)
    specific_df = df_results.loc[(df_results['previous_days'] == prev) & (df_results['hl_nodes']==hl_nodes)]
    plt.plot(specific_df['hl_count'], specific_df[acc_col], label=label) #plots

#plt.legend(loc="best")
plt.ylim(0.75,0.85)

#plt.show() #displays plot
# NOTE(review): the input file is an LA result file but the figure is saved
# under a New York name - confirm which one is intended.
plt.savefig(fname="New_York_HL_Count")

In [None]:
def runHypTun(infile, pre):
    """
    Run six one-at-a-time hyper-parameter sweeps with `ann_run`.

    Each sweep widens one group of settings while the others stay at the
    baseline, and writes its result table to
    "../HypTun/<pre>_Results_<sweep>.csv".

    Parameters
    ----------
    infile : str
        Path of the CSV file handed to `ann_run`.
    pre : str
        Prefix used in the output file names.
    """
    # Baseline: every range collapses to a single value.
    baseline = {
        "prevday_max": 1, "prevday_min": 1,
        "hl_count_min": 5, "hl_count_max": 5,
        "hl_nodes_min": 10, "hl_nodes_max": 10,
        "batch_min": 10, "batch_max": 10,
        "ep_min": 10, "ep_max": 10, "ep_step": 1,
        "futdays": 0,
    }

    # (output-file suffix, settings that differ from the baseline), in run order.
    sweeps = [
        ("_Results_HLNCount.csv", {"hl_nodes_min": 5, "hl_nodes_max": 15}),
        ("_Results_HLCount.csv", {"hl_count_min": 2, "hl_count_max": 10}),
        ("_Results_Epochs.csv", {"ep_min": 5, "ep_max": 50, "ep_step": 5}),
        ("_Results_Batch.csv", {"batch_min": 5, "batch_max": 15}),
        ("_Results_Predays.csv", {"prevday_min": 0, "prevday_max": 5}),
        ("_Results_Futdays.csv", {"futdays": 14}),
    ]

    for suffix, overrides in sweeps:
        settings = {**baseline, **overrides}
        df_res = ann_run(infile, s_flag=False, **settings)
        df_res.to_csv("../HypTun/" + pre + suffix, index=False)

# Run the sweeps for each city; the prefix selects both the input folder
# and the output file names.
for city in ("LA", "SF", "FL", "NY"):
    runHypTun("../Data_Prep_" + city + "/WeatherAndPollution_2011_2020.csv", city)






In [None]:
# Full grid search on the Baker data set: every range is swept at once,
# so this cell trains a very large number of models.
prevday_min, prevday_max = 0, 5
hl_count_min, hl_count_max = 2, 10
hl_nodes_min, hl_nodes_max = 5, 15
batch_min, batch_max = 5, 15
ep_min, ep_max, ep_step = 5, 50, 5
futdays = 14

df_res = ann_run(
    "../Data_Prep_Baker/WeatherAndPollution_2011_2020.csv",
    prevday_max=prevday_max,
    prevday_min=prevday_min,
    hl_count_min=hl_count_min,
    hl_count_max=hl_count_max,
    hl_nodes_min=hl_nodes_min,
    hl_nodes_max=hl_nodes_max,
    batch_min=batch_min,
    batch_max=batch_max,
    ep_min=ep_min,
    ep_max=ep_max,
    ep_step=ep_step,
    futdays=futdays,
    s_flag=False,
)
df_res.to_csv("Baker_Results_20230212.csv", index=False)
print(df_res)

In [None]:
def runHypTun(pre):
    """
    Run six one-at-a-time hyper-parameter sweeps on the Baker data set.

    The input file is fixed; `pre` only determines the output file names
    "../HypTun/<pre>_Results_<sweep>.csv".

    Parameters
    ----------
    pre : str
        Prefix used in the output file names.
    """
    filename = "../Data_Prep_Baker/WeatherAndPollution_2011_2020.csv"

    def sweep(out_suffix, **changed):
        """Run ann_run with the baseline settings plus `changed`, then save the table."""
        settings = dict(
            prevday_max=1, prevday_min=1,
            hl_count_min=5, hl_count_max=5,
            hl_nodes_min=10, hl_nodes_max=10,
            batch_min=10, batch_max=10,
            ep_min=10, ep_max=10, ep_step=1,
            futdays=0,
        )
        settings.update(changed)
        table = ann_run(filename, s_flag=False, **settings)
        table.to_csv("../HypTun/" + pre + out_suffix, index=False)

    # One sweep per setting group; all other settings stay at the baseline.
    sweep("_Results_HLNCount.csv", hl_nodes_min=5, hl_nodes_max=15)
    sweep("_Results_HLCount.csv", hl_count_min=2, hl_count_max=10)
    sweep("_Results_Epochs.csv", ep_min=5, ep_max=50, ep_step=5)
    sweep("_Results_Batch.csv", batch_min=5, batch_max=15)
    sweep("_Results_Predays.csv", prevday_min=0, prevday_max=5)
    sweep("_Results_Futdays.csv", futdays=14)

# Run the sweeps with "Baker" as the output-file prefix.
runHypTun("Baker")





In [None]:
def runHypTun(pre):
    """
    Run six one-at-a-time hyper-parameter sweeps for the data set named `pre`.

    Input is "../Data_Prep_<pre>/WeatherAndPollution_2011_2020.csv"; each
    sweep writes "../HypTun/<pre>_Results_<sweep>.csv".

    Parameters
    ----------
    pre : str
        Data-set name, used in both the input folder and the output names.
    """
    filename = "../Data_Prep_"+pre+"/WeatherAndPollution_2011_2020.csv"

    # Argument names of ann_run after `filename`, in positional order.
    arg_order = (
        "prevday_max", "prevday_min",
        "hl_count_min", "hl_count_max",
        "hl_nodes_min", "hl_nodes_max",
        "batch_min", "batch_max",
        "ep_min", "ep_max", "ep_step",
        "futdays",
    )
    # Baseline value for each argument above (single configuration).
    defaults = dict(zip(arg_order, (1, 1, 5, 5, 10, 10, 10, 10, 10, 10, 1, 0)))

    # Sweep name -> settings that are widened for that sweep, in run order.
    plan = (
        ("HLNCount", {"hl_nodes_min": 5, "hl_nodes_max": 15}),
        ("HLCount", {"hl_count_min": 2, "hl_count_max": 10}),
        ("Epochs", {"ep_min": 5, "ep_max": 50, "ep_step": 5}),
        ("Batch", {"batch_min": 5, "batch_max": 15}),
        ("Predays", {"prevday_min": 0, "prevday_max": 5}),
        ("Futdays", {"futdays": 14}),
    )

    for sweep_name, widened in plan:
        values = [widened.get(arg, defaults[arg]) for arg in arg_order]
        df_res = ann_run(filename, *values, s_flag = False)
        df_res.to_csv("../HypTun/"+pre+"_Results_"+sweep_name+".csv",index=False)

# Run the sweeps for Arizona and Texas ("Baker" is currently not run here).
for dataset in ("AZ", "TX"):
    runHypTun(dataset)

