# In [1]:
# Import dependencies
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import json

import datetime
import re  

from sqlalchemy import create_engine

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

def neural_model(X, y, layer_nodes, activation, epochs):
    """Train a dense binary classifier on a 70/30 positional split and summarise it.

    The first 70% of the rows are used for training and the remaining 30% for
    testing. Rows are not shuffled, so the split follows whatever order the
    caller passes them in. Features are standardised with statistics fitted
    on the training rows only.

    Args:
        X: Feature table, one row per sample.
        y: Binary labels (0/1) aligned row-for-row with ``X``.
        layer_nodes: Number of units for each hidden layer, in order.
        activation: Activation name used for every hidden layer.
        epochs: Number of training epochs.

    Returns:
        A tuple ``(summary, metrics)``:

        * ``summary`` is a dict of strings with the model name, final
          train/test accuracy and loss, the number of class-0 test rows that
          were predicted as class 0, and the number of class-0 test rows.
        * ``metrics`` is the per-epoch loss/accuracy history as produced by
          ``DataFrame.to_dict()`` (column -> {epoch index -> value}).
    """
    # Creating training and testing subsets
    split = int(X.shape[0] * 0.7)
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # Standardize the data (scaler is fitted on the training rows only)
    X_scaler = StandardScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # Define the model: one Dense layer per entry in layer_nodes, followed by
    # a single sigmoid output unit.
    model = Sequential()
    for index, nodes in enumerate(layer_nodes):
        if index == 0:
            model.add(Dense(units=nodes, input_dim=X_train_scaled.shape[1], activation=activation))
        else:
            model.add(Dense(units=nodes, activation=activation))
    model.add(Dense(units=1, activation='sigmoid'))

    # Compile the model
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

    history = model.fit(
        X_train_scaled,
        y_train,
        validation_data=(X_test_scaled, y_test),
        epochs=epochs,
        verbose=0,
    )

    # The sigmoid output is a probability, not a label: threshold it at 0.5
    # to obtain the predicted class before comparing with the true labels.
    probabilities = model.predict(X_test_scaled).ravel()
    predicted_class = (probabilities > 0.5).astype(int)

    # Compare as plain arrays so the comparison is positional; y_test may
    # carry an index that does not start at 0, which would otherwise make
    # pandas align the two operands on mismatching labels.
    actual_class = pd.Series(y_test).to_numpy()

    metrics = pd.DataFrame({
        "Train Loss": history.history['loss'],
        "Test Loss": history.history['val_loss'],
        "Train Accuracy": history.history['accuracy'],
        "Test Accuracy": history.history['val_accuracy']
    })

    model_loss_train, model_accuracy_train = model.evaluate(X_train_scaled, y_train)
    model_loss_test, model_accuracy_test = model.evaluate(X_test_scaled, y_test)

    d = {}
    d['Name'] = (f"Neural Model: {layer_nodes}, {activation}, {epochs}")
    d['Accuracy Score Train'] = (f"{model_accuracy_train:.4f}")
    d['Accuracy Score Test'] = (f"{model_accuracy_test:.4f}")
    d['Model Loss Train'] = (f"{model_loss_train:.4f}")
    d['Model Loss Test'] = (f"{model_loss_test:.4f}")
    d['Predicted Correctly'] = (f"{((predicted_class == 0) & (actual_class == 0)).sum()}")
    d['Actual'] = (f"{(actual_class == 0).sum()}")

    return d, metrics.to_dict()

# Per-train column selection, keyed by train number (as a string).
#   df_1   - explicit list of columns taken from the "Preliminary" table
#   df_2   - regex (applied with re.match) selecting "Primary" columns
#   df_3   - regex (applied with re.match) selecting "Aeration" columns
#   df_4   - regex (applied with re.match) selecting "Secondary" columns
#   target - column used to derive the TP exceedance label
# Every selection keeps "Time", which is the merge key.
trains = {
    "9": {
        "df_1": ['Time', 'T5-S3-PRE-FeCL2'],
        "df_2": r'(^T5.*-P9-.*|Time)',
        "df_3": r'(^T5.*-P9.*|Time)',
        "df_4": r'(^T5.*-S17.*|Time)',
        "target": "T5-S3-SEC-S17-TP"
    },
    "10": {
        "df_1": ['Time', 'T5-S3-PRE-FeCL2'],
        "df_2": r'(^T5.*-P10-.*|Time)',
        "df_3": r'(^T5.*-P10.*|Time)',
        "df_4": r'(^T5.*-S18.*|Time)',
        "target": "T5-S3-SEC-S18-TP"
    },
    "11": {
        "df_1": ['Time', 'T5-S3-PRE-FeCL2'],
        "df_2": r'(^T5.*-P11-.*|Time)',
        "df_3": r'(^T5.*-P11.*|Time)',
        "df_4": r'(^T5.*-S19.*|Time)',
        "target": "T5-S3-SEC-S19-TP"
    },
    # For trains 12-14 the secondary columns may be prefixed T5 or T6.
    # The character class is written [56]: a comma inside [...] is a literal
    # character, not a separator.
    "12": {
        "df_1": ['Time', 'T6-S3-PRE-FeCL2'],
        "df_2": r'(^T6.*-P12-.*|Time)',
        "df_3": r'(^T6.*-P12.*|Time)',
        "df_4": r'(^T[56].*-S20.*|Time)',
        "target": "T5-S3-SEC-S20-TP"
    },
    "13": {
        "df_1": ['Time', 'T6-S3-PRE-FeCL2'],
        "df_2": r'(^T6.*-P13-.*|Time)',
        "df_3": r'(^T6.*-P13.*|Time)',
        "df_4": r'(^T[56].*-S21.*|Time)',
        "target": "T5-S3-SEC-S21-TP"
    },
    "14": {
        "df_1": ['Time', 'T6-S3-PRE-FeCL2'],
        "df_2": r'(^T6.*-P14-.*|Time)',
        "df_3": r'(^T6.*-P14.*|Time)',
        "df_4": r'(^T[56].*-S22.*|Time)',
        "target": "T5-S3-SEC-S22-TP"
    }
}

# Results keyed by train number; each entry holds the summary ("outcome")
# and per-epoch history ("metrics") returned by neural_model.
model_results = {}

# Hyper-parameters shared by every train's model.
layer_nodes = [100, 75]
activation = 'relu'
epochs = 500

# Primary-stage nutrient columns that are dropped due to missing data until
# November 2018.
missing_pri_columns = re.compile(
    r'(^.*-PRI-.*-TKN|^.*-PRI-.*-Ammonia|^.*-PRI-.*-Nitrate|^.*-PRI-.*-Nitrite)'
)

# The four source tables are the same for every train, so connect and read
# them once instead of once per loop iteration. The context manager closes
# the connection even if a query fails, and the engine's pool is released
# afterwards.
engine = create_engine("postgresql://postgres:postgres@localhost/WWTP")
try:
    with engine.connect() as conn:
        preliminary = pd.read_sql_query('select * from "Preliminary"', con=conn)
        primary = pd.read_sql_query('select * from "Primary"', con=conn)
        aeration = pd.read_sql_query('select * from "Aeration"', con=conn)
        secondary = pd.read_sql_query('select * from "Secondary"', con=conn)
finally:
    engine.dispose()

for train, config in trains.items():
    # Keep only the columns that belong to this train. re.match anchors at
    # the start of the column name, which the patterns rely on.
    df_1 = preliminary[config['df_1']]
    df_2 = primary[[col for col in primary.columns if re.match(config['df_2'], col)]]
    df_3 = aeration[[col for col in aeration.columns if re.match(config['df_3'], col)]]
    df_4 = secondary[[col for col in secondary.columns if re.match(config['df_4'], col)]]

    # Merging Dataframes
    df_temp_1 = pd.merge(df_1, df_2, on='Time', how='outer')
    df_temp_2 = pd.merge(df_temp_1, df_3, on='Time', how='outer')
    df = pd.merge(df_temp_2, df_4, on='Time', how='outer')

    # Rows without a target measurement cannot be labelled. Drop them first,
    # otherwise "NaN < 0.35" evaluates to False and the row would silently be
    # labelled 0 (exceedance).
    target = config['target']
    df = df.dropna(subset=[target])

    # Add a classified column for 'TP' - value of 0 for exceedance "out of compliance"
    df['TP_Exceedance'] = (df[target] < 0.35).astype(int)
    df = df.drop(columns=target)

    # Keeping the records starting on July 1st, 2017, in chronological order
    df = df[df['Time'] >= datetime.datetime(2017, 7, 1)].sort_values(by='Time')

    # Resetting the index
    df = df.reset_index(drop=True)

    # Dropping columns due to missing data until November 2018
    df = df.drop(columns=[col for col in df.columns if missing_pri_columns.match(col)])

    # Dropping NaN
    df = df.dropna()

    # Convert Time into numerical columns. Series.dt.week was deprecated and
    # later removed from pandas; isocalendar().week gives the same ISO week
    # number (cast back to a plain integer dtype).
    df['month'] = df['Time'].dt.month
    df['week'] = df['Time'].dt.isocalendar().week.astype(int)
    df['day'] = df['Time'].dt.day

    # Drop the raw Time column now that its parts have been extracted
    df = df.drop(columns='Time')

    y = df['TP_Exceedance']
    X = df.drop(columns="TP_Exceedance")

    outcome, metrics = neural_model(X, y, layer_nodes, activation, epochs)

    model_results[train] = {
        "outcome": outcome,
        "metrics": metrics
    }

fileName = "Neural_Network_Classification.json"
with open(fileName, 'w', encoding='utf-8') as outfile:
    json.dump(model_results, outfile, indent=4)

