# CAPSTONE PROJECT: phase 2

# Feedforward Neural Network model - Simulation

In [104]:
# Tensorflow / Keras
from tensorflow import keras 
print('Tensorflow/Keras: %s' % keras.__version__) # print version
from keras.models import Sequential 
from keras import Input 
from keras.layers import Dense 

# Data manipulation
import pandas as pd 
print('pandas: %s' % pd.__version__) # print version
import numpy as np 
print('numpy: %s' % np.__version__) # print version

# Sklearn
import sklearn 
print('sklearn: %s' % sklearn.__version__) # print version
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report 

# Visualization
import plotly 
import plotly.express as px
import plotly.graph_objects as go
print('plotly: %s' % plotly.__version__) # print version

Tensorflow/Keras: 2.12.0
pandas: 1.5.3
numpy: 1.23.5
sklearn: 1.2.1
plotly: 5.9.0


In [105]:
# Set Pandas options to display more columns
pd.options.display.max_columns=50

# Read the Kerala data csv
df=pd.read_csv('kerala.csv', encoding='utf-8')

# Drop records where target FLOODS
df=df[pd.isnull(df['FLOODS'])==False]

# For other columns with missing values, fill them in with column mean
df=df.fillna(df.mean())

# Create a flag for FLOODS, note FLOODS will be our target variable
df['FLOODS_Flag']=df['FLOODS'].apply(lambda x: 1 if x=='Yes' else 0)

# Print data
df

  df=df.fillna(df.mean())


Unnamed: 0,SUBDIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL RAINFALL,FLOODS,FLOODS_Flag
0,KERALA,1901,28.7,44.7,51.6,160.0,174.7,824.6,743.0,357.5,197.7,266.9,350.8,48.4,3248.6,YES,0
1,KERALA,1902,6.7,2.6,57.3,83.9,134.5,390.9,1205.0,315.8,491.6,358.4,158.3,121.5,3326.6,YES,0
2,KERALA,1903,3.2,18.6,3.1,83.6,249.7,558.6,1022.5,420.2,341.8,354.1,157.0,59.0,3271.2,YES,0
3,KERALA,1904,23.7,3.0,32.2,71.5,235.7,1098.2,725.5,351.8,222.7,328.1,33.9,3.3,3129.7,YES,0
4,KERALA,1905,1.2,22.3,9.4,105.9,263.3,850.2,520.5,293.6,217.2,383.5,74.4,0.2,2741.6,NO,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,KERALA,2014,4.6,10.3,17.9,95.7,251.0,454.4,677.8,733.9,298.8,355.5,99.5,47.2,3046.4,YES,0
114,KERALA,2015,3.1,5.8,50.1,214.1,201.8,563.6,406.0,252.2,292.9,308.1,223.6,79.4,2600.6,NO,0
115,KERALA,2016,2.4,3.8,35.9,143.0,186.4,522.2,412.3,325.5,173.2,225.9,125.4,23.6,2176.6,NO,0
116,KERALA,2017,1.9,6.8,8.9,43.6,173.5,498.5,319.6,531.8,209.5,192.4,92.5,38.1,2117.1,NO,0


In [106]:
df['FLOODS'].replace(['YES','NO'],[1,0],inplace=True)
df

Unnamed: 0,SUBDIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL RAINFALL,FLOODS,FLOODS_Flag
0,KERALA,1901,28.7,44.7,51.6,160.0,174.7,824.6,743.0,357.5,197.7,266.9,350.8,48.4,3248.6,1,0
1,KERALA,1902,6.7,2.6,57.3,83.9,134.5,390.9,1205.0,315.8,491.6,358.4,158.3,121.5,3326.6,1,0
2,KERALA,1903,3.2,18.6,3.1,83.6,249.7,558.6,1022.5,420.2,341.8,354.1,157.0,59.0,3271.2,1,0
3,KERALA,1904,23.7,3.0,32.2,71.5,235.7,1098.2,725.5,351.8,222.7,328.1,33.9,3.3,3129.7,1,0
4,KERALA,1905,1.2,22.3,9.4,105.9,263.3,850.2,520.5,293.6,217.2,383.5,74.4,0.2,2741.6,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,KERALA,2014,4.6,10.3,17.9,95.7,251.0,454.4,677.8,733.9,298.8,355.5,99.5,47.2,3046.4,1,0
114,KERALA,2015,3.1,5.8,50.1,214.1,201.8,563.6,406.0,252.2,292.9,308.1,223.6,79.4,2600.6,0,0
115,KERALA,2016,2.4,3.8,35.9,143.0,186.4,522.2,412.3,325.5,173.2,225.9,125.4,23.6,2176.6,0,0
116,KERALA,2017,1.9,6.8,8.9,43.6,173.5,498.5,319.6,531.8,209.5,192.4,92.5,38.1,2117.1,0,0


In [107]:
# Select data for modeling
X=df[['OCT', 'NOV', 'DEC']]
#X=df[['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL','AUG', 'SEP', 'OCT', 'NOV', 'DEC']]
y=df['FLOODS'].values


# Create training and testing samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


# Specify the structure of a neural network 
model3 = Sequential(name="Model-with-Two-Inputs") # Model using Sequential Keras API
model3.add(Input(shape=(3,), name='Input-Layer')) # one input layer with specific no. of nodes (change x value in shape=(x,) if feeding different input node value)
model3.add(Dense(2, activation='softplus', name='Hidden-Layer')) # one hidden layer with specific no. of nodes
model3.add(Dense(1, activation='sigmoid', name='Output-Layer')) # one output layer with specific no. of nodes


# Compiling keras model
model3.compile(optimizer='adam', # used for updating weights during training
              loss='binary_crossentropy', # binary cross entropy used at loss function
              metrics=['Accuracy', 'Precision', 'Recall'], # metrics used to evaluate model performance when training 
              loss_weights=None, 
              weighted_metrics=None, 
              run_eagerly=None, 
              steps_per_execution=None
             )

# Fit keras model (Backpropagation)
model3.fit(X_train, 
          y_train, 
          batch_size=10, # No. of samples. Backpropagation performed on each batch data 
          epochs=3, # No. of times dataset pass thru model during training
          verbose='auto', # control verbosity of output when training
          callbacks=None, # callback functions for execution during training
          validation_split=0.2, # proportion of the train data that will be used for the validation set to monitor model's performance when training. 20% validation, 80% training         
          shuffle=True, # shuffling train data before epoch to avoid bias
          class_weight={0 : 0.3, 1 : 0.7}, # addressing class imbalance issue in the train data. 
          sample_weight=None, # optional weight to be applied to the samples
          initial_epoch=0, # default training starting at 0
          steps_per_epoch=None,  # No. of batches to run in the epochs
          validation_steps=None, # No. of steps to run in each validation epoch
          validation_batch_size=None, # size of validation batch
          validation_freq=3, # Frequency of validation runtime during training
          max_queue_size=10, # max size of generator queue
          #use_multiprocessing=False, # multiprocess for loading data
         )

# Use model to make predictions
# Predict class labels on training data
pred_labels_tr = (model3.predict(X_train) > 0.5).astype(int)
# Predict class labels on a test data
pred_labels_te = (model3.predict(X_test) > 0.5).astype(int)


# Model Performance Summary
print("")
print('-------------------- Model Summary --------------------')
model3.summary() # print model summary
print("")
print('-------------------- Weights and Biases --------------------')
for layer in model3.layers:
    print("Layer: ", layer.name) # print layer name
    print("  --Kernels (Weights): ", layer.get_weights()[0]) # kernels (weights)
    print("  --Biases: ", layer.get_weights()[1]) # biases
    

Epoch 1/3
Epoch 2/3
Epoch 3/3

-------------------- Model Summary --------------------
Model: "Model-with-Two-Inputs"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Hidden-Layer (Dense)        (None, 2)                 8         
                                                                 
 Output-Layer (Dense)        (None, 1)                 3         
                                                                 
Total params: 11
Trainable params: 11
Non-trainable params: 0
_________________________________________________________________

-------------------- Weights and Biases --------------------
Layer:  Hidden-Layer
  --Kernels (Weights):  [[ 0.680925   -0.4317884 ]
 [-0.10154253  0.63812727]
 [-0.8207147  -0.494289  ]]
  --Biases:  [-0.0234927  -0.01647993]
Layer:  Output-Layer
  --Kernels (Weights):  [[1.3240471 ]
 [0.03227403]]
  --Biases:  [-0.02351598]


In [108]:
print("")
print('------------- Evaluation on Training Data -------------')
print(classification_report(y_train, pred_labels_tr))
print("")

print('------------- Evaluation on Test Data -------------')
print(classification_report(y_test, pred_labels_te))
print("")


------------- Evaluation on Training Data -------------
              precision    recall  f1-score   support

           0       1.00      0.02      0.05        42
           1       0.56      1.00      0.72        52

    accuracy                           0.56        94
   macro avg       0.78      0.51      0.38        94
weighted avg       0.76      0.56      0.42        94


------------- Evaluation on Test Data -------------
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.30      0.88      0.45         8

    accuracy                           0.29        24
   macro avg       0.15      0.44      0.23        24
weighted avg       0.10      0.29      0.15        24




In [109]:
def get_accuracy(y_train, y_predicted): 
    return (y_train == y_predicted).mean()

# train data
X_range = np.linspace(X.min(), X.max(), 100)
y_predicted = model3.predict(X_range.reshape(-1, 3)) # change the y value in X_range.reshape(x,y) if feeding different input node value
accuracy = get_accuracy(y_train, y_predicted)
print("Accuracy: %.3f"%(accuracy*100))

Accuracy: 55.319


# End of the Code