# CAPSTONE PROJECT: phase 2

# Feedforward Neural Network model

In [34]:
# Tensorflow / Keras
from tensorflow import keras 
print('Tensorflow/Keras: %s' % keras.__version__) # print version
from keras.models import Sequential 
from keras import Input 
from keras.layers import Dense 

# Data manipulation
import pandas as pd 
print('pandas: %s' % pd.__version__) # print version
import numpy as np 
print('numpy: %s' % np.__version__) # print version

# Sklearn
import sklearn 
print('sklearn: %s' % sklearn.__version__) # print version
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report 

# Visualization
import plotly 
import plotly.express as px
import plotly.graph_objects as go
print('plotly: %s' % plotly.__version__) # print version

Tensorflow/Keras: 2.12.0
pandas: 1.5.3
numpy: 1.23.5
sklearn: 1.2.1
plotly: 5.9.0


In [35]:
# Set Pandas options to display more columns
pd.options.display.max_columns=50

# Read the Kerala data csv
df=pd.read_csv('kerala.csv', encoding='utf-8')

# Drop records where target FLOODS
df=df[pd.isnull(df['FLOODS'])==False]

# For other columns with missing values, fill them in with column mean
df=df.fillna(df.mean())

# Create a flag for FLOODS, note FLOODS will be our target variable
df['FLOODS_Flag']=df['FLOODS'].apply(lambda x: 1 if x=='Yes' else 0)

# Print data
df





Unnamed: 0,SUBDIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL RAINFALL,FLOODS,FLOODS_Flag
0,KERALA,1901,28.7,44.7,51.6,160.0,174.7,824.6,743.0,357.5,197.7,266.9,350.8,48.4,3248.6,YES,0
1,KERALA,1902,6.7,2.6,57.3,83.9,134.5,390.9,1205.0,315.8,491.6,358.4,158.3,121.5,3326.6,YES,0
2,KERALA,1903,3.2,18.6,3.1,83.6,249.7,558.6,1022.5,420.2,341.8,354.1,157.0,59.0,3271.2,YES,0
3,KERALA,1904,23.7,3.0,32.2,71.5,235.7,1098.2,725.5,351.8,222.7,328.1,33.9,3.3,3129.7,YES,0
4,KERALA,1905,1.2,22.3,9.4,105.9,263.3,850.2,520.5,293.6,217.2,383.5,74.4,0.2,2741.6,NO,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,KERALA,2014,4.6,10.3,17.9,95.7,251.0,454.4,677.8,733.9,298.8,355.5,99.5,47.2,3046.4,YES,0
114,KERALA,2015,3.1,5.8,50.1,214.1,201.8,563.6,406.0,252.2,292.9,308.1,223.6,79.4,2600.6,NO,0
115,KERALA,2016,2.4,3.8,35.9,143.0,186.4,522.2,412.3,325.5,173.2,225.9,125.4,23.6,2176.6,NO,0
116,KERALA,2017,1.9,6.8,8.9,43.6,173.5,498.5,319.6,531.8,209.5,192.4,92.5,38.1,2117.1,NO,0


In [36]:
df['FLOODS'].replace(['YES','NO'],[1,0],inplace=True)
df

Unnamed: 0,SUBDIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL RAINFALL,FLOODS,FLOODS_Flag
0,KERALA,1901,28.7,44.7,51.6,160.0,174.7,824.6,743.0,357.5,197.7,266.9,350.8,48.4,3248.6,1,0
1,KERALA,1902,6.7,2.6,57.3,83.9,134.5,390.9,1205.0,315.8,491.6,358.4,158.3,121.5,3326.6,1,0
2,KERALA,1903,3.2,18.6,3.1,83.6,249.7,558.6,1022.5,420.2,341.8,354.1,157.0,59.0,3271.2,1,0
3,KERALA,1904,23.7,3.0,32.2,71.5,235.7,1098.2,725.5,351.8,222.7,328.1,33.9,3.3,3129.7,1,0
4,KERALA,1905,1.2,22.3,9.4,105.9,263.3,850.2,520.5,293.6,217.2,383.5,74.4,0.2,2741.6,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,KERALA,2014,4.6,10.3,17.9,95.7,251.0,454.4,677.8,733.9,298.8,355.5,99.5,47.2,3046.4,1,0
114,KERALA,2015,3.1,5.8,50.1,214.1,201.8,563.6,406.0,252.2,292.9,308.1,223.6,79.4,2600.6,0,0
115,KERALA,2016,2.4,3.8,35.9,143.0,186.4,522.2,412.3,325.5,173.2,225.9,125.4,23.6,2176.6,0,0
116,KERALA,2017,1.9,6.8,8.9,43.6,173.5,498.5,319.6,531.8,209.5,192.4,92.5,38.1,2117.1,0,0


In [37]:
#1 - Select data for modeling
X=df[[' ANNUAL RAINFALL']]
y=df['FLOODS'].values


# Create training and testing samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


# Specify the structure of a Neural Network
model = Sequential(name="Model-with-One-Input") # Model Sequential Keras API
model.add(Input(shape=(1,), name='Input-Layer')) 
model.add(Dense(2, activation='softplus', name='Hidden-Layer')) 
model.add(Dense(1, activation='sigmoid', name='Output-Layer')) 


# Compile keras model
model.compile(optimizer='adam', 
              loss='binary_crossentropy',
              metrics=['Accuracy', 'Precision', 'Recall'], 
              loss_weights=None, 
              weighted_metrics=None, 
              run_eagerly=None,
              steps_per_execution=None 
             )

# Fit keras model on the dataset (Backpropagation)
model.fit(X_train, 
          y_train, 
          batch_size=10, # No. of samples. Backpropagation performed on each batch data 
          epochs=3, # No. of times dataset pass thru model during training
          verbose='auto', 
          callbacks=None, 
          validation_split=0.2, # proportion of the train data that will be used for the validation set to monitor model's performance when training         
          shuffle=True, # shuffling train data before epoch to avoid bias
          class_weight={0 : 0.3, 1 : 0.7}, # addressing class imbalance issue in the train data. 
          sample_weight=None, # optional weight to be applied to the samples
          initial_epoch=0, # default training starting at 0
          steps_per_epoch=None,  # No. of batches to run in the epochs
          validation_steps=None, # No. of steps to run in each validation epoch
          validation_batch_size=None, # size of validation batch
          validation_freq=3, # Frequency of validation runtime during training
          max_queue_size=10, # max size of generator queue
         )

# Use model to make predictions
# Predict class labels on training data
pred_labels_tr = (model.predict(X_train) > 0.5).astype(int)
# Predict class labels on a test data
pred_labels_te = (model.predict(X_test) > 0.5).astype(int)


# Model Performance Summary
print("")
print('-------------------- Model Summary --------------------')
model.summary() # print model summary
print("")
print('-------------------- Weights and Biases --------------------')
for layer in model.layers:
    print("Layer: ", layer.name) # print layer name
    print("  --Kernels (Weights): ", layer.get_weights()[0]) # weights
    print("  --Biases: ", layer.get_weights()[1]) # biases

Epoch 1/3
Epoch 2/3
Epoch 3/3

-------------------- Model Summary --------------------
Model: "Model-with-One-Input"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Hidden-Layer (Dense)        (None, 2)                 4         
                                                                 
 Output-Layer (Dense)        (None, 1)                 3         
                                                                 
Total params: 7
Trainable params: 7
Non-trainable params: 0
_________________________________________________________________

-------------------- Weights and Biases --------------------
Layer:  Hidden-Layer
  --Kernels (Weights):  [[-0.29642928  0.25756797]]
  --Biases:  [ 0.         -0.02295491]
Layer:  Output-Layer
  --Kernels (Weights):  [[1.0468575]
 [1.2655572]]
  --Biases:  [-0.02298055]


In [38]:
print("")
print('---------- Evaluation on Training Data ----------')
print(classification_report(y_train, pred_labels_tr))
print("")

print('---------- Evaluation on Test Data ----------')
print(classification_report(y_test, pred_labels_te))
print("")


---------- Evaluation on Training Data ----------
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        42
           1       0.55      1.00      0.71        52

    accuracy                           0.55        94
   macro avg       0.28      0.50      0.36        94
weighted avg       0.31      0.55      0.39        94


---------- Evaluation on Test Data ----------
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.33      1.00      0.50         8

    accuracy                           0.33        24
   macro avg       0.17      0.50      0.25        24
weighted avg       0.11      0.33      0.17        24





Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [39]:
X_range = np.linspace(X.min(), X.max(), 100)
y_predicted = model.predict(X_range.reshape(-1, 1))

# Create a scatter plot
fig = px.scatter(x=X_range.ravel(), y=y_predicted.ravel(), 
                 opacity=0.8, color_discrete_sequence=['black'],
                 labels=dict(x="Annual Rainfall", y="FLOODS indicator where 1=YES and 0=NO",))

# Change chart background color
fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

# Set figure title
fig.update_layout(title=dict(text="Feed Forward Neural Network (1 Input) Model Results", 
                             font=dict(color='black')))
# Update marker size
fig.update_traces(marker=dict(size=7))

fig.show()



In [40]:
def get_accuracy(y_train, y_predicted): 
    return (y_train == y_predicted).mean()

# train data
accuracy = get_accuracy(y_train, y_predicted)
print("Accuracy: %.3f"%(accuracy*100))

Accuracy: 55.319


In [41]:
# Select data for modeling
X=df[['NOV', 'DEC']]
y=df['FLOODS'].values


# Create training and testing samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


# Specify the structure of a neural network
model2 = Sequential(name="Model-with-Two-Inputs") # Model Sequential Keras API
model2.add(Input(shape=(2,), name='Input-Layer')) 
model2.add(Dense(4, activation='softplus', name='Hidden-Layer')) 
model2.add(Dense(1, activation='sigmoid', name='Output-Layer')) 


# Compile the keras model
model2.compile(optimizer='adam', 
              loss='binary_crossentropy', 
              metrics=['Accuracy', 'Precision', 'Recall'],  
              loss_weights=None, 
              weighted_metrics=None, 
              run_eagerly=None, 
              steps_per_execution=None
             )

# Fit keras model on the dataset (Backpropagation)
model2.fit(X_train, 
          y_train, 
          batch_size=10, # No. of samples. Backpropagation performed on each batch data 
          epochs=3, # No. of times dataset pass thru model during training
          verbose='auto', 
          callbacks=None, 
          validation_split=0.2, # proportion of the train data that will be used for the validation set to monitor model's performance when training         
          shuffle=True, # shuffling train data before epoch to avoid bias
          class_weight={0 : 0.3, 1 : 0.7}, # addressing class imbalance issue in the train data. 
          sample_weight=None, # optional weight to be applied to the samples
          initial_epoch=0, # default training starting at 0
          steps_per_epoch=None,  # No. of batches to run in the epochs
          validation_steps=None, # No. of steps to run in each validation epoch
          validation_batch_size=None, # size of validation batch
          validation_freq=3, # Frequency of validation runtime during training
          max_queue_size=10, # max size of generator queue
         )

# Use model to make predictions
# Predict class labels on training data
pred_labels_tr = (model2.predict(X_train) > 0.5).astype(int)
# Predict class labels on a test data
pred_labels_te = (model2.predict(X_test) > 0.5).astype(int)


# Model Performance Summary
print("")
print('-------------------- Model Summary --------------------')
model2.summary() # print model summary
print("")
print('-------------------- Weights and Biases --------------------')
for layer in model2.layers:
    print("Layer: ", layer.name) # print layer name
    print("  --Kernels (Weights): ", layer.get_weights()[0]) # kernels (weights)
    print("  --Biases: ", layer.get_weights()[1]) # biases
    

Epoch 1/3
Epoch 2/3
Epoch 3/3

-------------------- Model Summary --------------------
Model: "Model-with-Two-Inputs"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Hidden-Layer (Dense)        (None, 4)                 12        
                                                                 
 Output-Layer (Dense)        (None, 1)                 5         
                                                                 
Total params: 17
Trainable params: 17
Non-trainable params: 0
_________________________________________________________________

-------------------- Weights and Biases --------------------
Layer:  Hidden-Layer
  --Kernels (Weights):  [[ 0.36855835  0.90142965  0.6726997   0.7810755 ]
 [-0.5459246  -0.90606105 -0.5440302  -0.5878933 ]]
  --Biases:  [ 0.00217287 -0.00365924 -0.00372955  0.00362022]
Layer:  Output-Layer
  --Kernels (Weights):  [[ 0.8667442 ]
 [-0.33898982]
 [-0.09111

In [42]:
print("")
print('------------- Evaluation on Training Data -------------')
print(classification_report(y_train, pred_labels_tr))
print("")

print('------------- Evaluation on Test Data -------------')
print(classification_report(y_test, pred_labels_te))
print("")


------------- Evaluation on Training Data -------------
              precision    recall  f1-score   support

           0       0.60      0.36      0.45        42
           1       0.61      0.81      0.69        52

    accuracy                           0.61        94
   macro avg       0.60      0.58      0.57        94
weighted avg       0.60      0.61      0.58        94


------------- Evaluation on Test Data -------------
              precision    recall  f1-score   support

           0       0.50      0.38      0.43        16
           1       0.17      0.25      0.20         8

    accuracy                           0.33        24
   macro avg       0.33      0.31      0.31        24
weighted avg       0.39      0.33      0.35        24




In [43]:
def Plot_3D(X, X_test, y_test, clf, x1, x2, mesh_size, margin):
            
    # Specify a size of the mesh to be used
    mesh_size=mesh_size
    margin=margin

    # Create a mesh grid on which we will run our model
    x_min, x_max = X.iloc[:, 0].min() - margin, X.iloc[:, 0].max() + margin
    y_min, y_max = X.iloc[:, 1].min() - margin, X.iloc[:, 1].max() + margin
    xrange = np.arange(x_min, x_max, mesh_size)
    yrange = np.arange(y_min, y_max, mesh_size)
    xx, yy = np.meshgrid(xrange, yrange)
            
    # Calculate Neural Network predictions on the grid
    Z = model2.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Create a 3D scatter plot
    fig = px.scatter_3d(x=X_test[x1], y=X_test[x2], z=y_test,
                     opacity=0.8, color_discrete_sequence=['black'], height=500, width=600)

    # Set figure title and colors
    fig.update_layout(
                      paper_bgcolor = 'white',
                      scene_camera=dict(up=dict(x=0, y=0, z=1), 
                                        center=dict(x=0, y=0, z=-0.1),
                                        eye=dict(x=0.75, y=-1.75, z=1)),
                                        margin=dict(l=0, r=0, b=0, t=0),
                      scene = dict(xaxis=dict(title=x1,
                                              backgroundcolor='white',
                                              color='black',
                                              gridcolor='#f0f0f0'),
                                   yaxis=dict(title=x2,
                                              backgroundcolor='white',
                                              color='black',
                                              gridcolor='#f0f0f0'
                                              ),
                                   zaxis=dict(title='Probability of Flood',
                                              backgroundcolor='lightgrey',
                                              color='black', 
                                              gridcolor='#f0f0f0', 
                                              )))
    
    # Update marker size
    fig.update_traces(marker=dict(size=1))

    # Add prediction plane
    fig.add_traces(go.Surface(x=xrange, y=yrange, z=Z, name='FF NN Prediction Plane',
                              colorscale='Bluered',
                              reversescale=True,
                              showscale=False, 
                              contours = {"z": {"show": True, "start": 0.5, "end": 0.9, "size": 0.5}}))
    fig.show()
    return fig


# Call the above function
fig = Plot_3D(X, X_test, y_test, model2, x1='NOV', x2='DEC', mesh_size=1, margin=0)



In [44]:
def get_accuracy(y_train, y_predicted): 
    return (y_train == y_predicted).mean()

# train data
accuracy = get_accuracy(y_train, y_predicted)
print("Accuracy: %.3f"%(accuracy*100))

Accuracy: 55.319


# End of the Code