## Overfitting vs Underfitting

<img src="https://media.geeksforgeeks.org/wp-content/cdn-uploads/20190523171258/overfitting_2.png" alt="img" width="700"/>

### Dropout neurons help with overfitting and avoid bias

## Preprocessing Data

In [87]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sn
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

warnings.filterwarnings("ignore")

df = pd.read_csv("11_Sonar_dataset.csv", header=None) # this file doesn't have headers or column names

print(df.shape)
print(df.columns)
print(df[60].value_counts())
df.sample(5)

(208, 61)
Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
           dtype='int64')
M    111
R     97
Name: 60, dtype: int64


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
77,0.0336,0.0294,0.0476,0.0539,0.0794,0.0804,0.1136,0.1228,0.1235,0.0842,...,0.015,0.0111,0.0032,0.0035,0.0169,0.0137,0.0015,0.0069,0.0051,R
50,0.0353,0.0713,0.0326,0.0272,0.037,0.0792,0.1083,0.0687,0.0298,0.088,...,0.0163,0.0242,0.0043,0.0202,0.0108,0.0037,0.0096,0.0093,0.0053,R
93,0.0459,0.0437,0.0347,0.0456,0.0067,0.089,0.1798,0.1741,0.1598,0.1408,...,0.0067,0.0032,0.0109,0.0164,0.0151,0.007,0.0085,0.0117,0.0056,R
179,0.0394,0.042,0.0446,0.0551,0.0597,0.1416,0.0956,0.0802,0.1618,0.2558,...,0.0146,0.004,0.0114,0.0032,0.0062,0.0101,0.0068,0.0053,0.0087,M
190,0.0156,0.021,0.0282,0.0596,0.0462,0.0779,0.1365,0.078,0.1038,0.1567,...,0.015,0.006,0.0082,0.0091,0.0038,0.0056,0.0056,0.0048,0.0024,M


In [88]:
X = df.drop(60, axis="columns")
y = df[60]

y = pd.get_dummies(y, drop_first=True) # apply one hot encoding
y.value_counts()

R
0    111
1     97
dtype: int64

In [89]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_train.shape

(166, 60)

## Overfitting Model

In [90]:
model = keras.Sequential([
    keras.layers.Dense(60, input_shape=(60, ), activation="relu"),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(15, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model.fit(X_train, y_train, epochs=100, batch_size=8) # mini_batch method

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x20d7dec5c70>

In [91]:
model.evaluate(X_test, y_test)



[1.1551778316497803, 0.738095223903656]

In [92]:
y_pred = model.predict(X_test).reshape(-1)
y_pred = np.round(y_pred)
print("pred, actual", "\n", np.c_[y_pred, y_test][:10])

pred, actual 
 [[0. 0.]
 [1. 0.]
 [1. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]]


In [93]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.70      0.80      0.74        20
           1       0.79      0.68      0.73        22

    accuracy                           0.74        42
   macro avg       0.74      0.74      0.74        42
weighted avg       0.74      0.74      0.74        42



## Model with Dropout Layer

In [94]:
model_new = keras.Sequential([
    keras.layers.Dense(60, input_shape=(60, ), activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(15, activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation="sigmoid")
])
model_new.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model_new.fit(X_train, y_train, epochs=100, batch_size=8)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x20d7ae83790>

In [95]:
model_new.evaluate(X_test, y_test)



[0.550304114818573, 0.7857142686843872]

In [96]:
y_pred_new = model_new.predict(X_test).reshape(-1)
y_pred_new = np.round(y_pred_new)
print(classification_report(y_test, y_pred_new))

              precision    recall  f1-score   support

           0       0.76      0.80      0.78        20
           1       0.81      0.77      0.79        22

    accuracy                           0.79        42
   macro avg       0.79      0.79      0.79        42
weighted avg       0.79      0.79      0.79        42

