In [None]:
import os
import sys

# Directory names the repository root may have; compared case-insensitively.
PROJECT_DIR_NAMES = ("computervisionsoc", "computervisionsociety")

# Walk upwards from the current working directory until a directory with one of
# the expected names is found. os.path.basename/dirname are used instead of
# splitting on "\\" so the search also works with POSIX-style paths.
project_root = os.getcwd()
while os.path.basename(project_root).lower() not in PROJECT_DIR_NAMES:
    parent_dir = os.path.dirname(project_root)
    if parent_dir == project_root:
        # Reached the filesystem root without a match: fail loudly instead of
        # looping forever.
        raise FileNotFoundError(
            f"Could not find a parent directory named one of {PROJECT_DIR_NAMES} "
            f"above {os.getcwd()!r}"
        )
    project_root = parent_dir

# Make the project's packages importable. The membership check keeps sys.path
# free of duplicates when this cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
from CV101.models import Sequential, load_model
from CV101.optimizers import SGD, Momentum, RMSprop, Adam
from CV101.metrics import accuracy, mae, precision, recall
from CV101.layers import Dense, Conv2D, MaxPool2D, Dropout, BatchNormalization, Flatten
from CV101.losses import MeanSquaredError, CategoricalCrossEntropy, BinaryCrossEntropy
from CV101.activations import ReLU, LeakyReLU, ELU, Sigmoid, Tanh, Linear
from CV101.initializers import HeInitializer, XavierInitializer
from CV101.regularizers import regularizers

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# Locate the repository root (directory name compared case-insensitively) and
# make it the working directory so the relative dataset path below resolves.
# basename/dirname replace the original "\\" splitting, which never matches on
# POSIX paths, and the root check prevents an endless loop when no matching
# directory exists. The working directory is only changed once a match is found.
repo_dir = os.getcwd()
while os.path.basename(repo_dir).lower() not in ("computervisionsoc", "computervisionsociety"):
    parent_dir = os.path.dirname(repo_dir)
    if parent_dir == repo_dir:
        raise FileNotFoundError(
            "Could not find a 'computervisionsoc' / 'computervisionsociety' "
            f"directory above {os.getcwd()!r}"
        )
    repo_dir = parent_dir
os.chdir(repo_dir)

# Load the training table and drop the three columns that are not used as
# features. os.path.join keeps the path portable across operating systems.
train = pd.read_csv(
    os.path.join("datasets", "titanic", "train.csv"), index_col="PassengerId"
).drop(columns=["Ticket", "Cabin", "Name"])

# Keep only rows without missing values.
cleaned = train.dropna()

# One-hot encode the two categorical columns into a dense array, then wrap the
# result in a DataFrame that shares the index of `cleaned` so the two can be
# concatenated column-wise.
encoder = OneHotEncoder(sparse_output=False)
encoded = np.array(encoder.fit_transform(cleaned[["Sex", "Embarked"]]))
encoded_name = encoder.get_feature_names_out(["Sex", "Embarked"])
encoded_df = pd.DataFrame(encoded, columns=encoded_name, index=cleaned.index)

# Append the encoded columns and remove the original categorical ones.
training = pd.concat([cleaned, encoded_df], axis=1).drop(columns=["Sex", "Embarked"])

# Features are every column except the target; the target stays a one-column frame.
X_train = training.drop(columns=["Survived"])
y_train = training[["Survived"]].copy()

# The first 600 rows are used for training, the remainder for validation.
X, y, X_val, y_val = X_train[:600], y_train[:600], X_train[600:], y_train[600:]
X.shape, y.shape, X_val.shape, y_val.shape

## 1. Gradient Vanishing

In [None]:
# Network for the "Gradient Vanishing" section: seven 32-unit layers mixing
# Tanh and Sigmoid activations, then a 10-unit and a 1-unit Sigmoid layer.
# Every layer uses XavierInitializer(mode="uniform").
hidden_activations = [Tanh, Tanh, Sigmoid, Tanh, Sigmoid, Sigmoid, Tanh]
layer_specs = [(32, activation_cls) for activation_cls in hidden_activations]
layer_specs += [(10, Sigmoid), (1, Sigmoid)]

model = Sequential()
for units, activation_cls in layer_specs:
    # Activation and initializer objects are created per layer, in layer order.
    model.add(
        Dense(
            units,
            activation=activation_cls(),
            initializer=XavierInitializer(mode="uniform"),
        )
    )

# Binary cross-entropy loss with Adam at learning_rate=1e-6.
model.compile(loss=BinaryCrossEntropy(), optimizer=Adam(learning_rate=1e-6))

In [None]:
# Show the first 10 predictions on X; in notebook order this runs before any fit call on this model.
model.predict(X)[:10]

In [None]:
# Short run: fit on (X, y) with epoch=10 and no validation data.
model.fit(X, y, epoch=10)

In [None]:
def plot_W(model):
    """Plot a weight histogram per layer plus the mean absolute gradient per layer.

    One histogram panel is drawn for every layer in ``model.layers`` that
    exposes a non-None ``W`` attribute. A final panel plots
    ``mean(|layer.dW|)`` against the layer index for every layer that exposes a
    non-None ``dW``. Layers lacking these attributes are skipped instead of
    raising ``AttributeError``.

    Args:
        model: Object with a ``layers`` sequence. Layers are expected to carry
            array-like ``W`` / ``dW`` attributes when they have weights
            (NOTE(review): whether ``dW`` exists before the first fit is not
            visible here; a missing or None ``dW`` is simply skipped).

    Returns:
        None. The figure is displayed with ``plt.show()``; nothing is drawn
        when no layer exposes weights or gradients.
    """
    weighted_layers = [
        (index, layer)
        for index, layer in enumerate(model.layers)
        if getattr(layer, "W", None) is not None
    ]
    gradient_means = [
        (index, np.mean(np.abs(layer.dW)))
        for index, layer in enumerate(model.layers)
        if getattr(layer, "dW", None) is not None
    ]

    panel_count = len(weighted_layers) + (1 if gradient_means else 0)
    if panel_count == 0:
        return

    # Use exactly as many rows as the panels need (ceiling division) so the
    # figure has no unused rows.
    n_cols = 3
    n_rows = -(-panel_count // n_cols)

    plt.figure(figsize=(20, 15))
    for panel, (index, layer) in enumerate(weighted_layers, start=1):
        plt.subplot(n_rows, n_cols, panel)
        plt.hist(layer.W.reshape(-1))
        plt.title(f"Layer {index} weights")

    if gradient_means:
        # The gradient summary always occupies the last panel.
        layer_indices, mean_values = zip(*gradient_means)
        plt.subplot(n_rows, n_cols, panel_count)
        plt.plot(layer_indices, mean_values)
        plt.title("Mean |dW| per layer")
        plt.xlabel("Layer index")

    plt.show()

plot_W(model)

In [None]:
# Call fit again on the same model object, this time with epoch=1000.
model.fit(X, y, epoch=1000)

In [None]:
# Redraw the weight histograms and the per-layer mean |dW| after the longer fit.
plot_W(model)

## 2. Gradient Exploding

In [None]:
# Network for the "Gradient exploding" section: eight ReLU hidden layers
# (six of width 40, then 32 and 10) followed by one Sigmoid output unit.
# No initializer is passed to Dense, so the layer's own default applies.
hidden_widths = [40] * 6 + [32, 10]

model = Sequential()
for width in hidden_widths:
    model.add(Dense(width, activation=ReLU()))
model.add(Dense(1, activation=Sigmoid()))

# Binary cross-entropy loss with Adam at learning_rate=1e-2.
model.compile(loss=BinaryCrossEntropy(), optimizer=Adam(learning_rate=1e-2))

In [None]:
# Show the first 10 predictions on X before this model has been fitted in notebook order.
model.predict(X)[:10]

In [None]:
# Single-epoch fit (epoch=1) so the plot in the next cell reflects the state after one call.
model.fit(X, y, epoch=1)

In [None]:
# Weight histograms and per-layer mean |dW| after the epoch=1 fit.
plot_W(model)

In [None]:
# Call fit again on the same model object with epoch=1000.
model.fit(X, y, epoch=1000)

In [None]:
# Same plots after the epoch=1000 fit, for comparison with the earlier figure.
plot_W(model)

## 3. Overfitting

In [None]:
# Network for the "Overfitting" section: five ReLU hidden layers of decreasing
# width with HeInitializer, and one Sigmoid output unit with XavierInitializer.
# NOTE(review): mode is spelled "Normal" here but "normal" in the section 6
# model; confirm the initializers treat the two spellings the same.
hidden_widths = (256, 128, 64, 32, 10)

model = Sequential()
for width in hidden_widths:
    model.add(Dense(width, activation=ReLU(), initializer=HeInitializer(mode="Normal")))
model.add(Dense(1, activation=Sigmoid(), initializer=XavierInitializer(mode="Normal")))

# Binary cross-entropy loss, Adam at learning_rate=1e-4, accuracy tracked as a metric.
model.compile(
    loss=BinaryCrossEntropy(),
    optimizer=Adam(learning_rate=1e-4),
    metrics=[accuracy],
)

In [None]:
# Show the first 10 predictions on X before this model has been fitted in notebook order.
model.predict(X)[:10]

In [None]:
# Fit with epoch=1000 and pass the held-out split as validation_data.
model.fit(X, y, epoch=1000, validation_data = [X_val, y_val])

In [None]:
def plot_metric(model):
    """Plot the training and validation curve of every metric in ``model.hist``.

    ``model.hist`` is iterated for metric names; each entry is indexed with
    ``"train"`` and, when present, ``"val"``. One panel per metric is laid out
    on a two-column grid with exactly as many rows as needed. The validation
    curve is skipped when the ``"val"`` entry is missing or empty, so a model
    fitted without validation data can still be plotted.

    Args:
        model: Object with a ``hist`` mapping of
            ``metric name -> {"train": sequence, "val": sequence}``
            (NOTE(review): assumed to be dict-like; confirm against the
            Sequential implementation).

    Returns:
        None. The figure is displayed with ``plt.show()``; nothing is drawn
        when ``model.hist`` is empty.
    """
    history = model.hist
    metric_count = len(history)
    if metric_count == 0:
        return

    # Ceiling division: an even number of metrics no longer leaves an empty row.
    n_cols = 2
    n_rows = -(-metric_count // n_cols)

    plt.figure(figsize=(20, 10))
    for panel, metric in enumerate(history, start=1):
        curves = history[metric]

        plt.subplot(n_rows, n_cols, panel)
        plt.xlabel("Epochs")
        plt.ylabel(metric)
        plt.title(f"{metric} over epochs")

        plt.plot(curves["train"], label="training")

        # Validation values only exist when fit received validation data.
        val_curve = curves["val"] if "val" in curves else None
        if val_curve is not None and len(val_curve) > 0:
            plt.plot(val_curve, label="validation")

        plt.legend()

    plt.show()

plot_metric(model)

## 4. Underfitting

In [None]:
# Model for the "Underfitting" section: a single Dense layer with one unit and a Sigmoid activation.
model = Sequential()

model.add(Dense(1, activation=Sigmoid()))

# Binary cross-entropy loss, SGD at learning_rate=5e-2, accuracy tracked as a metric.
model.compile(loss=BinaryCrossEntropy(), optimizer=SGD(learning_rate=5e-2), metrics=[accuracy])

In [None]:
# Show the first 10 predictions on X before this model has been fitted in notebook order.
model.predict(X)[:10]

In [None]:
# Fit with epoch=1000, passing the held-out split as validation_data.
model.fit(X, y, epoch=1000, validation_data=[X_val, y_val])

In [None]:
# Plot the training and validation curves stored in model.hist for the single-layer model.
plot_metric(model)

## 5. Slow convergence

In [None]:
# Network for the "Slow convergence" section: two ReLU hidden layers (64 and
# 10 units) and one Sigmoid output unit, all with the default initializer.
model = Sequential()
for width, activation_cls in ((64, ReLU), (10, ReLU), (1, Sigmoid)):
    model.add(Dense(width, activation=activation_cls()))

# Binary cross-entropy loss, SGD at learning_rate=1e-6, accuracy tracked as a metric.
model.compile(
    loss=BinaryCrossEntropy(),
    optimizer=SGD(learning_rate=1e-6),
    metrics=[accuracy],
)

In [None]:
# Show the first 10 predictions on X before this model has been fitted in notebook order.
model.predict(X)[:10]

In [None]:
# Fit with epoch=1000, passing the held-out split as validation_data.
model.fit(X, y, epoch=1000, validation_data=[X_val, y_val])

In [None]:
# Plot the training and validation curves stored in model.hist for the learning_rate=1e-6 run.
plot_metric(model)

## 6. Correct Version?

In [None]:
# Network for the "Correct Version?" section, described as
# (units, activation class, initializer class, initializer mode) per layer.
layer_specs = (
    (32, ReLU, HeInitializer, "normal"),
    (10, ReLU, HeInitializer, "normal"),
    # (disabled) a Dropout(0.5) layer would sit here, before the output layer.
    (1, Sigmoid, XavierInitializer, "uniform"),
)

model = Sequential()
for units, activation_cls, initializer_cls, init_mode in layer_specs:
    # Activation and initializer objects are created per layer, in layer order.
    model.add(
        Dense(
            units,
            activation=activation_cls(),
            initializer=initializer_cls(mode=init_mode),
        )
    )

# Binary cross-entropy loss, Adam at learning_rate=1e-4, accuracy tracked as a metric.
model.compile(
    loss=BinaryCrossEntropy(),
    optimizer=Adam(learning_rate=1e-4),
    metrics=[accuracy],
)

In [None]:
# Show the first 10 predictions on X before this model has been fitted in notebook order.
model.predict(X)[:10]

In [None]:
# Evaluate on the validation split before any fit call, as a baseline for the evaluate call at the end.
model.evaluate(X_val, y_val)

In [None]:
# Single-epoch fit (epoch=1) without validation data, ahead of the plot_W call in the next cell.
model.fit(X, y, epoch=1)

In [None]:
# Weight histograms and per-layer mean |dW| after the epoch=1 fit.
plot_W(model)

In [None]:
# Main run: epoch=2000 with batch_size=32 and shuffle=True, passing the held-out split as validation_data.
model.fit(X, y, validation_data=[X_val, y_val], epoch=2000, batch_size=32, shuffle=True)

In [None]:
# Plot the training and validation curves stored in model.hist for this model.
plot_metric(model)

In [None]:
# Evaluate on the validation split again after fitting, to compare with the pre-fit evaluation.
model.evaluate(X_val, y_val)