In [5]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

def load_data(file_path):
    """Read a header-less CSV and split it into features and targets.

    Every column except the last is treated as a feature; the last column
    is the target.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        A ``(X, y)`` tuple where ``X`` holds all but the last column and
        ``y`` is the last column reshaped to a column vector ``(n_rows, 1)``.
    """
    frame = pd.read_csv(file_path, header=None)
    features = frame.iloc[:, :-1].values
    targets = frame.iloc[:, -1].values
    # Column-vector targets line up with the (n_rows, 1) network output.
    return features, targets.reshape(-1, 1)

def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Scalar or NumPy array of pre-activation values.

    Returns:
        The logistic function of ``x``, with the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid derivative in terms of its output.

    Note that ``x`` is expected to be an already-activated value (the result
    of ``sigmoid``), not the pre-activation input; the callers in this file
    pass layer activations.

    Args:
        x: Scalar or NumPy array of sigmoid outputs.

    Returns:
        ``x * (1 - x)``, with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

def initialize_weights(input_size, hidden_size, output_size):
    """Draw standard-normal weight matrices for the three layers.

    The network has no bias terms; only the three weight matrices are created.
    Matrices are drawn from NumPy's global RNG in the order W1, W2, W3.

    Args:
        input_size: Number of input features.
        hidden_size: Width of both hidden layers.
        output_size: Number of output units.

    Returns:
        Dict with keys ``'W1'`` (input_size x hidden_size),
        ``'W2'`` (hidden_size x hidden_size) and
        ``'W3'`` (hidden_size x output_size).
    """
    layer_shapes = (
        ('W1', (input_size, hidden_size)),
        ('W2', (hidden_size, hidden_size)),
        ('W3', (hidden_size, output_size)),
    )
    # The comprehension walks the tuple in order, so the RNG draw order is fixed.
    return {name: np.random.randn(rows, cols) for name, (rows, cols) in layer_shapes}

def learning_rate_schedule(gamma0, d, t):
    """Compute the decayed learning rate ``gamma0 / (1 + (gamma0 / d) * t)``.

    Args:
        gamma0: Initial learning rate (the value returned at ``t == 0``).
        d: Decay-control constant; must be non-zero.
        t: Update counter.

    Returns:
        The learning rate to use at step ``t``.
    """
    decay = (gamma0 / d) * t
    return gamma0 / (1 + decay)

def forward_propagation(X, weights):
    """Run the input through the three sigmoid layers.

    Each layer multiplies its input by the layer's weight matrix (no bias)
    and applies ``sigmoid``.

    Args:
        X: A single sample (1-D) or a batch of samples (2-D, one per row).
        weights: Dict holding the ``'W1'``, ``'W2'`` and ``'W3'`` matrices.

    Returns:
        Tuple ``(a1, a2, a3)`` of the activations of the first hidden layer,
        the second hidden layer and the output layer.
    """
    activations = []
    signal = X
    for key in ('W1', 'W2', 'W3'):
        signal = sigmoid(np.dot(signal, weights[key]))
        activations.append(signal)

    a1, a2, a3 = activations
    return a1, a2, a3

def backward_propagation(X, y, a1, a2, a3, weights, learning_rate):
    """Take one gradient-descent step on the halved mean squared error.

    Works for a single sample (1-D inputs, as used by the SGD loop) and for
    a batch (2-D inputs, one sample per row). For a batch the gradients are
    averaged over the samples. The original ``np.outer`` formulation
    flattened its arguments and therefore only produced correctly shaped
    gradients for a single sample.

    Args:
        X: Input sample(s) that were fed to ``forward_propagation``.
        y: Target value(s); must contain as many elements as ``a3``.
        a1: Activations of the first hidden layer.
        a2: Activations of the second hidden layer.
        a3: Activations of the output layer (the predictions).
        weights: Dict with ``'W1'``, ``'W2'``, ``'W3'``; updated IN PLACE.
        learning_rate: Step size applied to every gradient.

    Returns:
        The same ``weights`` dict that was passed in, after the update.

    Raises:
        ValueError: If ``y`` cannot be reshaped to the shape of ``a3``.
    """
    # Promote single samples to one-row batches so one code path handles both.
    X, a1, a2, a3 = (np.atleast_2d(arr) for arr in (X, a1, a2, a3))
    # Force y to the prediction shape to avoid silent (n,) vs (n, 1) broadcasting.
    y = np.reshape(y, a3.shape)
    batch_size = X.shape[0]

    # Back-propagate the error signal; all deltas use the pre-update weights.
    delta3 = (a3 - y) * sigmoid_derivative(a3)
    delta2 = np.dot(delta3, weights['W3'].T) * sigmoid_derivative(a2)
    delta1 = np.dot(delta2, weights['W2'].T) * sigmoid_derivative(a1)

    # For one sample, A.T @ delta is exactly the outer product of the vectors.
    dW3 = np.dot(a2.T, delta3) / batch_size
    dW2 = np.dot(a1.T, delta2) / batch_size
    dW1 = np.dot(X.T, delta1) / batch_size

    weights['W1'] -= learning_rate * dW1
    weights['W2'] -= learning_rate * dW2
    weights['W3'] -= learning_rate * dW3

    return weights

def objective_function(X, y, weights):
    """Evaluate half the mean squared error of the network on ``(X, y)``.

    Args:
        X: Input samples, forwarded through ``forward_propagation``.
        y: Target values; should have the same shape as the network output
            so that the subtraction does not broadcast.
        weights: Dict of weight matrices used for the forward pass.

    Returns:
        ``mean((y - prediction) ** 2) / 2`` as a scalar.
    """
    predictions = forward_propagation(X, weights)[-1]
    residual = y - predictions
    return np.mean(np.square(residual)) / 2

def train_neural_network_sgd(X_train, y_train, X_test, y_test, hidden_size, gamma0, d, epochs):
    """Train the three-layer network with per-sample SGD and report errors.

    Each epoch visits the training samples in a freshly shuffled order and
    performs one weight update per sample, using the learning rate given by
    ``learning_rate_schedule`` for the running update counter.

    The training and test objectives are evaluated once, after the last
    epoch. Previously they were recomputed every epoch and discarded, and
    the final ``print`` raised ``NameError`` when ``epochs`` was 0.

    Args:
        X_train: 2-D array of training inputs (one sample per row).
        y_train: Training targets, indexable by the same row index.
        X_test: 2-D array of test inputs.
        y_test: Test targets.
        hidden_size: Width of both hidden layers.
        gamma0: Initial learning rate for the schedule.
        d: Decay-control constant for the schedule.
        epochs: Number of passes over the training set.

    Returns:
        None. The final training and test errors are printed.
    """
    input_size = X_train.shape[1]
    output_size = 1
    n_samples = len(X_train)

    weights = initialize_weights(input_size, hidden_size, output_size)

    for epoch in range(epochs):
        # Same RNG usage as before: shuffle an index array once per epoch.
        indices = np.arange(n_samples)
        np.random.shuffle(indices)

        # Index rows directly instead of copying whole shuffled arrays.
        for i, sample_idx in enumerate(indices):
            # 1-based global update counter across all epochs.
            t = epoch * n_samples + i + 1
            learning_rate = learning_rate_schedule(gamma0, d, t)

            x_i = X_train[sample_idx]
            y_i = y_train[sample_idx]
            a1, a2, a3 = forward_propagation(x_i, weights)
            weights = backward_propagation(x_i, y_i, a1, a2, a3, weights, learning_rate)

    # Only the final errors are reported, so evaluate them once here.
    training_error = objective_function(X_train, y_train, weights)
    test_error = objective_function(X_test, y_test, weights)

    print(f"Final Training Error (Hidden Size: {hidden_size}): {training_error}")
    print(f"Final Test Error (Hidden Size: {hidden_size}): {test_error}")

# Load data
# The dataset directory can be overridden through the BANK_NOTE_DIR
# environment variable so the notebook is not tied to one machine; the
# default is the original local path.
data_dir = os.environ.get("BANK_NOTE_DIR", "C:\\Rushiil\\ML\\bank-note-1\\bank-note")
train_file_path = os.path.join(data_dir, "train.csv")
test_file_path = os.path.join(data_dir, "test.csv")

X_train, y_train = load_data(train_file_path)
X_test, y_test = load_data(test_file_path)

# Specify hyperparameters
hidden_sizes = [5, 10, 25, 50, 100]
gamma0 = 0.1
d = 0.009
epochs = 100

# Weight initialisation and sample shuffling both use NumPy's global RNG;
# seeding it makes the reported errors reproducible across runs.
SEED = 42
np.random.seed(SEED)

for hidden_size in hidden_sizes:
    print(f"\nTraining Neural Network with Hidden Size: {hidden_size}\n")
    train_neural_network_sgd(X_train, y_train, X_test, y_test, hidden_size, gamma0, d, epochs)



Training Neural Network with Hidden Size: 5

Final Training Error (Hidden Size: 5): 0.20825919602362764
Final Test Error (Hidden Size: 5): 0.21016226759977583

Training Neural Network with Hidden Size: 10

Final Training Error (Hidden Size: 10): 0.1261772676499121
Final Test Error (Hidden Size: 10): 0.12803510820540634

Training Neural Network with Hidden Size: 25

Final Training Error (Hidden Size: 25): 0.23929625001615457
Final Test Error (Hidden Size: 25): 0.23462353010671402

Training Neural Network with Hidden Size: 50

Final Training Error (Hidden Size: 50): 0.20687805691504296
Final Test Error (Hidden Size: 50): 0.2052587408013298

Training Neural Network with Hidden Size: 100

Final Training Error (Hidden Size: 100): 0.17797750809815735
Final Test Error (Hidden Size: 100): 0.1841684632825731
