In [1]:
import numpy as np
import pandas as pd

In [2]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        The element-wise sigmoid of ``x``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(exp(0) + exp(-x))). np.logaddexp evaluates
    # that logarithm stably, so very negative inputs no longer overflow inside
    # np.exp (the naive form emits a RuntimeWarning there).
    return np.exp(-np.logaddexp(0, -x))
    
def sigmoid_derivative(x):
    """Return ``x * (1 - x)`` element-wise.

    This is the slope of the sigmoid expressed in terms of its *output*: pass
    an activation (a value already produced by ``sigmoid``), not a
    pre-activation.
    """
    complement = 1 - x
    return x * complement

In [3]:
class NeuralNetwork:
    """Fully connected network: two sigmoid hidden layers and a sigmoid output layer.

    Trained with full-batch gradient descent; the gradients are those of half
    the summed squared error between the network output and the labels.
    """

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, learning_rate):
        """Allocate the parameters of the three layers.

        Args:
            input_size: Number of input features.
            hidden1_size: Number of units in the first hidden layer.
            hidden2_size: Number of units in the second hidden layer.
            output_size: Number of output units.
            learning_rate: Constant step size used for every update.
        """
        # Initialize weights with small random values
        self.weights1 = np.random.randn(input_size, hidden1_size) * 0.01
        self.weights2 = np.random.randn(hidden1_size, hidden2_size) * 0.01
        self.weights3 = np.random.randn(hidden2_size, output_size) * 0.01

        # Bias terms
        self.bias1 = np.zeros((1, hidden1_size))
        self.bias2 = np.zeros((1, hidden2_size))
        self.bias3 = np.zeros((1, output_size))

        # Learning rate
        self.learning_rate = learning_rate

    @staticmethod
    def _as_column(y):
        """Return ``y`` as an array, turning a flat 1-D vector into an (n, 1) column."""
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    def forward(self, X):
        """Run a forward pass and cache every layer's values for ``backward``.

        Args:
            X: Input array whose last dimension equals ``input_size``.

        Returns:
            The output-layer activations (also stored as ``self.a3``).
        """
        # Forward propagation
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = sigmoid(self.z1)

        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = sigmoid(self.z2)

        self.z3 = np.dot(self.a2, self.weights3) + self.bias3
        self.a3 = sigmoid(self.z3)

        return self.a3

    def backward(self, X, y, output):
        """Back-propagate the error and take one gradient-descent step.

        Must follow ``forward(X)`` on the same ``X``: it reads the cached
        activations ``self.a1`` and ``self.a2``.

        Args:
            X: The inputs that produced ``output``.
            y: Target values; a flat 1-D vector is treated as a column.
            output: The value returned by ``forward(X)``.
        """
        # A flat label vector would broadcast against the (n, 1) output into an
        # (n, n) matrix and break the matrix products below.
        y = self._as_column(y)

        # Compute the error
        error = output - y

        # Backpropagation. sigmoid_derivative is applied to activations because
        # it is written in terms of the sigmoid's output. All gradients are
        # computed before any parameter is modified.
        d_output = error * sigmoid_derivative(output)
        d_weights3 = np.dot(self.a2.T, d_output)

        d_hidden2 = np.dot(d_output, self.weights3.T) * sigmoid_derivative(self.a2)
        d_weights2 = np.dot(self.a1.T, d_hidden2)

        d_hidden1 = np.dot(d_hidden2, self.weights2.T) * sigmoid_derivative(self.a1)
        d_weights1 = np.dot(X.T, d_hidden1)

        # Update weights and biases (gradients are summed over the batch)
        self.weights3 -= self.learning_rate * d_weights3
        self.bias3 -= self.learning_rate * np.sum(d_output, axis=0, keepdims=True)

        self.weights2 -= self.learning_rate * d_weights2
        self.bias2 -= self.learning_rate * np.sum(d_hidden2, axis=0, keepdims=True)

        self.weights1 -= self.learning_rate * d_weights1
        self.bias1 -= self.learning_rate * np.sum(d_hidden1, axis=0, keepdims=True)

    def train(self, X, y, epochs, log_every=100):
        """Run ``epochs`` full-batch gradient-descent steps.

        Args:
            X: Training inputs.
            y: Training targets; a flat 1-D vector is treated as a column.
            epochs: Number of full-batch updates to perform.
            log_every: Print the mean squared error every this many epochs;
                ``0`` or ``None`` disables logging.
        """
        y = self._as_column(y)

        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)

            if log_every and (epoch + 1) % log_every == 0:
                # `output` predates this epoch's update, so the printed loss is
                # the one the update was computed from.
                loss = np.mean((y - output) ** 2)
                print(f"Epoch {epoch + 1}, Loss: {loss}")

In [4]:
# Load data
# Neither CSV has a header row. In both files the last column is the label and
# every column before it is a feature.
train_data = pd.read_csv("train.csv", header=None)
test_data = pd.read_csv("test.csv", header=None)

# Features stay 2-D; labels are reshaped into an (n_samples, 1) column.
X_train = train_data.iloc[:, :-1].to_numpy()
y_train = train_data.iloc[:, -1].to_numpy().reshape(-1, 1)

X_test = test_data.iloc[:, :-1].to_numpy()
y_test = test_data.iloc[:, -1].to_numpy().reshape(-1, 1)

In [5]:
# Normalize features
# Standardize with statistics estimated on the training set only, and apply the
# same transform to the test set: both splits must live in the same feature
# space, and test data must not influence preprocessing.
feature_mean = np.mean(X_train, axis=0)
feature_std = np.std(X_train, axis=0)
# A constant column has zero spread; dividing by 1 maps it to 0 instead of NaN.
feature_std = np.where(feature_std == 0, 1.0, feature_std)

X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

In [6]:
# Define network architecture
input_size = X_train.shape[1]  # one input unit per feature column
hidden1_size = hidden2_size = 5  # both hidden layers have the same width
output_size = 1  # a single output unit

In [7]:
# Create and train the neural network
# Seed NumPy's global RNG first: the weights are drawn from np.random, so
# without a seed every run of the notebook produces different numbers.
np.random.seed(42)

learning_rate = 0.01
nn = NeuralNetwork(input_size, hidden1_size, hidden2_size, output_size, learning_rate)
nn.train(X_train, y_train, epochs=1000)

Epoch 100, Loss: 0.24709158473115123
Epoch 200, Loss: 0.2470808007960147
Epoch 300, Loss: 0.2469889685692702
Epoch 400, Loss: 0.21036139941343485
Epoch 500, Loss: 0.012469768135316854
Epoch 600, Loss: 0.008819984517570836
Epoch 700, Loss: 0.007628763108103337
Epoch 800, Loss: 0.007034864947416916
Epoch 900, Loss: 0.0066778604317227815
Epoch 1000, Loss: 0.006441353794886822


In [8]:
# Test the model
# Threshold the network outputs at 0.5 to obtain hard 0/1 labels, then report
# the fraction that matches the test labels.
predictions = (nn.forward(X_test) > 0.5).astype(int)
accuracy = (predictions == y_test).mean()
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.986


In [9]:
class NeuralNetwork:
    """Two-hidden-layer sigmoid network trained by per-sample stochastic gradient descent.

    This definition reuses the name of the earlier full-batch ``NeuralNetwork``
    and replaces it in the notebook namespace from this cell onwards. The
    constructor takes learning-rate schedule parameters instead of a fixed
    learning rate.
    """

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, gamma_0, d):
        """Allocate the parameters of the three layers.

        Args:
            input_size: Number of input features.
            hidden1_size: Number of units in the first hidden layer.
            hidden2_size: Number of units in the second hidden layer.
            output_size: Number of output units.
            gamma_0: Initial learning rate of the schedule used in ``train``.
            d: Decay parameter of the schedule used in ``train``.
        """
        # Initialize weights with small random values
        self.weights1 = np.random.randn(input_size, hidden1_size) * 0.01
        self.weights2 = np.random.randn(hidden1_size, hidden2_size) * 0.01
        self.weights3 = np.random.randn(hidden2_size, output_size) * 0.01

        # Bias terms
        self.bias1 = np.zeros((1, hidden1_size))
        self.bias2 = np.zeros((1, hidden2_size))
        self.bias3 = np.zeros((1, output_size))

        # Learning rate schedule parameters
        self.gamma_0 = gamma_0
        self.d = d

    @staticmethod
    def _as_column(y):
        """Return ``y`` as an array, turning a flat 1-D vector into an (n, 1) column."""
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    def forward(self, X):
        """Run a forward pass and cache every layer's values for ``backward``.

        Args:
            X: Input array whose last dimension equals ``input_size``.

        Returns:
            The output-layer activations (also stored as ``self.a3``).
        """
        # Forward propagation
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = sigmoid(self.z1)

        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = sigmoid(self.z2)

        self.z3 = np.dot(self.a2, self.weights3) + self.bias3
        self.a3 = sigmoid(self.z3)

        return self.a3

    def backward(self, X, y, output, learning_rate):
        """Back-propagate the error and take one step of size ``learning_rate``.

        Must follow ``forward(X)`` on the same ``X``: it reads the cached
        activations ``self.a1`` and ``self.a2``.

        Args:
            X: The inputs that produced ``output``.
            y: Target values; a flat 1-D vector is treated as a column.
            output: The value returned by ``forward(X)``.
            learning_rate: Step size for this update.
        """
        # A flat label vector would broadcast against the (n, 1) output into an
        # (n, n) matrix.
        y = self._as_column(y)

        # Compute the error
        error = output - y

        # Backpropagation. sigmoid_derivative is applied to activations because
        # it is written in terms of the sigmoid's output. All gradients are
        # computed before any parameter is modified.
        d_output = error * sigmoid_derivative(output)
        d_weights3 = np.dot(self.a2.T, d_output)

        d_hidden2 = np.dot(d_output, self.weights3.T) * sigmoid_derivative(self.a2)
        d_weights2 = np.dot(self.a1.T, d_hidden2)

        d_hidden1 = np.dot(d_hidden2, self.weights2.T) * sigmoid_derivative(self.a1)
        d_weights1 = np.dot(X.T, d_hidden1)

        # Update weights and biases
        self.weights3 -= learning_rate * d_weights3
        self.bias3 -= learning_rate * np.sum(d_output, axis=0, keepdims=True)

        self.weights2 -= learning_rate * d_weights2
        self.bias2 -= learning_rate * np.sum(d_hidden2, axis=0, keepdims=True)

        self.weights1 -= learning_rate * d_weights1
        self.bias1 -= learning_rate * np.sum(d_hidden1, axis=0, keepdims=True)

    def train(self, X, y, epochs, log_every=100):
        """Train with one update per sample, reshuffling the data every epoch.

        Args:
            X: Training inputs, one sample per row.
            y: Training targets; a flat 1-D vector is treated as a column.
            epochs: Number of passes over the training set.
            log_every: Print the mean squared error over the training set every
                this many epochs; ``0`` or ``None`` disables logging.
        """
        # Keep labels as a column so the logged loss compares (n, 1) with
        # (n, 1); a flat vector would silently broadcast to an (n, n) matrix.
        y = self._as_column(y)

        for epoch in range(epochs):
            # Shuffle the data (fancy indexing copies, so the caller's arrays
            # are left untouched)
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            for t, (x_batch, y_batch) in enumerate(zip(X, y)):
                # Calculate the learning rate for the current step.
                # `t` is the sample's position within the current epoch, so the
                # rate starts again from gamma_0 at the beginning of every epoch.
                # NOTE(review): confirm whether a step counter running across
                # epochs was intended instead.
                learning_rate = self.gamma_0 / (1 + self.gamma_0 * self.d * t)

                # Forward and backward propagation on a single-row batch
                x_row = x_batch.reshape(1, -1)
                y_row = y_batch.reshape(1, -1)
                output = self.forward(x_row)
                self.backward(x_row, y_row, output, learning_rate)

            if log_every and (epoch + 1) % log_every == 0:
                loss = np.mean((y - self.forward(X)) ** 2)
                print(f"Epoch {epoch + 1}, Loss: {loss}")


In [10]:
# Load data
X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values.reshape(-1, 1)

X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values.reshape(-1, 1)

# Normalize features
# Standardize with statistics estimated on the training set only, and apply the
# same transform to the test set: both splits must live in the same feature
# space, and test data must not influence preprocessing.
feature_mean = np.mean(X_train, axis=0)
feature_std = np.std(X_train, axis=0)
# A constant column has zero spread; dividing by 1 maps it to 0 instead of NaN.
feature_std = np.where(feature_std == 0, 1.0, feature_std)

X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

In [11]:
# Define network architecture and parameters
output_size = 1  # a single output unit
input_size = X_train.shape[1]  # one input unit per feature column

# Hidden-layer widths to compare; each value is used for both hidden layers.
hidden_layer_sizes = [5, 10, 25, 50, 100]

In [19]:
def train_and_evaluate(hidden_layer_size, gamma_0=0.1, d=0.01, epochs=1000):
    """Train a network with two equally wide hidden layers and score it on the test set.

    Reads ``input_size``, ``output_size``, ``X_train``, ``y_train``, ``X_test``
    and ``y_test`` from the notebook's global namespace.

    Args:
        hidden_layer_size: Number of units in each of the two hidden layers.
        gamma_0: Initial learning rate handed to ``NeuralNetwork``.
        d: Decay rate handed to ``NeuralNetwork``.
        epochs: Number of training epochs.

    Returns:
        The fraction of test samples whose thresholded prediction equals the
        label (the same value that is printed).
    """
    print(f"\nTraining with hidden layer size: {hidden_layer_size}")
    model = NeuralNetwork(input_size, hidden_layer_size, hidden_layer_size, output_size, gamma_0, d)
    model.train(X_train, y_train, epochs=epochs)

    # Test the model: threshold the outputs at 0.5 to get hard 0/1 labels
    predictions = (model.forward(X_test) > 0.5).astype(int)
    accuracy = np.mean(predictions == y_test)
    print(f"Test Accuracy for hidden layer size {hidden_layer_size}: {accuracy}")
    return accuracy

In [13]:
# Train the network for different hidden layer sizes
# One freshly created network is trained and scored per configured width.
for hidden_size in hidden_layer_sizes:
    train_and_evaluate(hidden_layer_size=hidden_size)


Training with hidden layer size: 5
Epoch 100, Loss: 0.006866099258760941
Epoch 200, Loss: 0.0007674457242949015
Epoch 300, Loss: 0.00032689691887107376
Epoch 400, Loss: 0.00020105915952417839
Epoch 500, Loss: 0.0001423511558454546
Epoch 600, Loss: 0.00010885505048478011
Epoch 700, Loss: 8.713363673772276e-05
Epoch 800, Loss: 7.219700744921345e-05
Epoch 900, Loss: 6.132828243146056e-05
Epoch 1000, Loss: 5.303882893383915e-05
Test Accuracy for hidden layer size 5: 1.0

Training with hidden layer size: 10
Epoch 100, Loss: 0.008052095017657506
Epoch 200, Loss: 0.001609803145795471
Epoch 300, Loss: 0.00045865740349318077
Epoch 400, Loss: 0.0002446850400585289
Epoch 500, Loss: 0.00016057438434301631
Epoch 600, Loss: 0.00011669192712236492
Epoch 700, Loss: 9.051740327218446e-05
Epoch 800, Loss: 7.318950634337004e-05
Epoch 900, Loss: 6.101475319013618e-05
Epoch 1000, Loss: 5.20997293577735e-05
Test Accuracy for hidden layer size 10: 1.0

Training with hidden layer size: 25
Epoch 100, Loss: 0.

In [22]:
# Part (c): Initialize weights to zero and re-train
gamma_0, d = 0.1, 0.01  # initial learning rate, decay rate


class ZeroWeightNeuralNetwork(NeuralNetwork):
    """``NeuralNetwork`` variant whose weights and biases all start at zero.

    Only the constructor differs; ``forward``, ``backward`` and ``train`` are
    inherited unchanged.
    """

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, gamma_0, d):
        """Create all-zero parameters and store the schedule settings.

        The parent initialiser is not invoked; every attribute it would set is
        assigned here instead.
        """
        # Layer 1: input -> first hidden layer
        self.weights1 = np.zeros((input_size, hidden1_size))
        self.bias1 = np.zeros((1, hidden1_size))

        # Layer 2: first hidden layer -> second hidden layer
        self.weights2 = np.zeros((hidden1_size, hidden2_size))
        self.bias2 = np.zeros((1, hidden2_size))

        # Layer 3: second hidden layer -> output
        self.weights3 = np.zeros((hidden2_size, output_size))
        self.bias3 = np.zeros((1, output_size))

        # Learning-rate schedule parameters read by the inherited train()
        self.gamma_0, self.d = gamma_0, d

In [23]:
# Train with zero-initialized weights
print("\nTraining with zero-initialized weights")
# Two hidden layers of 10 units each, using the schedule settings defined above.
nn_zero = ZeroWeightNeuralNetwork(
    input_size=input_size,
    hidden1_size=10,
    hidden2_size=10,
    output_size=output_size,
    gamma_0=gamma_0,
    d=d,
)
nn_zero.train(X_train, y_train, epochs=1000)


Training with zero-initialized weights
Epoch 100, Loss: 0.008180505521284749
Epoch 200, Loss: 0.006419787328776942
Epoch 300, Loss: 0.006031603766013599
Epoch 400, Loss: 0.0059288266630830675
Epoch 500, Loss: 0.005854619153580543
Epoch 600, Loss: 0.0057878404758818
Epoch 700, Loss: 0.005927932017731422
Epoch 800, Loss: 0.005814363531782429
Epoch 900, Loss: 0.005762574344481722
Epoch 1000, Loss: 0.005750199908237601


In [24]:
# Test the model with zero-initialized weights
# Threshold the network outputs at 0.5 to obtain hard 0/1 labels, then report
# the fraction that matches the test labels.
predictions = (nn_zero.forward(X_test) > 0.5).astype(int)
accuracy = (predictions == y_test).mean()
print(f"Test Accuracy with zero-initialized weights: {accuracy}")

Test Accuracy with zero-initialized weights: 0.992
