<a href="https://colab.research.google.com/github/DeepLearningSaeid/Grad/blob/main/Pure_implimentation_SWAG_Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
def X_activation(x):
    return x

def X_activation_derivative(x):
    return np.ones_like(x)

def X_2_activation(x):
    return (x**2) / 8

def X_2_activation_derivative(x):
    return (x / 4)

def X_3_activation(x):
    return (x**3) / 24

def X_3_activation_derivative(x):
    return (x**2) / 8

class NeuralNetwork:
    def __init__(self, input_size, hidden1_size, hidden2_size, hidden3_size, output_size):
        # Define the architecture
        self.input_size = input_size
        self.hidden1_size = hidden1_size
        self.hidden2_size = hidden2_size
        self.hidden3_size = hidden3_size
        self.output_size = output_size

        # Initialize weights and biases
        self.weights = {
            'W1': np.random.randn(input_size, hidden1_size),
            'W2': np.random.randn(input_size, hidden2_size),
            'W3': np.random.randn(input_size, hidden3_size),
            'W4': np.random.randn(hidden1_size + hidden2_size + hidden3_size, output_size)
        }

        self.biases = {
            'b1': np.random.randn(1, hidden1_size),
            'b2': np.random.randn(1, hidden2_size),
            'b3': np.random.randn(1, hidden3_size),
            'b4': np.random.randn(1, output_size)
        }

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def X_activation(self, x):
        return X_activation(x)

    def X_activation_derivative(self, x):
        return X_activation_derivative(x)

    def X_2_activation(self, x):
        return X_2_activation(x)

    def X_2_activation_derivative(self, x):
        return X_2_activation_derivative(x)

    def X_3_activation(self, x):
        return X_3_activation(x)

    def X_3_activation_derivative(self, x):
        return X_3_activation_derivative(x)

    def feedforward(self, X):
        # Layer 1 (X Activation)
        self.z1 = np.dot(X, self.weights['W1']) + self.biases['b1']
        self.a1 = self.X_activation(self.z1)  # Use X activation for layer

        # Layer 2
        self.z2 = np.dot(X, self.weights['W2']) + self.biases['b2']
        self.a2 = self.X_2_activation(self.z2)

        # Layer 3
        self.z3 = np.dot(X, self.weights['W3']) + self.biases['b3']
        self.a3 = self.X_3_activation(self.z3)

        # Concatenate the outputs of layers 1, 2, and 3
        self.concatenated_output = np.concatenate((self.a1, self.a2, self.a3), axis=1)

        # Layer 4 (Output Layer)
        self.z4 = np.dot(self.concatenated_output, self.weights['W4']) + self.biases['b4']
        self.output = self.X_activation(self.z4)

        return self.output

    def backpropagation(self, X, y, learning_rate):
        # Backpropagation

        # Layer 4 (Output Layer)
        delta4 = 2 * (self.output - y) * self.X_activation_derivative(self.output)
        dW4 = np.dot(self.concatenated_output.T, delta4)
        db4 = np.sum(delta4, axis=0, keepdims=True)

        # Split the delta for the concatenation in layer 4
        delta4_split = np.dot(delta4, self.weights['W4'].T)

        # Separate the deltas for layers 1, 2, and 3
        delta3 = delta4_split[:, -self.hidden3_size:] * self.X_3_activation_derivative(self.a3)

        delta2 = delta4_split[:, -self.hidden3_size-self.hidden2_size:-self.hidden3_size] * self.X_2_activation_derivative(self.a2)
        delta1 = delta4_split[:, :-self.hidden3_size-self.hidden2_size] * self.X_activation_derivative(self.a1)

        # Calculate gradients for layers 1, 2, and 3
        dW3 = np.dot(X.T, delta3)
        db3 = np.sum(delta3, axis=0, keepdims=True)
        dW2 = np.dot(X.T, delta2)
        db2 = np.sum(delta2, axis=0, keepdims=True)
        dW1 = np.dot(X.T, delta1)
        db1 = np.sum(delta1, axis=0, keepdims=True)

        # Update weights and biases
        self.weights['W4'] -= learning_rate * dW4
        self.biases['b4'] -= learning_rate * db4
        self.weights['W3'] -= learning_rate * dW3
        self.biases['b3'] -= learning_rate * db3
        self.weights['W2'] -= learning_rate * dW2
        self.biases['b2'] -= learning_rate * db2
        self.weights['W1'] -= learning_rate * dW1
        self.biases['b1'] -= learning_rate * db1

        return dW4, db4, dW3, db3, dW2, db2, dW1, db1

    def train(self, X, y, learning_rate, epochs):
        for epoch in range(epochs):
            # Forward pass
            self.feedforward(X)

            # Backpropagation
            self.backpropagation(X, y, learning_rate)

            if epoch % 500 == 0:
                loss = np.mean((self.output - y) ** 2)
                print(f'Epoch {epoch}/{epochs}, Loss: {loss:.4f}')

##########################################################
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import time

iris = load_iris()
X = iris.data
y = iris.target
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Add a small epsilon value to avoid exactly zero or one values
epsilon = 1e-4
X = np.clip(X, epsilon, 1 - epsilon)  # Clip values to be in the range (epsilon, 1 - epsilon)

encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y.reshape(-1, 1))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


learning_rate = 0.00001
epochs = 3000


input_size = 4
hidden1_size = 8
hidden2_size = 8
hidden3_size = 8
output_size = 3
# Create the neural network
nn = NeuralNetwork(input_size, hidden1_size, hidden2_size,hidden3_size, output_size)

# Train the neural network
start_time = time.time()

nn.train(X_train, y_train, learning_rate, epochs)

execution_time = time.time() - start_time

print(f"Training Execution Time: {execution_time:.2f} seconds")
# Evaluate the trained model
def accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

y_pred = nn.feedforward(X_test)
acc = accuracy(y_test, y_pred)
print(f'Test Accuracy: {acc * 100:.2f}%')



Epoch 0/3000, Loss: 15.7831
Epoch 500/3000, Loss: 0.1666
Epoch 1000/3000, Loss: 0.1131
Epoch 1500/3000, Loss: 0.0945
Epoch 2000/3000, Loss: 0.0838
Epoch 2500/3000, Loss: 0.0768
Training Execution Time: 1.35 seconds
Test Accuracy: 93.33%


In [None]:
def X_activation(x):
    return x

def X_activation_derivative(x):
    return np.ones_like(x)

def X_2_activation(x):
    return (x**2) / 8

def X_2_activation_derivative(x):
    return (x / 4)

def X_3_activation(x):
    return (x**3) / 24

def X_3_activation_derivative(x):
    return (x**2) / 8

class NeuralNetwork:
    def __init__(self, input_size, hidden1_size, hidden2_size, hidden3_size, output_size):
        # Define the architecture
        self.input_size = input_size
        self.hidden1_size = hidden1_size
        self.hidden2_size = hidden2_size
        self.hidden3_size = hidden3_size
        self.output_size = output_size

        # Initialize weights and biases
        self.weights = {
            'W1': np.random.randn(input_size, hidden1_size),
            'W2': np.random.randn(input_size, hidden2_size),
            'W3': np.random.randn(input_size, hidden3_size),
            'W4': np.random.randn(hidden1_size + hidden2_size + hidden3_size, output_size)
        }

        self.biases = {
            'b1': np.random.randn(1, hidden1_size),
            'b2': np.random.randn(1, hidden2_size),
            'b3': np.random.randn(1, hidden3_size),
            'b4': np.random.randn(1, output_size)
        }

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def X_activation(self, x):
        return X_activation(x)

    def X_activation_derivative(self, x):
        return X_activation_derivative(x)

    def X_2_activation(self, x):
        return X_2_activation(x)

    def X_2_activation_derivative(self, x):
        return X_2_activation_derivative(x)

    def X_3_activation(self, x):
        return X_3_activation(x)

    def X_3_activation_derivative(self, x):
        return X_3_activation_derivative(x)

    def feedforward(self, X):
        # Layer 1 (X Activation)
        self.z1 = np.dot(X, self.weights['W1']) + self.biases['b1']
        self.a1 = self.X_activation(self.z1)  # Use X activation for layer

        # Layer 2
        self.z2 = np.dot(X, self.weights['W2']) + self.biases['b2']
        self.a2 = self.X_2_activation(self.z2)

        # Layer 3
        self.z3 = np.dot(X, self.weights['W3']) + self.biases['b3']
        self.a3 = self.X_3_activation(self.z3)

        # Concatenate the outputs of layers 1, 2, and 3
        self.concatenated_output = np.concatenate((self.a1, self.a2, self.a3), axis=1)

        # Layer 4 (Output Layer)
        self.z4 = np.dot(self.concatenated_output, self.weights['W4']) + self.biases['b4']
        self.output = self.X_activation(self.z4)

        return self.output

    def backpropagation(self, X, y, learning_rate):
        # Backpropagation

        # Layer 4 (Output Layer)
        delta4 = 2 * (self.output - y) * self.X_activation_derivative(self.output)
        dW4 = np.dot(self.concatenated_output.T, delta4)
        db4 = np.sum(delta4, axis=0, keepdims=True)

        # Split the delta for the concatenation in layer 4
        delta4_split = np.dot(delta4, self.weights['W4'].T)

        # Separate the deltas for layers 1, 2, and 3
        delta3 = delta4_split[:, -self.hidden3_size:] * self.X_3_activation_derivative(self.a3)

        delta2 = delta4_split[:, -self.hidden3_size-self.hidden2_size:-self.hidden3_size] * self.X_2_activation_derivative(self.a2)
        delta1 = delta4_split[:, :-self.hidden3_size-self.hidden2_size] * self.X_activation_derivative(self.a1)

        # Calculate gradients for layers 1, 2, and 3
        dW3 = np.dot(X.T, delta3)
        db3 = np.sum(delta3, axis=0, keepdims=True)
        dW2 = np.dot(X.T, delta2)
        db2 = np.sum(delta2, axis=0, keepdims=True)
        dW1 = np.dot(X.T, delta1)
        db1 = np.sum(delta1, axis=0, keepdims=True)

        # Update weights and biases
        self.weights['W4'] -= learning_rate * dW4
        self.biases['b4'] -= learning_rate * db4
        self.weights['W3'] -= learning_rate * dW3
        self.biases['b3'] -= learning_rate * db3
        self.weights['W2'] -= learning_rate * dW2
        self.biases['b2'] -= learning_rate * db2
        self.weights['W1'] -= learning_rate * dW1
        self.biases['b1'] -= learning_rate * db1

        return dW4, db4, dW3, db3, dW2, db2, dW1, db1

    def train(self, X, y, learning_rate, epochs):
        batch_size = 10
        n_batches = len(X) // batch_size

        for epoch in range(epochs):
            epoch_loss = 0  # Reset epoch loss

            for i in range(0, len(X), batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]

                # Forward pass
                self.feedforward(X_batch)

                # Backpropagation
                self.backpropagation(X_batch, y_batch, learning_rate)

                # Calculate batch loss and add it to epoch loss
                batch_loss = np.mean((self.output - y_batch) ** 2)
                epoch_loss += batch_loss

            # Calculate average epoch loss
            epoch_loss /= n_batches

            if epoch % 500 == 0:
                print(f'Epoch {epoch}/{epochs}, Loss: {epoch_loss:.4f}')

##########################################################
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import time

iris = load_iris()
X = iris.data
y = iris.target
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Add a small epsilon value to avoid exactly zero or one values
epsilon = 1e-4
X = np.clip(X, epsilon, 1 - epsilon)  # Clip values to be in the range (epsilon, 1 - epsilon)

encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y.reshape(-1, 1))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


learning_rate = 0.0001
epochs = 100


input_size = 4
hidden1_size = 8
hidden2_size = 8
hidden3_size = 8
output_size = 3
# Create the neural network
nn = NeuralNetwork(input_size, hidden1_size, hidden2_size,hidden3_size, output_size)

# Train the neural network
start_time = time.time()

nn.train(X_train, y_train, learning_rate, epochs)

execution_time = time.time() - start_time

print(f"Training Execution Time: {execution_time:.2f} seconds")
# Evaluate the trained model
def accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

y_pred = nn.feedforward(X_test)
acc = accuracy(y_test, y_pred)
print(f'Test Accuracy: {acc * 100:.2f}%')

Epoch 0/100, Loss: 11.0058
Training Execution Time: 0.19 seconds
Test Accuracy: 70.00%




In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Activation functions
def identity(x):
    return x

def square(x):
    return np.power(x, 2)

def identity_derivative(x):
    return np.ones_like(x)

def square_derivative(x):
    return 2 * x

# Initialize network parameters
def initialize_parameters(input_size, hidden_size1, hidden_size2, output_size):
    return {
        'W1': np.random.randn(input_size, hidden_size1) * 0.1,
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(input_size, hidden_size2) * 0.1,
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size1 + hidden_size2, output_size) * 0.1,
        'b3': np.zeros((1, output_size))
    }

# Forward pass
def forward_pass(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = identity(Z1)

    Z2 = np.dot(X, params['W2']) + params['b2']
    A2 = square(Z2)

    concatenated = np.concatenate((A1, A2), axis=1)

    Z3 = np.dot(concatenated, params['W3']) + params['b3']
    A3 = square(Z3)

    return A3, (X, Z1, A1, Z2, A2, Z3, A3, concatenated)

# Compute loss (Mean Squared Error)
def compute_loss(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Backward pass
def backward_pass(y_true, cache, params):
    X, Z1, A1, Z2, A2, Z3, A3, concatenated = cache

    dA3 = 2 * (A3 - y_true)
    dZ3 = dA3 * square_derivative(Z3)
    dW3 = np.dot(concatenated.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    d_concatenated = np.dot(dZ3, params['W3'].T)
    dA1, dA2 = d_concatenated[:, :hidden_size1], d_concatenated[:, hidden_size1:]

    dZ1 = dA1 * identity_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    dZ2 = dA2 * square_derivative(Z2)
    dW2 = np.dot(X.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3}
    return grads

# Update network parameters
def update_parameters(params, grads, learning_rate):
    for key in params.keys():
        params[key] -= learning_rate * grads['d' + key]
    return params

# Load and preprocess the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_onehot = np.eye(3)[y]  # Convert to one-hot encoding

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

# Network architecture
input_size = X_train.shape[1]
hidden_size1 = 5
hidden_size2 = 5
output_size = 3

# Initialize parameters
params = initialize_parameters(input_size, hidden_size1, hidden_size2, output_size)

# Training settings
epochs = 200
learning_rate = 0.001

# Training loop
for epoch in range(epochs):
    output, cache = forward_pass(X_train, params)
    loss = compute_loss(Y_train, output)
    grads = backward_pass(Y_train, cache, params)
    params = update_parameters(params, grads, learning_rate)

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the model
output_test, _ = forward_pass(X_test, params)
test_loss = compute_loss(Y_test, output_test)
predictions = np.argmax(output_test, axis=1)
# Convert predictions to one-hot encoded format
predictions_onehot = np.eye(output_size)[predictions]

# Compute accuracy using one-hot encoded predictions
accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Epoch 0, Loss: 0.3331
Epoch 20, Loss: 0.2046
Epoch 40, Loss: 0.0684
Epoch 60, Loss: 0.0364
Epoch 80, Loss: 0.0379
Epoch 100, Loss: 0.0347
Epoch 120, Loss: 0.0330
Epoch 140, Loss: 0.0318
Epoch 160, Loss: 0.0309
Epoch 180, Loss: 0.0303
Test Loss: 0.0728
Test Accuracy: 0.9667


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Activation functions
def identity(x):
    return x

def square(x):
    return np.power(x, 2)

def identity_derivative(x):
    return np.ones_like(x)

def square_derivative(x):
    return 2 * x

# Initialize network parameters
def initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
    return {
        'W1': np.random.randn(input_size, hidden_size1) * 0.1,
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(input_size, hidden_size2) * 0.1,
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size1 + hidden_size2, hidden_size3) * 0.1,
        'b3': np.zeros((1, hidden_size3)),
        'W4': np.random.randn(hidden_size3, output_size) * 0.1,
        'b4': np.zeros((1, output_size))
    }

# Forward pass
def forward_pass(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = identity(Z1)

    Z2 = np.dot(X, params['W2']) + params['b2']
    A2 = square(Z2)

    concatenated = np.concatenate((A1, A2), axis=1)

    Z3 = np.dot(concatenated, params['W3']) + params['b3']
    A3 = square(Z3)


    Z4 = np.dot(A3, params['W4']) + params['b4']
    A4 = identity(Z4)  # Linear activation

    return A4, (X, Z1, A1, Z2, A2, Z3, A3, Z4, A4, concatenated)

# Compute loss (Mean Squared Error)
def compute_loss(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Backward pass
def backward_pass(y_true, cache, params):
    X, Z1, A1, Z2, A2, Z3, A3, Z4, A4, concatenated = cache

    dA4 = 2 * (A4 - y_true)
    dZ4 = dA4 * identity_derivative(Z4)  # Derivative of linear activation is 1
    dW4 = np.dot(A3.T, dZ4)
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    dA3 = np.dot(dZ4, params['W4'].T)
    dZ3 = dA3 * square_derivative(Z3)
    dW3 = np.dot(concatenated.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    d_concatenated = np.dot(dZ3, params['W3'].T)
    dA1, dA2 = d_concatenated[:, :hidden_size1], d_concatenated[:, hidden_size1:]

    dZ1 = dA1 * identity_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    dZ2 = dA2 * square_derivative(Z2)
    dW2 = np.dot(X.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3, 'dW4': dW4, 'db4': db4}
    return grads

# Update network parameters
def update_parameters(params, grads, learning_rate):
    for key in params.keys():
        params[key] -= learning_rate * grads['d' + key]
    return params

# Load and preprocess the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_onehot = np.eye(3)[y]  # Convert to one-hot encoding

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

# Network architecture
input_size = X_train.shape[1]
hidden_size1 = 5
hidden_size2 = 5
hidden_size3 = 5
output_size = 3

# Initialize parameters
params = initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)

# Training settings
epochs = 200
learning_rate = 0.001

# Training loop
for epoch in range(epochs):
    output, cache = forward_pass(X_train, params)
    loss = compute_loss(Y_train, output)
    grads = backward_pass(Y_train, cache, params)
    params = update_parameters(params, grads, learning_rate)

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the model
output_test, _ = forward_pass(X_test, params)
test_loss = compute_loss(Y_test, output_test)
predictions = np.argmax(output_test, axis=1)
# Convert predictions to one-hot encoded format
predictions_onehot = np.eye(output_size)[predictions]

# Compute accuracy using one-hot encoded predictions
accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Epoch 0, Loss: 0.3337
Epoch 20, Loss: 0.2220
Epoch 40, Loss: 0.2210
Epoch 60, Loss: 0.2001
Epoch 80, Loss: 0.1148
Epoch 100, Loss: 0.1125
Epoch 120, Loss: 0.1042
Epoch 140, Loss: 0.0653
Epoch 160, Loss: 0.0712
Epoch 180, Loss: 0.0715
Test Loss: 0.0688
Test Accuracy: 0.9333


In [9]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Activation functions
def identity(x):
    return x

def square(x):
    return np.power(x, 2)

def identity_derivative(x):
    return np.ones_like(x)

def square_derivative(x):
    return 2 * x

# Initialize network parameters
def initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
    return {
        'W1': np.random.randn(input_size, hidden_size1) * 0.1,
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(input_size, hidden_size2) * 0.1,
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size1 + hidden_size2, hidden_size3) * 0.1,
        'b3': np.zeros((1, hidden_size3)),
        'W4': np.random.randn(hidden_size3, output_size) * 0.1,  # Corrected output size
        'b4': np.zeros((1, output_size))
    }

# Forward pass
def forward_pass(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = identity(Z1)

    Z2 = np.dot(X, params['W2']) + params['b2']
    A2 = square(Z2)

    concatenated = np.concatenate((A1, A2), axis=1)

    Z3 = np.dot(concatenated, params['W3']) + params['b3']
    A3 = square(Z3)

    concatenated_A3 = np.concatenate((A3, concatenated), axis=1)  # Concatenate A3 and concatenated

    Z4 = np.dot(concatenated_A3, params['W4']) + params['b4']
    A4 = identity(Z4)  # Linear activation

    return A4, (X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated)

# Compute loss (Mean Squared Error)
def compute_loss(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Backward pass
def backward_pass(y_true, cache, params):
    X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated = cache

    dA4 = 2 * (A4 - y_true)
    dZ4 = dA4 * identity_derivative(Z4)  # Derivative of linear activation is 1
    dW4 = np.dot(concatenated_A3.T, dZ4)  # Update using concatenated A3
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    d_concatenated_A3 = np.dot(dZ4, params['W4'].T)
    dA3 = d_concatenated_A3[:, :hidden_size3]
    d_concatenated = d_concatenated_A3[:, hidden_size3:]

    dZ3 = dA3 * square_derivative(Z3)
    dW3 = np.dot(concatenated.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    d_concatenated_Z2 = dA2 * square_derivative(Z2)
    dA1, dA2 = d_concatenated[:, :hidden_size1], d_concatenated[:, hidden_size1:]

    dZ1 = dA1 * identity_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    dZ2 = d_concatenated_Z2
    dW2 = np.dot(X.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3, 'dW4': dW4, 'db4': db4}
    return grads

# Update network parameters
def update_parameters(params, grads, learning_rate):
    for key in params.keys():
        params[key] -= learning_rate * grads['d' + key]
    return params

# Load and preprocess the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_onehot = np.eye(3)[y]  # Convert to one-hot encoding

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

# Network architecture
input_size = X_train.shape[1]
hidden_size1 = 5
hidden_size2 = 5
hidden_size3 = 5
output_size = 3

# Initialize parameters
params = initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)

# Training settings
epochs = 200
learning_rate = 0.001

# Training loop
for epoch in range(epochs):
    output, cache = forward_pass(X_train, params)
    loss = compute_loss(Y_train, output)
    grads = backward_pass(Y_train, cache, params)
    params = update_parameters(params, grads, learning_rate)

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the model
output_test, _ = forward_pass(X_test, params)
test_loss = compute_loss(Y_test, output_test)
predictions = np.argmax(output_test, axis=1)
# Convert predictions to one-hot encoded format
predictions_onehot = np.eye(output_size)[predictions]

# Compute accuracy using one-hot encoded predictions
accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


ValueError: shapes (120,15) and (5,3) not aligned: 15 (dim 1) != 5 (dim 0)

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Activation functions
def identity(x):
    return x

def square(x):
    return np.power(x, 2)

def identity_derivative(x):
    return np.ones_like(x)

def square_derivative(x):
    return 2 * x

# Initialize network parameters
def initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
    return {
        'W1': np.random.randn(input_size, hidden_size1) * 0.1,
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(input_size, hidden_size2) * 0.1,
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size1 + hidden_size2, hidden_size3) * 0.1,
        'b3': np.zeros((1, hidden_size3)),
        'W4': np.random.randn(hidden_size3 + hidden_size1 + hidden_size2, output_size) * 0.1,
        'b4': np.zeros((1, output_size))
    }

# Forward pass
def forward_pass(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = identity(Z1)

    Z2 = np.dot(X, params['W2']) + params['b2']
    A2 = square(Z2)

    concatenated = np.concatenate((A1, A2), axis=1)

    Z3 = np.dot(concatenated, params['W3']) + params['b3']
    A3 = square(Z3)

    concatenated_A3 = np.concatenate((A3, concatenated), axis=1)

    Z4 = np.dot(concatenated_A3, params['W4']) + params['b4']
    A4 = identity(Z4)  # Linear activation

    return A4, (X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated)

# Compute loss (Mean Squared Error)
def compute_loss(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Backward pass
def backward_pass(y_true, cache, params):
    X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated = cache

    dA4 = 2 * (A4 - y_true)
    dZ4 = dA4 * identity_derivative(Z4)  # Derivative of linear activation is 1
    dW4 = np.dot(concatenated_A3.T, dZ4)
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    d_concatenated_A3 = np.dot(dZ4, params['W4'].T)
    dA3 = d_concatenated_A3[:, :hidden_size3]
    d_concatenated = d_concatenated_A3[:, hidden_size3:]

    dZ3 = dA3 * square_derivative(Z3)
    dW3 = np.dot(concatenated.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    d_concatenated_Z2 = dA2 * square_derivative(Z2)
    dA1, dA2 = d_concatenated[:, :hidden_size1], d_concatenated[:, hidden_size1:]

    dZ1 = dA1 * identity_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    dZ2 = d_concatenated_Z2[:, hidden_size1:]  # Update dZ2 calculation
    dW2 = np.dot(X.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3, 'dW4': dW4, 'db4': db4}
    return grads

# Update network parameters
def update_parameters(params, grads, learning_rate):
    for key in params.keys():
        params[key] -= learning_rate * grads['d' + key]
    return params

# Load and preprocess the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_onehot = np.eye(3)[y]

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

# Network architecture
input_size = X_train.shape[1]
hidden_size1 = 5
hidden_size2 = 5
hidden_size3 = 5
output_size = 3

# Initialize parameters
params = initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)

# Training settings
epochs = 200
learning_rate = 0.001

# Training loop
for epoch in range(epochs):
    output, cache = forward_pass(X_train, params)
    loss = compute_loss(Y_train, output)
    grads = backward_pass(Y_train, cache, params)
    params = update_parameters(params, grads, learning_rate)

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the model
output_test, _ = forward_pass(X_test, params)
test_loss = compute_loss(Y_test, output_test)
predictions = np.argmax(output_test, axis=1)
predictions_onehot = np.eye(output_size)[predictions]
accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


UnboundLocalError: local variable 'dA2' referenced before assignment

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Activation functions
def identity(x):
    return x

def square(x):
    return np.power(x, 2)/4

def square_(x):
    return np.power(x, 2)/24

def identity_derivative(x):
    return np.ones_like(x)

def square_derivative(x):
    return 2 * x

# Initialize network parameters
def initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
    return {
        'W1': np.random.randn(input_size, hidden_size1) * 0.1,
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(input_size, hidden_size2) * 0.1,
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size1 + hidden_size2, hidden_size3) * 0.1,
        'b3': np.zeros((1, hidden_size3)),
        'W4': np.random.randn(hidden_size3 + hidden_size1 + hidden_size2, output_size) * 0.1,
        'b4': np.zeros((1, output_size))
    }

# Forward pass
def forward_pass(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = identity(Z1)

    Z2 = np.dot(X, params['W2']) + params['b2']
    A2 = square(Z2)

    concatenated = np.concatenate((A1, A2), axis=1)

    Z3 = np.dot(concatenated, params['W3']) + params['b3']
    A3 = square_(Z3)

    concatenated_A3 = np.concatenate((A3, concatenated), axis=1)

    Z4 = np.dot(concatenated_A3, params['W4']) + params['b4']
    A4 = identity(Z4)  # Linear activation

    return A4, (X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated)

# Compute loss (Mean Squared Error)
def compute_loss(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Backward pass
def backward_pass(y_true, cache, params):
    X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated = cache

    dA4 = 2 * (A4 - y_true)
    dZ4 = dA4 * identity_derivative(Z4)  # Derivative of linear activation is 1
    dW4 = np.dot(concatenated_A3.T, dZ4)
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    d_concatenated_A3 = np.dot(dZ4, params['W4'].T)
    dA3 = d_concatenated_A3[:, :hidden_size3]
    d_concatenated = d_concatenated_A3[:, hidden_size3:]

    dZ3 = dA3 * square_derivative(Z3)
    dW3 = np.dot(concatenated.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    dA2 = d_concatenated[:, hidden_size1:]  # Corrected dA2 calculation
    d_concatenated_Z2 = dA2 * square_derivative(Z2)

    dA1 = d_concatenated[:, :hidden_size1]
    dZ1 = dA1 * identity_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    dZ2 = d_concatenated_Z2
    dW2 = np.dot(X.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3, 'dW4': dW4, 'db4': db4}
    return grads

# Update network parameters
def update_parameters(params, grads, learning_rate):
    for key in params.keys():
        params[key] -= learning_rate * grads['d' + key]
    return params

# Load and preprocess the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_onehot = np.eye(3)[y]

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

# Network architecture
input_size = X_train.shape[1]
hidden_size1 = 5
hidden_size2 = 5
hidden_size3 = 5
output_size = 3

# Initialize parameters
params = initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)

# Training settings
epochs = 200
learning_rate = 0.001

# Training loop
for epoch in range(epochs):
    output, cache = forward_pass(X_train, params)
    loss = compute_loss(Y_train, output)
    grads = backward_pass(Y_train, cache, params)
    params = update_parameters(params, grads, learning_rate)

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the model
output_test, _ = forward_pass(X_test, params)
test_loss = compute_loss(Y_test, output_test)
predictions = np.argmax(output_test, axis=1)
predictions_onehot = np.eye(output_size)[predictions]
accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Epoch 0, Loss: 0.3420
Epoch 20, Loss: 0.0969
Epoch 40, Loss: 0.0639
Epoch 60, Loss: 0.0479
Epoch 80, Loss: 0.0399
Epoch 100, Loss: 0.0364
Epoch 120, Loss: 0.0346
Epoch 140, Loss: 0.0334
Epoch 160, Loss: 0.0491
Epoch 180, Loss: 0.0484
Test Loss: 0.0561
Test Accuracy: 1.0000


In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Activation functions
def identity(x):
    return x

def square(x):
    return np.power(x, 2)/4

def square_(x):
    return np.power(x, 2)/24

def identity_derivative(x):
    return np.ones_like(x)

def square_derivative(x):
    return 2 * x

# Initialize network parameters
def initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
    return {
        'W1': np.random.randn(input_size, hidden_size1) * 0.1,
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(input_size, hidden_size2) * 0.1,
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size1 + hidden_size2, hidden_size3) * 0.1,
        'b3': np.zeros((1, hidden_size3)),
        'W4': np.random.randn(hidden_size3 + hidden_size1 + hidden_size2, output_size) * 0.1,
        'b4': np.zeros((1, output_size))
    }

# Forward pass
def forward_pass(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = identity(Z1)

    Z2 = np.dot(X, params['W2']) + params['b2']
    A2 = square(Z2)

    concatenated = np.concatenate((A1, A2), axis=1)

    Z3 = np.dot(concatenated, params['W3']) + params['b3']
    A3 = square_(Z3)

    concatenated_A3 = np.concatenate((A3, concatenated), axis=1)

    Z4 = np.dot(concatenated_A3, params['W4']) + params['b4']
    A4 = identity(Z4)  # Linear activation

    return A4, (X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated)

# Compute loss (Mean Squared Error)
def compute_loss(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Backward pass
def backward_pass(y_true, cache, params):
    X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated = cache

    dA4 = 2 * (A4 - y_true)
    dZ4 = dA4 * identity_derivative(Z4)  # Derivative of linear activation is 1
    dW4 = np.dot(concatenated_A3.T, dZ4)
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    d_concatenated_A3 = np.dot(dZ4, params['W4'].T)
    dA3 = d_concatenated_A3[:, :hidden_size3]
    d_concatenated = d_concatenated_A3[:, hidden_size3:]

    dZ3 = dA3 * square_derivative(Z3)
    dW3 = np.dot(concatenated.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    dA2 = d_concatenated[:, hidden_size1:]  # Corrected dA2 calculation
    d_concatenated_Z2 = dA2 * square_derivative(Z2)

    dA1 = d_concatenated[:, :hidden_size1]
    dZ1 = dA1 * identity_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    dZ2 = d_concatenated_Z2
    dW2 = np.dot(X.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3, 'dW4': dW4, 'db4': db4}
    return grads

# Update network parameters
def update_parameters(params, grads, learning_rate):
    for key in params.keys():
        params[key] -= learning_rate * grads['d' + key]
    return params

# Load and preprocess the Breast Cancer Wisconsin dataset
breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_onehot = np.eye(2)[y]  # One-hot encode target

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

# Network architecture
input_size = X_train.shape[1]
hidden_size1 = 5
hidden_size2 = 5
hidden_size3 = 5
output_size = 2  # Two classes: benign and malignant

# Initialize parameters
params = initialize_parameters(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)

# Training settings
epochs = 200
learning_rate = 0.00001

# Training loop
for epoch in range(epochs):
    output, cache = forward_pass(X_train, params)
    loss = compute_loss(Y_train, output)
    grads = backward_pass(Y_train, cache, params)
    params = update_parameters(params, grads, learning_rate)

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the model
output_test, _ = forward_pass(X_test, params)
test_loss = compute_loss(Y_test, output_test)
predictions = np.argmax(output_test, axis=1)
predictions_onehot = np.eye(output_size)[predictions]
accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Epoch 0, Loss: 0.5341
Epoch 20, Loss: 0.3618
Epoch 40, Loss: 0.2569
Epoch 60, Loss: 0.1899
Epoch 80, Loss: 0.1477
Epoch 100, Loss: 0.1213
Epoch 120, Loss: 0.1044
Epoch 140, Loss: 0.0933
Epoch 160, Loss: 0.0859
Epoch 180, Loss: 0.0809
Test Loss: 0.0752
Test Accuracy: 0.9474


In [4]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

class NeuralNetwork:
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        self.params = {
            'W1': np.random.randn(input_size, hidden_size1) * 0.1,
            'b1': np.zeros((1, hidden_size1)),
            'W2': np.random.randn(input_size, hidden_size2) * 0.1,
            'b2': np.zeros((1, hidden_size2)),
            'W3': np.random.randn(hidden_size1 + hidden_size2, hidden_size3) * 0.1,
            'b3': np.zeros((1, hidden_size3)),
            'W4': np.random.randn(hidden_size3 + hidden_size1 + hidden_size2, output_size) * 0.1,
            'b4': np.zeros((1, output_size))
        }

    def activation_identity(self, x):
        return x

    def activation_square(self, x):
        return np.power(x, 2) / 4

    def activation_square_(self, x):
        return np.power(x, 2) / 24

    def derivative_identity(self, x):
        return np.ones_like(x)

    def derivative_square(self, x):
        return 2 * x

    def forward_pass(self, X):
        Z1 = np.dot(X, self.params['W1']) + self.params['b1']
        A1 = self.activation_identity(Z1)

        Z2 = np.dot(X, self.params['W2']) + self.params['b2']
        A2 = self.activation_square(Z2)

        concatenated = np.concatenate((A1, A2), axis=1)

        Z3 = np.dot(concatenated, self.params['W3']) + self.params['b3']
        A3 = self.activation_square_(Z3)

        concatenated_A3 = np.concatenate((A3, concatenated), axis=1)

        Z4 = np.dot(concatenated_A3, self.params['W4']) + self.params['b4']
        A4 = self.activation_identity(Z4)  # Linear activation

        return A4, (X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated)

    def compute_loss(self, y_true, y_pred):
        return np.mean(np.square(y_true - y_pred))

    def backward_pass(self, y_true, cache):
        X, Z1, A1, Z2, A2, Z3, A3, concatenated_A3, Z4, A4, concatenated = cache

        dA4 = 2 * (A4 - y_true)
        dZ4 = dA4 * self.derivative_identity(Z4)
        dW4 = np.dot(concatenated_A3.T, dZ4)
        db4 = np.sum(dZ4, axis=0, keepdims=True)

        d_concatenated_A3 = np.dot(dZ4, self.params['W4'].T)
        dA3 = d_concatenated_A3[:, :hidden_size3]
        d_concatenated = d_concatenated_A3[:, hidden_size3:]

        dZ3 = dA3 * self.derivative_square(Z3)
        dW3 = np.dot(concatenated.T, dZ3)
        db3 = np.sum(dZ3, axis=0, keepdims=True)

        dA2 = d_concatenated[:, hidden_size1:]
        dZ2 = dA2 * self.derivative_square(Z2)
        dW2 = np.dot(X.T, dZ2)
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        dA1 = d_concatenated[:, :hidden_size1]
        dZ1 = dA1 * self.derivative_identity(Z1)
        dW1 = np.dot(X.T, dZ1)
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3, 'dW4': dW4, 'db4': db4}
        return grads

    def update_parameters(self, grads, learning_rate):
        for key in self.params.keys():
            self.params[key] -= learning_rate * grads['d' + key]

# Main script
if __name__ == "__main__":
    breast_cancer = load_breast_cancer()
    X, y = breast_cancer.data, breast_cancer.target
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    Y_onehot = np.eye(2)[y]

    X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_onehot, test_size=0.2, random_state=42)

    input_size = X_train.shape[1]
    hidden_size1 = 5
    hidden_size2 = 5
    hidden_size3 = 5
    output_size = 2
    # Start the timer
    start_time = time.time()
    nn = NeuralNetwork(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
    epochs = 200
    learning_rate = 0.00001

    for epoch in range(epochs):
        output, cache = nn.forward_pass(X_train)
        loss = nn.compute_loss(Y_train, output)
        grads = nn.backward_pass(Y_train, cache)
        nn.update_parameters(grads, learning_rate)

        if epoch % 20 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

    end_time = time.time()

    # Calculate the execution time
    execution_time = end_time - start_time
    print(f"Execution Time: {execution_time:.2f} seconds")

    output_test, _ = nn.forward_pass(X_test)
    test_loss = nn.compute_loss(Y_test, output_test)
    predictions = np.argmax(output_test, axis=1)
    predictions_onehot = np.eye(output_size)[predictions]
    accuracy = np.mean(np.all(predictions_onehot == Y_test, axis=1))

    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")


Epoch 0, Loss: 0.4704
Epoch 20, Loss: 0.3405
Epoch 40, Loss: 0.2461
Epoch 60, Loss: 0.1825
Epoch 80, Loss: 0.1429
Epoch 100, Loss: 0.1185
Epoch 120, Loss: 0.1028
Epoch 140, Loss: 0.0922
Epoch 160, Loss: 0.0848
Epoch 180, Loss: 0.0795
Execution Time: 0.22 seconds
Test Loss: 0.0756
Test Accuracy: 0.9474


In [6]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import time

# Load and preprocess the dataset
breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Start the timer
start_time = time.time()
# Define the neural network model
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2 ,verbose=0)

# Stop the timer
end_time = time.time()

# Calculate the execution time
execution_time = end_time - start_time
print(f"Execution Time: {execution_time:.2f} seconds")

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


Execution Time: 13.07 seconds
Test Loss: 0.0915
Test Accuracy: 0.9649


In [8]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

def initialize_parameters(input_size, hidden_size, output_size):
    np.random.seed(42)
    parameters = {
        'W1': np.random.randn(input_size, hidden_size) * 0.1,
        'b1': np.zeros((1, hidden_size)),
        'W2': np.random.randn(hidden_size, output_size) * 0.1,
        'b2': np.zeros((1, output_size))
    }
    return parameters

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def forward_propagation(X, parameters):
    W1, b1, W2, b2 = parameters['W1'], parameters['b1'], parameters['W2'], parameters['b2']

    Z1 = np.dot(X, W1) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)

    cache = {'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}
    return A2, cache

def compute_cost(A2, Y):
    m = Y.shape[0]
    cost = -np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2)) / m
    return cost

def backward_propagation(parameters, cache, X, Y):
    m = X.shape[0]
    W1, W2 = parameters['W1'], parameters['W2']
    A1, A2 = cache['A1'], cache['A2']

    dZ2 = A2 - Y
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m
    dZ1 = np.dot(dZ2, W2.T) * (1 - np.power(A1, 2))
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
    return grads

def update_parameters(parameters, grads, learning_rate):
    parameters['W1'] -= learning_rate * grads['dW1']
    parameters['b1'] -= learning_rate * grads['db1']
    parameters['W2'] -= learning_rate * grads['dW2']
    parameters['b2'] -= learning_rate * grads['db2']
    return parameters

def model(X_train, Y_train, hidden_size, num_iterations, learning_rate):
    np.random.seed(42)
    input_size = X_train.shape[1]
    output_size = Y_train.shape[1]

    parameters = initialize_parameters(input_size, hidden_size, output_size)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X_train, parameters)
        cost = compute_cost(A2, Y_train)
        grads = backward_propagation(parameters, cache, X_train, Y_train)
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 1000 == 0:
            print(f"Cost after iteration {i}: {cost}")

    return parameters

# Load and preprocess the dataset
breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y = y.reshape(-1, 1)  # Reshape y to be a 2D array

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Start the timer
start_time = time.time()
# Train the model
hidden_size = 10
num_iterations = 10000
learning_rate = 0.1
parameters = model(X_train, y_train, hidden_size, num_iterations, learning_rate)
# Stop the timer
end_time = time.time()

# Calculate the execution time
execution_time = end_time - start_time
print(f"Execution Time: {execution_time:.2f} seconds")
# Predictions
A2, _ = forward_propagation(X_test, parameters)
predictions = (A2 > 0.5).astype(int)
accuracy = np.mean(predictions == y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Cost after iteration 0: 0.6971116687344172
Cost after iteration 1000: 0.042704063328290784
Cost after iteration 2000: 0.03106215535135298
Cost after iteration 3000: 0.022963292563800607
Cost after iteration 4000: 0.016861170232692107
Cost after iteration 5000: 0.012237622636446729
Cost after iteration 6000: 0.009090432077441613
Cost after iteration 7000: 0.007002780717522767
Cost after iteration 8000: 0.005584859240956626
Cost after iteration 9000: 0.004585029233093776
Execution Time: 8.81 seconds
Test Accuracy: 97.37%
