In [4]:
import numpy as np
import pandas as pd

# Load the dataset
data = pd.read_csv("Xor_Dataset.csv")

# Extract inputs (x, y) and outputs (z)
X = data[['X', 'Y']].values  # Inputs: x, y
y = data['Z'].values.reshape(-1, 1)  # Output: z (reshaped to column vector)

# Activation functions: ReLU for hidden, Sigmoid for output
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Adam optimizer parameters
beta1 = 0.9  # Exponential decay rate for first moment estimate
beta2 = 0.999  # Exponential decay rate for second moment estimate
epsilon = 1e-8  # Small constant to prevent division by zero
learning_rate = 0.001  # Learning rate
input_layer_neurons = 2  # XOR input has 2 features (x, y)
hidden_layer_neurons = 6  # Increased hidden neurons (to improve capacity)
output_layer_neurons = 1  # XOR output is a single binary value

# Xavier Initialization for weights
def initialize_weights(input_size, output_size):
    limit = np.sqrt(6 / (input_size + output_size))
    return np.random.uniform(-limit, limit, (input_size, output_size))

# Initialize weights and biases using Xavier initialization
weights_input_hidden = initialize_weights(input_layer_neurons, hidden_layer_neurons)
bias_hidden = np.zeros((1, hidden_layer_neurons))

weights_hidden_output = initialize_weights(hidden_layer_neurons, output_layer_neurons)
bias_output = np.zeros((1, output_layer_neurons))

# Adam optimizer momentums
m_weights_input_hidden = np.zeros_like(weights_input_hidden)
v_weights_input_hidden = np.zeros_like(weights_input_hidden)
m_bias_hidden = np.zeros_like(bias_hidden)
v_bias_hidden = np.zeros_like(bias_hidden)

m_weights_hidden_output = np.zeros_like(weights_hidden_output)
v_weights_hidden_output = np.zeros_like(weights_hidden_output)
m_bias_output = np.zeros_like(bias_output)
v_bias_output = np.zeros_like(bias_output)

# Training the network using Feedforward and Backpropagation with Adam Optimizer
epochs = 10000
for epoch in range(epochs):
    # Feedforward Propagation:

    # Hidden layer input
    hidden_layer_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_layer_output = relu(hidden_layer_input)

    # Output layer input
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    output_layer_output = sigmoid(output_layer_input)

    # Calculate the error (Loss Function: Mean Squared Error)
    error = y - output_layer_output
    loss = np.mean(np.square(error))

    # Backpropagation (with Adam optimization)

    # Output layer error and delta
    output_layer_error = error
    output_layer_delta = output_layer_error * sigmoid_derivative(output_layer_output)

    # Hidden layer error and delta
    hidden_layer_error = output_layer_delta.dot(weights_hidden_output.T)
    hidden_layer_delta = hidden_layer_error * relu_derivative(hidden_layer_output)

    # Adam Update for weights and biases
    m_weights_input_hidden = beta1 * m_weights_input_hidden + (1 - beta1) * np.dot(X.T, hidden_layer_delta)
    v_weights_input_hidden = beta2 * v_weights_input_hidden + (1 - beta2) * np.dot(X.T, hidden_layer_delta)**2
    m_weights_hidden_output = beta1 * m_weights_hidden_output + (1 - beta1) * np.dot(hidden_layer_output.T, output_layer_delta)
    v_weights_hidden_output = beta2 * v_weights_hidden_output + (1 - beta2) * np.dot(hidden_layer_output.T, output_layer_delta)**2

    m_bias_hidden = beta1 * m_bias_hidden + (1 - beta1) * np.sum(hidden_layer_delta, axis=0, keepdims=True)
    v_bias_hidden = beta2 * v_bias_hidden + (1 - beta2) * np.sum(hidden_layer_delta, axis=0, keepdims=True)**2
    m_bias_output = beta1 * m_bias_output + (1 - beta1) * np.sum(output_layer_delta, axis=0, keepdims=True)
    v_bias_output = beta2 * v_bias_output + (1 - beta2) * np.sum(output_layer_delta, axis=0, keepdims=True)**2

    # Bias and weights update using Adam
    weights_input_hidden += learning_rate * m_weights_input_hidden / (np.sqrt(v_weights_input_hidden) + epsilon)
    weights_hidden_output += learning_rate * m_weights_hidden_output / (np.sqrt(v_weights_hidden_output) + epsilon)

    bias_hidden += learning_rate * m_bias_hidden / (np.sqrt(v_bias_hidden) + epsilon)
    bias_output += learning_rate * m_bias_output / (np.sqrt(v_bias_output) + epsilon)

    # Print the loss every 1000 epochs to observe the convergence
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")

# Final output after training
print("Final Output after Training:")
print(output_layer_output)


Epoch 0, Loss: 0.2571879206299053
Epoch 1000, Loss: 0.010484309641948516
Epoch 2000, Loss: 0.003379559793611682
Epoch 3000, Loss: 0.0015234291253224275
Epoch 4000, Loss: 0.0007829631575888029
Epoch 5000, Loss: 0.000429781392090137
Epoch 6000, Loss: 0.00024497472409881104
Epoch 7000, Loss: 0.00014291453999368096
Epoch 8000, Loss: 8.464304234519455e-05
Epoch 9000, Loss: 5.065531220176381e-05
Final Output after Training:
[[0.0102778 ]
 [0.9976088 ]
 [0.00203539]
 ...
 [0.00203539]
 [0.00203539]
 [0.00203539]]


In [5]:
import numpy as np
import pandas as pd
import random

In [6]:
# Load the dataset
data = pd.read_csv("Xor_Dataset.csv")
print(data)

      X  Y  Z
0     0  0  0
1     0  1  1
2     1  1  0
3     1  1  0
4     0  0  0
...  .. .. ..
9995  0  0  0
9996  0  1  1
9997  1  1  0
9998  1  1  0
9999  1  1  0

[10000 rows x 3 columns]


In [7]:
inputs=data[["X","Y"]].to_numpy()
expected_output=data[["Z"]].to_numpy()

In [8]:
def sigmoid(x):
    return 1/(1+np.exp(-x))


In [9]:
def derivative_sigmoid(x):
    return x*(1-x)

In [10]:
def relu(x):
    return np.maximum(0, x)

In [17]:
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [20]:
np.random.seed(42)
input_layer_neuron=2
hidden_layer1_neuron=6
hidden_layer2_neuron=4
outer_layer_neuron=1

In [21]:
# Binary Cross-Entropy Loss
def binary_cross_entropy_loss(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

In [26]:
#for hidden_layer_1
weight_input_hidden1 = np.random.randn(input_layer_neuron, hidden_layer1_neuron) * np.sqrt(2. / input_layer_neuron)
bias_hidden1 = np.zeros((1, hidden_layer1_neuron))

#for hidden_layer_2
weight_hidden1_hidden2 = np.random.randn(hidden_layer1_neuron, hidden_layer2_neuron) * np.sqrt(2. / hidden_layer1_neuron)
bias_hidden2 = np.zeros((1, hidden_layer2_neuron))

#for output_layer
weight_hidden2_output = np.random.randn(hidden_layer2_neuron, output_layer_neuron) * np.sqrt(1. / hidden_layer2_neuron)
bias_output = np.zeros((1, output_layer_neuron))

    

---------------------------------------
[[0. 0. 0. 0. 0. 0.]]


In [None]:
def forward_propagation(inputs):
    #hidden_layer_1
    hidden1_input=np.dot(inputs,weight_input_hidden1)+bias_hidden1
    hidden1_output=relu(hidden1_input)
    #hidden_layer_2
    hidden2_input=np.dot(weight_input_hidden1,weight_hidden1_hidden2)_bias_hidden2
    hidden2_output=relu(hidden2_input)
    #output_layer
    output_input=np.dot(weight_input_hidden2,)

In [13]:
# Training the network using Feedforward and Backpropagation with Adam Optimizer
epochs = 10000
for epoch in range(epochs):
    # Feedforward Propagation:

    # Hidden layer input
    hidden_layer_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_layer_output = relu(hidden_layer_input)

    # Output layer input
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    output_layer_output = sigmoid(output_layer_input)

    # Calculate the error (Loss Function: Binary Cross-Entropy)
    loss = binary_cross_entropy_loss(y, output_layer_output)

    # Backpropagation (with Adam optimization)
    output_layer_error = y - output_layer_output
    output_layer_delta = output_layer_error * sigmoid_derivative(output_layer_output)

    hidden_layer_error = output_layer_delta.dot(weights_hidden_output.T)
    hidden_layer_delta = hidden_layer_error * relu_derivative(hidden_layer_output)

    # Adam optimizer update (same as before)
    m_weights_input_hidden = beta1 * m_weights_input_hidden + (1 - beta1) * np.dot(X.T, hidden_layer_delta)
    v_weights_input_hidden = beta2 * v_weights_input_hidden + (1 - beta2) * np.dot(X.T, hidden_layer_delta)**2
    m_weights_hidden_output = beta1 * m_weights_hidden_output + (1 - beta1) * np.dot(hidden_layer_output.T, output_layer_delta)
    v_weights_hidden_output = beta2 * v_weights_hidden_output + (1 - beta2) * np.dot(hidden_layer_output.T, output_layer_delta)**2

    m_bias_hidden = beta1 * m_bias_hidden + (1 - beta1) * np.sum(hidden_layer_delta, axis=0, keepdims=True)
    v_bias_hidden = beta2 * v_bias_hidden + (1 - beta2) * np.sum(hidden_layer_delta, axis=0, keepdims=True)**2
    m_bias_output = beta1 * m_bias_output + (1 - beta1) * np.sum(output_layer_delta, axis=0, keepdims=True)
    v_bias_output = beta2 * v_bias_output + (1 - beta2) * np.sum(output_layer_delta, axis=0, keepdims=True)**2

    # Bias and weights update using Adam
    weights_input_hidden += learning_rate * m_weights_input_hidden / (np.sqrt(v_weights_input_hidden) + epsilon)
    weights_hidden_output += learning_rate * m_weights_hidden_output / (np.sqrt(v_weights_hidden_output) + epsilon)

    bias_hidden += learning_rate * m_bias_hidden / (np.sqrt(v_bias_hidden) + epsilon)
    bias_output += learning_rate * m_bias_output / (np.sqrt(v_bias_output) + epsilon)

    # Print the loss every 1000 epochs to observe the convergence
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")

# Final output after training
print("Final Output after Training:")
print(output_layer_output)


Epoch 0, Loss: 0.7365458679333212
Epoch 1000, Loss: 0.48382479561878144
Epoch 2000, Loss: 0.4830257042158113
Epoch 3000, Loss: 0.48302586513784646
Epoch 4000, Loss: 0.4826592511730427
Epoch 5000, Loss: 0.4824799320898663
Epoch 6000, Loss: 0.482690820579996
Epoch 7000, Loss: 0.4824523018937604
Epoch 8000, Loss: 0.4823368411270143
Epoch 9000, Loss: 0.48226591160173904
Final Output after Training:
[[5.74179428e-04]
 [6.66578159e-01]
 [6.66578159e-01]
 ...
 [6.66578159e-01]
 [6.66578159e-01]
 [6.66578159e-01]]


In [None]:

from train import XORModel

def make_inference(inputs):
    """
    Make predictions using the trained model.
    
    Args:
    inputs (numpy array): Input data to make predictions.
    
    Returns:
    numpy array: Predicted output values.
    """
    model = XORModel()
    # Load the trained model (we'll skip this part for now)
    # model.load_model('model.save') 
    
    predicted_output, _, _ = model.forward_propagation(inputs)
    return predicted_output
