# Implementation of Activation Functions - ReLU in MLP

In [1]:
import numpy as np

def relu(x):
    """Return the element-wise rectified linear unit of ``x``.

    Every negative entry is replaced by 0; non-negative entries are
    returned unchanged.
    """
    rectified = np.maximum(0, x)
    return rectified

# Example usage: negative entries are clamped to 0, the rest pass through.
x = np.arange(-2, 3)  # the integers -2, -1, 0, 1, 2
output = relu(x)
print(output)


[0 0 0 1 2]


In [None]:
# Output range: [0, ∞)
# Introduces non-linearity while being computationally efficient.
# Used in hidden layers of deep neural networks.
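# Mitigates the vanishing-gradient problem compared to sigmoid/tanh (the gradient is 1 for every positive input),
# although a unit whose input stays non-positive receives zero gradient and can stop learning.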

In [3]:
import numpy as np

class Perceptron:
    """Single neuron with a ReLU activation, trained one sample at a time.

    The forward pass is ``relu(dot(inputs, weights) + bias)``.  Training
    uses the delta rule: the prediction error is scaled by the ReLU
    derivative and the learning rate, then added to the weights and bias.

    Note: ``relu_derivative`` is 0 whenever the weighted sum is not
    positive, so such a sample produces no update.  If that holds for
    every training sample, the neuron stops learning and keeps
    predicting 0.
    """

    def __init__(self, input_size, learning_rate=0.1):
        """Draw the initial weights and bias from a standard normal distribution.

        Args:
            input_size: Number of input features (length of the weight vector).
            learning_rate: Step size applied to every parameter update.
        """
        self.weights = np.random.randn(input_size)
        self.bias = np.random.randn()
        self.learning_rate = learning_rate

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Return 1 where ``x > 0`` and 0 elsewhere (including at ``x == 0``)."""
        return np.where(x > 0, 1, 0)

    def _pre_activation(self, inputs):
        """Return the weighted sum of ``inputs`` plus the bias."""
        return np.dot(inputs, self.weights) + self.bias

    def predict(self, inputs):
        """Return the ReLU-activated output for ``inputs``."""
        return self.relu(self._pre_activation(inputs))

    def train(self, X, y, epochs=100):
        """Fit the weights and bias in place with per-sample updates.

        Args:
            X: Sequence of input samples.
            y: Sequence of targets, one per sample in ``X``.
            epochs: Number of full passes over the data.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )

        for _ in range(epochs):
            for features, target in zip(X, y):
                weighted_sum = self._pre_activation(features)
                error = target - self.relu(weighted_sum)

                # The derivative is taken at the pre-activation value, so a
                # non-positive weighted sum zeroes out the whole update.
                delta = error * self.relu_derivative(weighted_sum)
                self.weights += self.learning_rate * delta * features
                self.bias += self.learning_rate * delta

# Example usage
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Inputs
y = np.array([0, 1, 1, 0])  # XOR problem (not solvable with a single-layer perceptron)

perceptron = Perceptron(input_size=2, learning_rate=0.1)
perceptron.train(X, y, epochs=10000)

# Testing: walk the input rows directly instead of indexing by position.
for sample in X:
    print(f"Input: {sample}, Predicted Output: {perceptron.predict(sample)}")


Input: [0 0], Predicted Output: 0.0
Input: [0 1], Predicted Output: 0.0
Input: [1 0], Predicted Output: 0.0
Input: [1 1], Predicted Output: 0.0


In [None]:
#  ReLU Activation Function: Used instead of sigmoid/tanh for non-linearity.
#  Derivative of ReLU: Helps with weight updates during training.
#  Gradient Descent Updates: Adjusts weights and bias based on error and ReLU derivative.
#  Limitation: A single-layer perceptron cannot solve XOR (a multi-layer perceptron is needed).

In [5]:
import numpy as np

class MLP:
    """Two-layer perceptron (input -> hidden -> output) with ReLU on both layers.

    Training is per-sample backpropagation: the output error is scaled by
    the ReLU derivative at each layer and the resulting terms are added to
    the weights and biases, scaled by the learning rate.

    Note: ``relu_derivative`` is 0 wherever a pre-activation is not
    positive.  When the output pre-activation is non-positive for a
    sample, every update for that sample is zero; if this holds for all
    samples the network stops learning and keeps predicting 0.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        """Draw all weights and biases from a standard normal distribution.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            learning_rate: Step size applied to every parameter update.
        """
        self.W1 = np.random.randn(input_size, hidden_size)  # Input -> Hidden weights
        self.b1 = np.random.randn(hidden_size)  # Hidden layer bias
        self.W2 = np.random.randn(hidden_size, output_size)  # Hidden -> Output weights
        self.b2 = np.random.randn(output_size)  # Output layer bias
        self.learning_rate = learning_rate

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Return 1 where ``x > 0`` and 0 elsewhere (including at ``x == 0``)."""
        return np.where(x > 0, 1, 0)

    def _forward(self, inputs):
        """Run one forward pass and return every intermediate value.

        Returns:
            Tuple ``(hidden_input, hidden_output, final_input, final_output)``.
        """
        hidden_input = np.dot(inputs, self.W1) + self.b1
        hidden_output = self.relu(hidden_input)
        final_input = np.dot(hidden_output, self.W2) + self.b2
        final_output = self.relu(final_input)
        return hidden_input, hidden_output, final_input, final_output

    def predict(self, inputs):
        """Return the network output for ``inputs``.

        The intermediate values of the pass are also stored on the
        instance as ``hidden_input``, ``hidden_output``, ``final_input``
        and ``final_output``.
        """
        (
            self.hidden_input,
            self.hidden_output,
            self.final_input,
            self.final_output,
        ) = self._forward(inputs)
        return self.final_output

    def train(self, X, y, epochs=10000):
        """Fit the parameters in place with per-sample backpropagation.

        Args:
            X: Sequence of input samples.
            y: Sequence of targets, one per sample in ``X``.
            epochs: Number of full passes over the data.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )

        for _ in range(epochs):
            for features, target in zip(X, y):
                hidden_input, hidden_output, final_input, final_output = (
                    self._forward(features)
                )

                # Compute error
                error = target - final_output

                # Backpropagation.  d_hidden must be computed before W2 is
                # updated so it uses the weights from the forward pass.
                d_output = error * self.relu_derivative(final_input)
                d_hidden = np.dot(d_output, self.W2.T) * self.relu_derivative(hidden_input)

                # Update weights and biases
                self.W2 += self.learning_rate * np.outer(hidden_output, d_output)
                self.b2 += self.learning_rate * d_output
                self.W1 += self.learning_rate * np.outer(features, d_hidden)
                self.b1 += self.learning_rate * d_hidden

# Example usage (XOR problem)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Inputs
y = np.array([[0], [1], [1], [0]])  # XOR outputs

mlp = MLP(input_size=2, hidden_size=4, output_size=1, learning_rate=0.1)
mlp.train(X, y, epochs=10000)

# Testing: walk the input rows directly instead of indexing by position.
for sample in X:
    print(f"Input: {sample}, Predicted Output: {mlp.predict(sample)}")


Input: [0 0], Predicted Output: [0.]
Input: [0 1], Predicted Output: [0.]
Input: [1 0], Predicted Output: [0.]
Input: [1 1], Predicted Output: [0.]


In [None]:
# Multi-Layer Structure:
# Input Layer (2 neurons) → Hidden Layer (4 neurons, ReLU activation) → Output Layer (1 neuron, ReLU activation)
# Forward Pass:
# Compute hidden layer activations using ReLU.
# Compute output layer activations using ReLU.
# Backpropagation:
# Compute errors and apply gradient descent to update weights and biases.
# Training on XOR problem:
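# Unlike a single-layer perceptron, an MLP can represent XOR thanks to its hidden layer.
# Note that the run shown above still predicts 0 for every input: a ReLU unit whose pre-activation stays
# non-positive receives zero gradient, so an unlucky random initialization can leave the network stuck.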