In [1]:
import numpy as np
import matplotlib.pyplot as plt


In [2]:
class BasicNeuron:
    """
    A basic artificial neuron implementation that mimics biological neurons.

    The neuron receives inputs, applies weights, adds bias, and produces an output
    through an activation function - just like neurons in our brain!
    """

    def __init__(self, num_inputs, activation_function='sigmoid'):
        """
        Initialize the neuron with random weights and bias.

        Args:
            num_inputs: Number of input connections to this neuron
            activation_function: Type of activation function ('sigmoid', 'relu', 'tanh', 'linear')
        """
        # Initialize weights randomly between -1 and 1
        # Each input gets its own weight - this determines how important each input is
        self.weights = np.random.uniform(-1, 1, num_inputs)

        # Initialize bias - this shifts the activation function left or right
        # Bias helps the neuron fire even when inputs are small
        self.bias = np.random.uniform(-1, 1)

        # Store the activation function type
        if activation_function not in ['sigmoid', 'relu', 'tanh', 'linear']:
            raise ValueError(f"Unsupported activation function: {activation_function}")
        self.activation_function = activation_function

        # Store the number of inputs for validation
        self.num_inputs = num_inputs

        # print(f"Neuron created with {num_inputs} inputs")
        # print(f"Initial weights: {self.weights}")
        # print(f"Initial bias: {self.bias}")
        # print(f"Activation function: {activation_function}")

    def sigmoid(self, x):
        """
        Sigmoid activation function: f(x) = 1 / (1 + e^(-x))

        - Outputs values between 0 and 1
        - Smooth, differentiable curve
        - Good for binary classification problems
        """
        # Clip x to prevent overflow in exponential
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Derivative of the sigmoid function."""
        # Derivative of sigmoid(x) is sigmoid(x) * (1 - sigmoid(x))
        # We can use the output of the sigmoid function itself for this
        s = self.sigmoid(x)
        return s * (1 - s)

    def relu(self, x):
        """ReLU activation function: f(x) = max(0, x)"""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Derivative of the ReLU function."""
        return np.where(x > 0, 1, 0)

    def tanh(self, x):
        """Tanh activation function: f(x) = tanh(x)"""
        return np.tanh(x)

    def tanh_derivative(self, x):
        """Derivative of the Tanh function."""
        # Derivative of tanh(x) is 1 - tanh(x)^2
        return 1 - np.tanh(x)**2

    def linear(self, x):
        """Linear activation function: f(x) = x"""
        return x

    def linear_derivative(self, x):
        """Derivative of the Linear function."""
        return 1

    def get_activation_derivative(self, z):
        """Returns the derivative of the chosen activation function for a given net input z."""
        if self.activation_function == 'sigmoid':
            return self.sigmoid_derivative(z)
        elif self.activation_function == 'relu':
            return self.relu_derivative(z)
        elif self.activation_function == 'tanh':
            return self.tanh_derivative(z)
        elif self.activation_function == 'linear':
            return self.linear_derivative(z)
        else:
            raise ValueError(f"Derivative not implemented for {self.activation_function}")


    def forward(self, inputs):

        # Convert inputs to numpy array for easier computation
        inputs = np.array(inputs)

        # Validate input size
        if len(inputs) != self.num_inputs:
            raise ValueError(f"Expected {self.num_inputs} inputs, got {len(inputs)}")

        # Step 1 & 2: Weighted sum (dot product of inputs and weights)
        # This is like: w1*x1 + w2*x2 + w3*x3 + ... + wn*xn
        weighted_sum = np.dot(inputs, self.weights)

        # Step 3: Add bias
        # Bias allows the neuron to fire even when inputs are zero
        z = weighted_sum + self.bias

        # Step 4: Apply activation function
        if self.activation_function == 'sigmoid':
            output = self.sigmoid(z)
        elif self.activation_function == 'relu':
            output = self.relu(z)
        elif self.activation_function == 'tanh':
            output = self.tanh(z)
        elif self.activation_function == 'linear':
            output = self.linear(z)
        else:
            raise ValueError(f"Unknown activation function: {self.activation_function}")

        # Store intermediate values for educational purposes
        self.last_inputs = inputs
        self.last_weighted_sum = weighted_sum
        self.last_z = z # Store z (net input before activation) for derivative calculation
        self.last_output = output

        return output

    def update_weights(self, new_weights, new_bias=None):
        if len(new_weights) != self.num_inputs:
            raise ValueError(f"Expected {self.num_inputs} weights, got {len(new_weights)}")

        self.weights = np.array(new_weights)

        if new_bias is not None:
            self.bias = new_bias

        # print(f"Weights updated to: {self.weights}")
        # print(f"Bias updated to: {self.bias}")

    def get_details(self, verbose=True):
        if hasattr(self, 'last_inputs'):
            if verbose:
                print("\n--- Neuron Computation Details ---")
                print(f"Inputs: {self.last_inputs}")
                print(f"Weights: {self.weights}")
                print(f"Weighted sum: {self.last_weighted_sum:.4f}")
                print(f"Bias: {self.bias:.4f}")
                print(f"z (weighted sum + bias): {self.last_z:.4f}")
                print(f"Final output: {self.last_output:.4f}")
        else:
            if verbose:
                print("No computation has been performed yet!")


 --- 1. Hebbian Learning ---
#### Hebbian learning is one of the oldest and simplest learning rules.
#### It states that if two neurons on either side of a synapse are
#### simultaneously active, then the strength of that synapse is increased.
#### In its simplest form, the weight update is proportional to the product
#### of the pre-synaptic and post-synaptic activations.

In [22]:
def hebbian_learning(inputs, outputs, learning_rate=1.0):
   
    num_inputs = inputs.shape[1]
    # Hebbian learning typically doesn't use a complex activation function for its rule,
    # so we use 'linear' in BasicNeuron and manage the output directly.
    neuron = BasicNeuron(num_inputs, activation_function='linear')
    # Initialize weights to zeros as per traditional Hebbian setup for simplicity in this demo
    neuron.update_weights(np.zeros(num_inputs), 0.0)

    num_patterns = inputs.shape[0]

    print(f"\n--- Hebbian Learning ---")
    print(f"Initial Weights: {neuron.weights}, Initial Bias: {neuron.bias}")

    for i in range(num_patterns):
        x = inputs[i]
        y_desired = outputs[i]

        # Hebbian update rule: delta_w_i = learning_rate * x_i * y_desired
        # Here, we assume the output 'y_desired' is the post-synaptic activity.
        delta_weights = learning_rate * x * y_desired
        new_weights = neuron.weights + delta_weights
        new_bias = neuron.bias # Hebbian rule typically doesn't include bias update in this form

        neuron.update_weights(new_weights, new_bias)

        print(f"Pattern {i+1}: Input={x}, Desired Output={y_desired}")
        print(f"  Delta Weights: {delta_weights}")
        print(f"  Updated Weights: {neuron.weights}, Updated Bias: {neuron.bias}")
    neuron.forward(inputs[2])  # Forward pass to compute final output for the last input
    neuron.get_details(verbose=True)

    print(f"Final Weights (Hebbian): {neuron.weights}, Final Bias: {neuron.bias}")
    return neuron.weights


#### Example for Hebbian Learning: Simple pattern association
#### Let's try to learn a pattern where input [1, 1] should activate, others not.
#### Note: Hebbian learning is often used for associative memory, not direct classification.
#### It tends to learn correlations.

In [23]:
hebbian_inputs = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])
hebbian_outputs = np.array([
    1,  # For [1, 0]
    1,  # For [0, 1]
    1,  # For [1, 1] - this pattern should strengthen weights
    0   # For [0, 0]
])
hebbian_learning(hebbian_inputs, hebbian_outputs)



--- Hebbian Learning ---
Initial Weights: [0. 0.], Initial Bias: 0.0
Pattern 1: Input=[1 0], Desired Output=1
  Delta Weights: [1. 0.]
  Updated Weights: [1. 0.], Updated Bias: 0.0
Pattern 2: Input=[0 1], Desired Output=1
  Delta Weights: [0. 1.]
  Updated Weights: [1. 1.], Updated Bias: 0.0
Pattern 3: Input=[1 1], Desired Output=1
  Delta Weights: [1. 1.]
  Updated Weights: [2. 2.], Updated Bias: 0.0
Pattern 4: Input=[0 0], Desired Output=0
  Delta Weights: [0. 0.]
  Updated Weights: [2. 2.], Updated Bias: 0.0

--- Neuron Computation Details ---
Inputs: [1 1]
Weights: [2. 2.]
Weighted sum: 4.0000
Bias: 0.0000
z (weighted sum + bias): 4.0000
Final output: 4.0000
Final Weights (Hebbian): [2. 2.], Final Bias: 0.0


array([2., 2.])

#### --- 2. Perceptron Learning Rule ---
#### The Perceptron learning rule is an algorithm for supervised learning of binary classifiers.
#### It is used for linearly separable data. The rule updates weights only when a
#### misclassification occurs.

In [36]:
def activation_step(net_input):
    """Step activation function for perceptron."""
    return 1 if net_input >= 0 else 0

def perceptron_learning(inputs, outputs, learning_rate=0.1, epochs=100):
    """
    Demonstrates the Perceptron learning rule using BasicNeuron class.
    Assumes binary inputs (0 or 1) and binary outputs (0 or 1).

    Args:
        inputs (np.array): A 2D array where each row is an input pattern.
        outputs (np.array): A 1D array of desired outputs for each input pattern.
        learning_rate (float): The learning rate (eta).
        epochs (int): Number of training iterations.
    """
    num_inputs = inputs.shape[1]
    # Use 'linear' activation in BasicNeuron and apply step function externally
    # because BasicNeuron's built-in activations are continuous.
    neuron = BasicNeuron(num_inputs, activation_function='linear')

    num_patterns = inputs.shape[0]

    print(f"\n--- Perceptron Learning Rule ---")
    print(f"Initial Weights: {neuron.weights}, Initial Bias: {neuron.bias}")

    for epoch in range(epochs):
        errors = 0
        for i in range(num_patterns):
            x = inputs[i]
            y_desired = outputs[i]

            # Calculate net input using neuron's forward pass (before step activation)
            # neuron.forward(x) will return the linear output (z)
            net_input_raw = neuron.forward(x)

            # Get actual output by applying the step activation
            y_actual = activation_step(net_input_raw)

            # Calculate error
            error = y_desired - y_actual

            if error != 0: # Update weights only on misclassification
                # Perceptron update rule: delta_w_i = learning_rate * error * x_i
                # delta_bias = learning_rate * error
                new_weights = neuron.weights + learning_rate * error * x
                new_bias = neuron.bias + learning_rate * error
                neuron.update_weights(new_weights, new_bias)
                errors += 1

        # print(f"Epoch {epoch+1}: Errors = {errors}, Weights = {neuron.weights}, Bias = {neuron.bias}")
        if errors == 0:
            print(f"Converged at Epoch {epoch+1}.")
            break
    else:
        print(f"Did not converge within {epochs} epochs.")

    print(f"Final Weights (Perceptron): {neuron.weights}, Final Bias: {neuron.bias}")

    # Test the trained perceptron
    print("\n--- Perceptron Test ---")
    for i in range(num_patterns):
        x = inputs[i]
        net_input_raw = neuron.forward(x)
        y_predicted = activation_step(net_input_raw)
        print(f"Input: {x}, Desired: {outputs[i]}, Predicted: {y_predicted}")


In [37]:
# Example for Perceptron Learning: AND gate
# This is a linearly separable problem.
and_inputs = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])
and_outputs = np.array([0, 0, 0, 1])
perceptron_learning(and_inputs, and_outputs, learning_rate=0.1, epochs=100)



--- Perceptron Learning Rule ---
Initial Weights: [-0.07864665  0.40864541], Initial Bias: -0.3501217279007556
Converged at Epoch 5.
Final Weights (Perceptron): [0.22135335 0.30864541], Final Bias: -0.45012172790075555

--- Perceptron Test ---
Input: [0 0], Desired: 0, Predicted: 0
Input: [0 1], Desired: 0, Predicted: 0
Input: [1 0], Desired: 0, Predicted: 0
Input: [1 1], Desired: 1, Predicted: 1


In [8]:
# Example for Perceptron Learning: XOR gate (Non-linearly separable)
# This will demonstrate that a single perceptron cannot solve XOR.
xor_inputs = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])
xor_outputs = np.array([0, 1, 1, 0])
# Uncomment to see Perceptron fail on XOR
perceptron_learning(xor_inputs, xor_outputs, learning_rate=0.1, epochs=100)



--- Perceptron Learning Rule ---
Initial Weights: [-0.19967415  0.63827407], Initial Bias: 0.9321920685899125
Did not converge within 100 epochs.
Final Weights (Perceptron): [-0.29967415  0.03827407], Final Bias: 0.032192068589912665

--- Perceptron Test ---
Input: [0 0], Desired: 0, Predicted: 1
Input: [0 1], Desired: 1, Predicted: 1
Input: [1 0], Desired: 1, Predicted: 0
Input: [1 1], Desired: 0, Predicted: 0


#### --- 3. Delta Rule (Widrow-Hoff Rule / LMS Rule) ---
#### The Delta Rule is a gradient descent learning rule for updating the weights
#### of artificial neurons in a single-layer feedforward network. It's a generalization
#### of the perceptron learning rule and is often used with continuous activation
#### functions (like linear or sigmoid) to minimize the mean squared error (MSE).


In [38]:
def delta_rule_learning(inputs, outputs, learning_rate=0.1, epochs=100, activation_func='linear'):
    """
    Demonstrates the Delta learning rule using BasicNeuron class.

    Args:
        inputs (np.array): A 2D array where each row is an input pattern.
        outputs (np.array): A 1D array of desired outputs for each input pattern.
        learning_rate (float): The learning rate (eta).
        epochs (int): Number of training iterations.
        activation_func (str): 'linear' or 'sigmoid' for the activation function.
    """
    num_inputs = inputs.shape[1]
    neuron = BasicNeuron(num_inputs, activation_function=activation_func)

    num_patterns = inputs.shape[0]

    print(f"\n--- Delta Rule Learning ({activation_func.capitalize()} Activation) ---")
    print(f"Initial Weights: {neuron.weights}, Initial Bias: {neuron.bias}")

    for epoch in range(epochs):
        total_error = 0
        for i in range(num_patterns):
            x = inputs[i]
            y_desired = outputs[i]

            # Calculate actual output using neuron's forward pass
            y_actual = neuron.forward(x)

            # Calculate error
            error = y_desired - y_actual
            total_error += 0.5 * (error ** 2) # Sum of squared errors

            # Get derivative of activation function (f'(net_input))
            # The last_z attribute stores the net input (z) before activation
            derivative_activation = neuron.get_activation_derivative(neuron.last_z)

            # Delta Rule update: delta_w_i = learning_rate * error * x_i * f'(net_input)
            new_weights = neuron.weights + learning_rate * error * x * derivative_activation
            new_bias = neuron.bias + learning_rate * error * derivative_activation
            neuron.update_weights(new_weights, new_bias)

        # print(f"Epoch {epoch+1}: Total Squared Error = {total_error:.4f}, Weights = {neuron.weights}, Bias = {neuron.bias:.4f}")
        # Stop if error is very small
        if total_error < 0.001:
            print(f"Converged at Epoch {epoch+1}.")
            break
    else:
        print(f"Did not converge within {epochs} epochs.")

    print(f"Final Weights (Delta Rule): {neuron.weights}, Final Bias: {neuron.bias}")

    # Test the trained network
    print(f"\n--- Delta Rule Test ({activation_func.capitalize()} Activation) ---")
    for i in range(num_patterns):
        x = inputs[i]
        y_predicted = neuron.forward(x)
        print(f"Input: {x}, Desired: {outputs[i]:.4f}, Predicted: {y_predicted:.4f}")


In [104]:
# Example for Delta Rule: Simple linear regression type problem
# Input x, desired output 2x + 1 (approximately)
delta_inputs = np.array([
    [0.1], [0.2], [0.3], [0.4], [0.5],
    [0.6], [0.7], [0.8], [0.9], [1.0]
])
delta_outputs = np.array([
    0.3, 0.5, 0.7, 0.9, 1.1,
    1.3, 1.5, 1.7, 1.9, 2.1
])

# Linear activation
delta_rule_learning(delta_inputs, delta_outputs, learning_rate=0.05, epochs=1000, activation_func='linear')



--- Delta Rule Learning (Linear Activation) ---
Initial Weights: [0.22081306], Initial Bias: 0.7324331208525157
Converged at Epoch 111.
Final Weights (Delta Rule): [1.95530239], Final Bias: 0.12887625638989872

--- Delta Rule Test (Linear Activation) ---
Input: [0.1], Desired: 0.3000, Predicted: 0.3244
Input: [0.2], Desired: 0.5000, Predicted: 0.5199
Input: [0.3], Desired: 0.7000, Predicted: 0.7155
Input: [0.4], Desired: 0.9000, Predicted: 0.9110
Input: [0.5], Desired: 1.1000, Predicted: 1.1065
Input: [0.6], Desired: 1.3000, Predicted: 1.3021
Input: [0.7], Desired: 1.5000, Predicted: 1.4976
Input: [0.8], Desired: 1.7000, Predicted: 1.6931
Input: [0.9], Desired: 1.9000, Predicted: 1.8886
Input: [1.], Desired: 2.1000, Predicted: 2.0842


In [111]:
#Example for Delta Rule with Sigmoid: Approximating a binary output with continuous values
# (Similar to AND gate, but outputs will be continuous between 0 and 1)
sigmoid_inputs = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])
sigmoid_outputs = np.array([0, 0, 0, 1]) # Target values are binary, but output will be continuous

delta_rule_learning(sigmoid_inputs, sigmoid_outputs, learning_rate=0.1, epochs=5000, activation_func='sigmoid')



--- Delta Rule Learning (Sigmoid Activation) ---
Initial Weights: [-0.35448624 -0.02904345], Initial Bias: -0.24896754946917254
Did not converge within 5000 epochs.
Final Weights (Delta Rule): [4.64672419 4.64596053], Final Bias: -7.065483662933188

--- Delta Rule Test (Sigmoid Activation) ---
Input: [0 0], Desired: 0.0000, Predicted: 0.0009
Input: [0 1], Desired: 0.0000, Predicted: 0.0817
Input: [1 0], Desired: 0.0000, Predicted: 0.0818
Input: [1 1], Desired: 1.0000, Predicted: 0.9027


### Experiment with Activation Functions:

Try different combinations of activation functions for the hidden and output layers (e.g., sigmoid for both, relu for hidden and sigmoid for output). Observe how this affects convergence and performance.