# 🧠 ML Workshop - Neural Network Basics

**Author:** Shuvam Banerji Seal

This notebook covers:
- Perceptrons and neurons
- Activation functions
- Multi-layer perceptrons (MLPs)
- Forward propagation
- Building a simple neural network from scratch

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, FancyArrow, FancyBboxPatch
import matplotlib.patches as mpatches

# Plot styling: dark theme with a 12x8 inch default figure size
plt.style.use('dark_background')
plt.rcParams.update({'figure.figsize': (12, 8)})

# Accent palette (hex RGB strings) shared by the figures in this notebook
CYAN, PINK = '#00d9ff', '#ff6b9d'
YELLOW, GREEN = '#c8ff00', '#00ff88'
ORANGE = '#ff9500'

## 1. The Perceptron: Simplest Neural Unit

$$y = \sigma(\mathbf{w} \cdot \mathbf{x} + b) = \sigma\left(\sum_{i=1}^{n} w_i x_i + b\right)$$

Where $\sigma$ is an activation function.

In [None]:
# Visualize a single neuron
fig, ax = plt.subplots(figsize=(14, 8))

# Draw neuron body (radius 0.1 around (0.5, 0.5), so its left edge sits at x = 0.4)
neuron = Circle((0.5, 0.5), 0.1, color=CYAN, ec='white', linewidth=2, zorder=10)
ax.add_patch(neuron)

# Draw inputs: three features plus the bias, which is drawn as an input with weight 1
input_labels = ['$x_1$', '$x_2$', '$x_3$', '$b$ (bias)']
input_y = [0.8, 0.6, 0.4, 0.2]
weight_labels = ['$w_1$', '$w_2$', '$w_3$', '1']
colors = [PINK, YELLOW, GREEN, ORANGE]

for label, y, w_label, color in zip(input_labels, input_y, weight_labels, colors):
    # Input circles
    input_circle = Circle((0.1, y), 0.03, color=color, ec='white', zorder=10)
    ax.add_patch(input_circle)
    ax.text(0.03, y, label, fontsize=14, ha='center', va='center', color=color)

    # Arrows with weights: each arrow runs from the input's right edge to the
    # neuron's left edge; the weight label sits just above the arrow's midpoint
    ax.annotate('', xy=(0.4, 0.5), xytext=(0.13, y),
                arrowprops=dict(arrowstyle='->', color=color, lw=2))
    ax.text(0.25, (y + 0.5)/2 + 0.03, w_label, fontsize=12, color=color)

# Output
ax.annotate('', xy=(0.75, 0.5), xytext=(0.6, 0.5),
            arrowprops=dict(arrowstyle='->', color='white', lw=2))
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences
ax.text(0.85, 0.5, r'$y = \sigma(\sum w_i x_i + b)$', fontsize=14, va='center')

# Labels
ax.text(0.5, 0.5, r'$\Sigma$', fontsize=20, ha='center', va='center', color='black', zorder=11)
ax.text(0.5, 0.32, 'Neuron', fontsize=12, ha='center', color=CYAN)

# Box for formula
formula_box = FancyBboxPatch((0.6, 0.1), 0.38, 0.15, boxstyle="round,pad=0.02",
                              facecolor='#1a1a2e', edgecolor=CYAN, linewidth=2)
ax.add_patch(formula_box)
# "\n" must stay a real newline here, so only the LaTeX backslash is escaped
ax.text(0.79, 0.175, 'Forward Pass:\n$z = w_1x_1 + w_2x_2 + w_3x_3 + b$\n$y = \\sigma(z)$',
        fontsize=11, ha='center', va='center', color='white')

# Unit-square canvas with equal aspect so the Circle patches render as circles
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_aspect('equal')
ax.axis('off')
ax.set_title('Anatomy of a Single Neuron', fontsize=16, pad=20)

plt.tight_layout()
plt.show()

## 2. Activation Functions: Adding Non-linearity

Without activation functions, a neural network is just a linear transformation! Stacking layers does not help: a composition of linear maps is itself a single linear map.

In [None]:
# Define activation functions
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^-x), applied element-wise.

    The input is clipped to [-500, 500] before exponentiation so that very
    negative values do not overflow ``np.exp``. This mirrors
    ``SimpleNeuralNetwork.sigmoid``; at the clip bounds the result is already
    within ~1e-217 of 0 or 1.

    Args:
        x: scalar or array-like of pre-activations.

    Returns:
        Values in (0, 1) with the same shape as ``x``.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

def relu(x):
    """Rectified linear unit: element-wise max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def tanh(x):
    """Hyperbolic tangent, delegated to NumPy's element-wise ``np.tanh``."""
    squashed = np.tanh(x)
    return squashed

def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: pass positive inputs through, scale the rest by ``alpha``."""
    is_positive = x > 0
    leaked = alpha * x
    return np.where(is_positive, x, leaked)

def softmax(x, axis=None):
    """Softmax: exponentiate and normalise so the result sums to 1.

    Args:
        x: array-like of scores (logits).
        axis: axis along which to normalise. The default ``None`` normalises
            over the whole array, which is the original behaviour and the
            right choice for a single 1-D score vector. For a batch of shape
            (n_samples, n_classes) pass ``axis=1`` (or ``-1``) so that every
            row sums to 1 independently.

    Returns:
        Array of the same shape as ``x`` with non-negative entries summing to
        1 along ``axis``.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged but keeps exp() from overflowing
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Plot all activation functions on a 2x2 grid (softmax is not plotted here)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

x = np.linspace(-5, 5, 200)

# (function, title, line colour, one-line description) per panel.
# Titles are raw strings so the LaTeX backslashes need no escaping and no
# invalid escape sequences (\s, \m) remain; the rendered text is unchanged.
activations = [
    (sigmoid, r'Sigmoid: $\sigma(x) = \frac{1}{1+e^{-x}}$', CYAN, 'Outputs (0,1), good for probabilities'),
    (relu, r'ReLU: $f(x) = \max(0, x)$', PINK, 'Most popular! Fast, sparse activations'),
    (tanh, r'Tanh: $f(x) = \tanh(x)$', YELLOW, 'Outputs (-1,1), zero-centered'),
    (leaky_relu, r'Leaky ReLU: $f(x) = \max(0.01x, x)$', GREEN, 'Fixes "dying ReLU" problem'),
]

for ax, (func, title, color, desc) in zip(axes.flatten(), activations):
    y = func(x)
    ax.plot(x, y, color=color, linewidth=3)
    # Faint axes through the origin for reference
    ax.axhline(y=0, color='white', linewidth=0.5, alpha=0.5)
    ax.axvline(x=0, color='white', linewidth=0.5, alpha=0.5)
    ax.set_xlabel('z (pre-activation)')
    ax.set_ylabel('a (activation)')
    ax.set_title(title, fontsize=13)
    ax.grid(True, alpha=0.3)

    # Add description box, positioned in axes coordinates (top-left corner)
    ax.text(0.02, 0.98, desc, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=dict(boxstyle='round', facecolor='#1a1a2e', alpha=0.8))

plt.suptitle('Activation Functions', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

## 3. Build a Neural Network from Scratch

In [None]:
class SimpleNeuralNetwork:
    """
    A simple 2-layer neural network built from scratch.
    Architecture: Input -> Hidden (ReLU) -> Output (Sigmoid)

    Parameters are plain NumPy arrays:
        W1: (input_size, hidden_size)    b1: (1, hidden_size)
        W2: (hidden_size, output_size)   b2: (1, output_size)

    ``forward`` caches the intermediate values (z1, a1, z2, a2) on the
    instance; ``backward`` reads that cache, so it must be called right after
    ``forward`` on the same batch.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters and print a short summary of the network."""
        # He initialization: standard normal scaled by sqrt(2 / fan_in).
        # Biases start at zero.
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
        self.b2 = np.zeros((1, output_size))

        print("\n🧠 Neural Network Created:")
        print(f"   Input layer:  {input_size} neurons")
        print(f"   Hidden layer: {hidden_size} neurons (ReLU)")
        print(f"   Output layer: {output_size} neurons (Sigmoid)")
        print(f"   Total parameters: {self.count_parameters()}")

    def count_parameters(self):
        """Return the total number of scalar weights and biases."""
        return (self.W1.size + self.b1.size + self.W2.size + self.b2.size)

    def relu(self, z):
        """Element-wise max(0, z)."""
        return np.maximum(0, z)

    def relu_derivative(self, z):
        """Gradient of ReLU: 1.0 where z > 0, otherwise 0.0."""
        return (z > 0).astype(float)

    def sigmoid(self, z):
        """Logistic sigmoid; z is clipped to [-500, 500] so exp() cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def forward(self, X):
        """Forward propagation.

        Args:
            X: input batch of shape (m, input_size).

        Returns:
            Sigmoid outputs of shape (m, output_size). The intermediate
            values are stored on ``self`` for use by ``backward``.
        """
        # Layer 1
        self.z1 = X @ self.W1 + self.b1
        self.a1 = self.relu(self.z1)

        # Layer 2
        self.z2 = self.a1 @ self.W2 + self.b2
        self.a2 = self.sigmoid(self.z2)

        return self.a2

    def compute_loss(self, y_true, y_pred):
        """Binary cross-entropy loss, averaged over all entries."""
        # Keep predictions away from exactly 0 and 1 so log() stays finite
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def backward(self, X, y_true, learning_rate=0.1):
        """Backpropagation followed by one gradient-descent update (in place).

        Uses the activations cached by the most recent ``forward`` call, which
        must have been made on this same ``X``.

        Args:
            X: input batch of shape (m, input_size).
            y_true: targets of shape (m, output_size). A flat (m,) vector
                would broadcast against the (m, output_size) predictions and
                give wrong gradients, so reshape it to a column first.
            learning_rate: step size for the update.
        """
        m = X.shape[0]

        # Output layer gradients. For sigmoid + binary cross-entropy the
        # gradient w.r.t. z2 simplifies to (prediction - target).
        dz2 = self.a2 - y_true
        dW2 = (1/m) * self.a1.T @ dz2
        db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)

        # Hidden layer gradients (chain rule through W2 and the ReLU)
        dz1 = (dz2 @ self.W2.T) * self.relu_derivative(self.z1)
        dW1 = (1/m) * X.T @ dz1
        db1 = (1/m) * np.sum(dz1, axis=0, keepdims=True)

        # Update weights
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, epochs=100, learning_rate=0.1, verbose=True):
        """Train the network with full-batch gradient descent.

        Args:
            X: inputs of shape (m, input_size).
            y: targets of shape (m, output_size).
            epochs: number of full passes over the data.
            learning_rate: step size passed to ``backward``.
            verbose: when True, print loss and accuracy about ten times
                during training.

        Returns:
            List with the loss of every epoch, measured before that epoch's
            weight update.
        """
        losses = []
        # Report roughly ten times per run. max(1, ...) keeps the modulo below
        # well-defined when epochs < 10 (epochs // 10 would otherwise be 0).
        log_every = max(1, epochs // 10)

        for epoch in range(epochs):
            # Forward
            y_pred = self.forward(X)

            # Compute loss
            loss = self.compute_loss(y, y_pred)
            losses.append(loss)

            # Backward
            self.backward(X, y, learning_rate)

            if verbose and epoch % log_every == 0:
                acc = np.mean((y_pred > 0.5) == y) * 100
                print(f"Epoch {epoch:4d}: Loss = {loss:.4f}, Accuracy = {acc:.1f}%")

        return losses

    def predict(self, X):
        """Return hard 0/1 class labels by thresholding the output at 0.5."""
        return (self.forward(X) > 0.5).astype(int)

In [None]:
# Create XOR dataset (classic non-linear problem)
np.random.seed(42)

# Generate noisy XOR data
n_samples = 200
X = np.random.randn(n_samples, 2)
# Label is 1 when exactly one coordinate is positive; stored as an (n_samples, 1) column
y = ((X[:, 0] > 0) ^ (X[:, 1] > 0)).astype(float).reshape(-1, 1)

# Add some noise. It is applied after labelling, so points close to an axis
# can end up on the "wrong" side of it.
X += np.random.randn(n_samples, 2) * 0.1

# Visualize data
fig, ax = plt.subplots(figsize=(8, 8))
scatter = ax.scatter(X[:, 0], X[:, 1], c=y.ravel(), cmap='coolwarm', s=50, edgecolors='white')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('XOR Dataset (Non-linear!)')
ax.grid(True, alpha=0.3)
# Faint axes through the origin mark the four quadrants
ax.axhline(y=0, color='white', linewidth=0.5, alpha=0.5)
ax.axvline(x=0, color='white', linewidth=0.5, alpha=0.5)
plt.colorbar(scatter, label='Class')
plt.show()

print("\n📊 Dataset:")
print(f"   Samples: {n_samples}")
print(f"   Features: {X.shape[1]}")
print("   Classes: 2 (binary)")
print("   XOR: A linear model CANNOT solve this!")

In [None]:
# Fit the network to the XOR data; `losses` holds one loss value per epoch
nn = SimpleNeuralNetwork(
    input_size=2,
    hidden_size=8,
    output_size=1,
)
losses = nn.train(X, y, learning_rate=0.5, epochs=1000)

In [None]:
# Visualize training and decision boundary
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Loss curve
axes[0].plot(losses, color=CYAN, linewidth=2)
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss (Binary Cross-Entropy)')
axes[0].set_title('Training Loss')
axes[0].grid(True, alpha=0.3)

# Decision boundary: evaluate the network on a dense grid covering the data
# (plus a margin of 1 on every side) with grid step h
ax = axes[1]
h = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Note: forward() overwrites the activations cached on `nn` with the grid's values
Z = nn.forward(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Two filled regions split at probability 0.5, with the 0.5 contour drawn in white
ax.contourf(xx, yy, Z, levels=[0, 0.5, 1], cmap='coolwarm', alpha=0.4)
ax.contour(xx, yy, Z, levels=[0.5], colors=['white'], linewidths=[2])
ax.scatter(X[:, 0], X[:, 1], c=y.ravel(), cmap='coolwarm', s=50, edgecolors='white')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('Learned Decision Boundary')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Final accuracy on the training data
predictions = nn.predict(X)
accuracy = np.mean(predictions == y) * 100
print(f"\n🎯 Final Accuracy: {accuracy:.1f}%")

## 4. Visualize Network Architecture

In [None]:
def draw_neural_network(layer_sizes, ax, title='Neural Network', actual_sizes=None):
    """
    Draw a fully connected neural network diagram.

    Neurons are drawn as circles inside the unit square, one column per layer,
    with a line between every pair of neurons in adjacent layers.

    Args:
        layer_sizes: number of neurons to draw in each layer, input layer first.
        ax: Matplotlib axes to draw on. Its limits are set to [0, 1] x [0, 1],
            its aspect to 'equal', and its frame is hidden.
        title: title shown above the diagram.
        actual_sizes: optional neuron counts to print under each layer. Use
            this when ``layer_sizes`` is a truncated stand-in for a larger
            network so the labels still show the real sizes. Defaults to
            ``layer_sizes``.

    Raises:
        ValueError: if ``layer_sizes`` is empty or has no layer with at least
            one neuron, or if ``actual_sizes`` has a different length.
    """
    layer_sizes = list(layer_sizes)
    if not layer_sizes or max(layer_sizes) < 1:
        raise ValueError("layer_sizes must contain at least one layer with one or more neurons")
    if actual_sizes is None:
        actual_sizes = layer_sizes
    elif len(actual_sizes) != len(layer_sizes):
        raise ValueError("actual_sizes must have one entry per layer in layer_sizes")

    n_layers = len(layer_sizes)
    max_neurons = max(layer_sizes)

    # Repeat the palette when there are more layers than colours
    layer_colors = [GREEN, CYAN, YELLOW, PINK, ORANGE]
    if n_layers > len(layer_colors):
        layer_colors = layer_colors * (n_layers // len(layer_colors) + 1)

    # Calculate positions: the widest layer sets the vertical pitch; columns
    # are spread evenly with a margin on both sides
    v_spacing = 1.0 / max_neurons
    h_spacing = 1.0 / (n_layers + 1)

    # Store neuron positions for connections
    positions = []

    for layer_idx, n_neurons in enumerate(layer_sizes):
        layer_positions = []
        x = (layer_idx + 1) * h_spacing

        # Center neurons vertically
        start_y = 0.5 - (n_neurons - 1) * v_spacing / 2

        for neuron_idx in range(n_neurons):
            y = start_y + neuron_idx * v_spacing
            layer_positions.append((x, y))

            # Draw neuron (zorder 3 keeps it above the connection lines)
            circle = Circle((x, y), 0.02, color=layer_colors[layer_idx],
                           ec='white', linewidth=1.5, zorder=3)
            ax.add_patch(circle)

        positions.append(layer_positions)

    # Draw connections between every pair of neurons in adjacent layers
    for layer_idx in range(n_layers - 1):
        for start_pos in positions[layer_idx]:
            for end_pos in positions[layer_idx + 1]:
                ax.plot([start_pos[0], end_pos[0]], [start_pos[1], end_pos[1]],
                       'w-', alpha=0.2, linewidth=0.5, zorder=1)

    # Add layer labels, showing the real neuron count rather than the drawn one
    labels = ['Input'] + [f'Hidden {i+1}' for i in range(n_layers - 2)] + ['Output']
    for i, (label, n_neurons) in enumerate(zip(labels, actual_sizes)):
        x = (i + 1) * h_spacing
        ax.text(x, 0.05, f'{label}\n({n_neurons})', ha='center', fontsize=10, color='white')

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(title, fontsize=14, pad=20)


# Visualize different architectures
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

architectures = [
    ([2, 4, 1], 'Simple: 2 → 4 → 1'),
    ([4, 8, 8, 2], 'Deeper: 4 → 8 → 8 → 2'),
    ([784, 256, 128, 10], 'MNIST: 784 → 256 → 128 → 10'),
]

for ax, (layers, title) in zip(axes, architectures):
    # For large networks, draw at most 12 representative neurons per layer
    # but keep the true sizes in the labels
    display_layers = [min(size, 12) for size in layers]
    draw_neural_network(display_layers, ax, title, actual_sizes=layers)

plt.tight_layout()
plt.show()

## 5. Forward Pass Walkthrough

Let's trace through a forward pass with real numbers!

In [None]:
# Create a tiny network for demonstration
# No random numbers are drawn in this cell; the seed is kept so the global
# RNG state after running it is unchanged from the original notebook.
np.random.seed(0)

# Simple weights: 2 inputs -> 2 hidden units -> 1 output
W1 = np.array([[0.5, -0.3],
               [0.2, 0.8]])
b1 = np.array([[0.1, -0.1]])

W2 = np.array([[0.4],
               [-0.5]])
b2 = np.array([[0.2]])

# Input: a single sample as a (1, 2) row vector
x = np.array([[1.0, 2.0]])

print("="*60)
print("FORWARD PASS WALKTHROUGH")
print("="*60)

print(f"\n📥 INPUT: x = {x}")
print("\n📊 WEIGHTS:")
print(f"   W1 = \n{W1}")
print(f"   b1 = {b1}")
# W2 is shown as the (2, 1) column it is, so the printed shapes match the
# matrix products below
print(f"   W2 = \n{W2}")
print(f"   b2 = {b2}")

# Layer 1: Linear transformation
z1 = x @ W1 + b1
print("\n🔢 LAYER 1 - Linear:")
print("   z1 = x @ W1 + b1")
print(f"   z1 = {x} @ \n{W1}")
print(f"      + {b1}")
print(f"   z1 = {z1}")

# Layer 1: Activation (ReLU)
a1 = np.maximum(0, z1)
print("\n⚡ LAYER 1 - ReLU Activation:")
print("   a1 = max(0, z1)")
print(f"   a1 = {a1}")

# Layer 2: Linear transformation
z2 = a1 @ W2 + b2
print("\n🔢 LAYER 2 - Linear:")
print("   z2 = a1 @ W2 + b2")
print(f"   z2 = {a1} @ \n{W2}")
print(f"      + {b2}")
print(f"   z2 = {z2}")

# Layer 2: Activation (Sigmoid)
# NOTE: with these weights z2 is 0 up to floating-point rounding, so a2 sits
# essentially on the 0.5 threshold and the class printed below can depend on
# that rounding.
a2 = 1 / (1 + np.exp(-z2))
print("\n⚡ LAYER 2 - Sigmoid Activation:")
print("   a2 = sigmoid(z2) = 1/(1 + e^(-z2))")
print(f"   a2 = {a2}")

print(f"\n📤 OUTPUT: y_pred = {a2[0,0]:.4f}")
print(f"   → Predicted class: {1 if a2[0,0] > 0.5 else 0}")
print("="*60)

## 📝 Summary

In this notebook, we covered:

1. **Perceptron**: The basic unit of neural networks (weighted sum + activation)
2. **Activation Functions**: Sigmoid, ReLU, Tanh, Leaky ReLU — adding non-linearity
3. **Built a Neural Network from Scratch**: Implemented forward pass, loss, and backprop
4. **XOR Problem**: Demonstrated that NNs can learn non-linear boundaries
5. **Forward Pass**: Traced through computation with real numbers

**Key Insights:**
- Neural networks are just compositions of linear transformations + activations
- Non-linearity (activation functions) is what makes NNs powerful
- Backpropagation is just the chain rule applied systematically

**Next**: Training a network on MNIST!