In [None]:
#deep learning - 1.using numpy 2.PyTorch

In [None]:
#1.using numpy

import numpy as np
import pandas as pd

#training data: five scalar inputs and their binary labels, each stored as an (N,1) column
x=np.array([-2,-1,0,1,2]).reshape(-1,1)
y=np.array([0,0,0,1,1]).reshape(-1,1)


#----data preprocessed

#using sigmoid activation fn
def sigmoid(x):
  """Logistic sigmoid 1/(1+e^-x), applied element-wise to a scalar or NumPy array."""
  exp_neg=np.exp(-x)
  return 1/(1+exp_neg)
#sigmoid derivatives
def sigmoid_derivatives(x):
  """Sigmoid derivative written in terms of the sigmoid's own output.

  x must already be a sigmoid activation s = sigmoid(z); the result is s*(1-s).
  """
  complement=1-x
  return x*complement

#initialise the parameters (seeded so every run starts from the same weights)
np.random.seed(42)
#hidden neuron: the weight is a 1x1 array holding one draw from the standard normal distribution;
#2-D shapes are used so the weighted sum can be written as a matrix product
w1=np.random.standard_normal((1,1))
b1=np.zeros_like(w1)
#output neuron: same shapes, weight taken as the next draw from the seeded stream
w2=np.random.standard_normal((1,1))
b2=np.zeros_like(w2)

#training hyper-parameters
learning_rate,epochs=0.1,5000

#training loop: full-batch gradient descent on the 1-1-1 sigmoid network
for epoch in range(epochs):
  #forward propagation

  #hidden layer: weighted input plus bias, then sigmoid
  z1=np.dot(x,w1)+b1
  a1=sigmoid(z1)

  #o/p layer: must be fed the hidden activation a1 (not the raw input x) so that
  #the forward pass matches the gradients below and the test-time forward pass
  z2=np.dot(a1,w2)+b2
  output=sigmoid(z2)

  #loss function: mean squared error between labels and predictions
  loss=np.mean((y-output)**2)

  #backward propagation  --learning
  #the error is written as (y-output), so every d_* term below points in the
  #descent direction (negative loss gradient up to a constant factor);
  #that is why the updates further down ADD the terms instead of subtracting
  #o/p layer
  d_output=(y-output)*sigmoid_derivatives(output)
  d_w2=np.dot(a1.T,d_output)    #a1.T turns the (N,1) activations into (1,N) so the product sums over samples
  d_b2=np.sum(d_output,axis=0,keepdims=True)  #keepdims keeps the (1,1) shape of b2

  #hidden layer: push the output error back through w2 and the hidden sigmoid
  d_hidden=np.dot(d_output,w2.T)*sigmoid_derivatives(a1)
  d_w1=np.dot(x.T,d_hidden)
  d_b1=np.sum(d_hidden,axis=0,keepdims=True)

  #update the o/p layer weight and bias
  w2+=learning_rate*d_w2
  b2+=learning_rate*d_b2

  #update the hidden layer weight and bias
  w1+=learning_rate*d_w1
  b1+=learning_rate*d_b1

  #print the loss every 1000 epochs
  if epoch %1000==0:
    print(f"Epochs {epoch}, Loss:{loss :.4f}")

#probe the trained network with a handful of inputs
testing_inputs=np.array([-3,-.5,0.75,3,1,0]).reshape(-1,1)

#forward pass: hidden layer first, then output layer, using the learned parameters
z1=testing_inputs@w1+b1
a1=sigmoid(z1)
z2=a1@w2+b2
predictions=sigmoid(z2)
print("Predictions",predictions,sep="\n")



Epochs 0, Loss:0.2936
Epochs 1000, Loss:0.0183
Epochs 2000, Loss:0.0071
Epochs 3000, Loss:0.0040
Epochs 4000, Loss:0.0027
Predictions
[[0.07575155]
 [0.08137866]
 [0.80654114]
 [0.93569339]
 [0.89717416]
 [0.12876753]]


In [None]:
#using pyTorch

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

#training inputs and binary labels as float column tensors of shape (5,1)
x=torch.tensor([-2.0,-1.0,0.0,1.0,2.0]).unsqueeze(1)
y=torch.tensor([0.0,0.0,0.0,1.0,1.0]).unsqueeze(1)

#define neural network
class SimpleNN(nn.Module):
  """One sigmoid hidden neuron followed by one sigmoid output neuron."""

  def __init__(self):
    super().__init__()
    #layers are created in the order hidden, output so their random initialisation is unchanged
    self.hidden=nn.Linear(in_features=1,out_features=1) #hidden neuron -> w1,b1
    self.output=nn.Linear(in_features=1,out_features=1) #output neuron -> w2,b2
    self.sigmoid=nn.Sigmoid()

  def forward(self,x):
    """Run x (last dimension of size 1) through both sigmoid-activated layers."""
    hidden_activation=self.sigmoid(self.hidden(x))
    return self.sigmoid(self.output(hidden_activation))

#model
#seed PyTorch's RNG first so nn.Linear's random initial weights (and therefore the
#losses and predictions printed below) are the same on every run, mirroring
#np.random.seed(42) in the NumPy cell
torch.manual_seed(42)
model=SimpleNN()

#loss and optimiser: mean squared error, plain SGD with learning rate 0.1
criterion=nn.MSELoss()
optimizer=optim.SGD(model.parameters(),lr=0.1)

#training loop: full-batch gradient descent for a fixed number of epochs
epochs=5000
for epoch in range(epochs):
  #forward pass: predictions for the whole batch, then MSE against the labels
  output=model(x)
  loss=criterion(output,y)

  #backward pass: clear old gradients, backpropagate the loss, take one SGD step
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  #report the loss once every 1000 epochs
  if not epoch%1000:
    print(f"Epochs {epoch}, Loss:{loss :.4f}")

#probe the trained model with a few inputs (written as floats so the dtype is explicit)
testing_inputs=torch.tensor([[-3.0],[-0.5],[0.5],[3.0],[1.0]])

#forward pass on the probe inputs
predictions=model(testing_inputs)

#detach() gives a tensor that is cut off from the computation graph, so only the values are shown
print("Predictions:",predictions.detach(),sep="\n")

Epochs 0, Loss:0.2536
Epochs 1000, Loss:0.0425
Epochs 2000, Loss:0.0158
Epochs 3000, Loss:0.0088
Epochs 4000, Loss:0.0059
Predictions:
tensor([[0.0315],
        [0.0365],
        [0.5820],
        [0.9381],
        [0.9054]])


## Explanation of the NumPy Implementation (Cell `Pee__jhCB600`)

This code implements a simple, single-hidden-layer neural network from scratch using NumPy. It demonstrates the core concepts of a neural network, including forward propagation, loss calculation, backpropagation, and parameter updates.

### 1. Data Preparation
- **Input and Output Data:** `x` and `y` are defined as NumPy arrays representing simple input features and their corresponding binary labels (0 or 1).

### 2. Activation Functions
- **`sigmoid(x)`:** This function implements the sigmoid activation, which squashes values between 0 and 1. It's commonly used in the output layer for binary classification or in hidden layers.
- **`sigmoid_derivatives(x)`:** This calculates the derivative of the sigmoid function, which is crucial for the backpropagation step. It takes the *output* of the sigmoid function as input.

### 3. Model Initialization
- **Weights and Biases:** `w1`, `b1` (for the hidden layer) and `w2`, `b2` (for the output layer) are initialized with random values (weights) or zeros (biases). `np.random.seed(42)` ensures reproducibility of the random initialization.
- **Training Parameters:** `learning_rate` controls the step size during parameter updates, and `epochs` defines the number of training iterations.

### 4. Training Loop (`for epoch in range(epochs):`)

#### a. Forward Propagation
- **Hidden Layer:**
  - `z1 = np.dot(x, w1) + b1`: Calculates the weighted sum of inputs and bias for the hidden layer.
  - `a1 = sigmoid(z1)`: Applies the sigmoid activation function to `z1` to get the output of the hidden layer.
- **Output Layer:**
  - `z2 = np.dot(a1, w2) + b2`: Calculates the weighted sum of hidden layer outputs and bias for the output layer.
  - `output = sigmoid(z2)`: Applies the sigmoid activation to `z2` to get the final predictions.

#### b. Loss Calculation
- **`loss = np.mean((y - output)**2)`:** Calculates the Mean Squared Error (MSE) between the true labels (`y`) and the model's predictions (`output`). This quantifies how far off the predictions are.

#### c. Backward Propagation
This is where the model learns by adjusting its parameters based on the loss.
- **Output Layer Gradients:**
  - `d_output = (y - output) * sigmoid_derivatives(output)`: Calculates the error gradient at the output layer. The `(y - output)` part is the difference between target and prediction, and `sigmoid_derivatives(output)` scales this error based on the sigmoid's steepness.
  - `d_w2 = np.dot(a1.T, d_output)`: Computes the gradient for the output layer weights (`w2`) using the transpose of the hidden layer's output (`a1.T`).
  - `d_b2 = np.sum(d_output, axis=0, keepdims=True)`: Computes the gradient for the output layer bias (`b2`).
- **Hidden Layer Gradients:**
  - `d_hidden = np.dot(d_output, w2.T) * sigmoid_derivatives(a1)`: Backpropagates the error from the output layer to the hidden layer, considering the output weights (`w2.T`) and the hidden layer's sigmoid derivative.
  - `d_w1 = np.dot(x.T, d_hidden)`: Computes the gradient for the hidden layer weights (`w1`).
  - `d_b1 = np.sum(d_hidden, axis=0, keepdims=True)`: Computes the gradient for the hidden layer bias (`b1`).

#### d. Parameter Updates
- `w2 += learning_rate * d_w2`
- `b2 += learning_rate * d_b2`
- `w1 += learning_rate * d_w1`
- `b1 += learning_rate * d_b1`:
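  The weights and biases are updated by adding a fraction (`learning_rate`) of their respective `d_*` terms. Because those terms are built from the error `(y - output)` rather than `(output - y)`, they already point along the negative gradient of the loss, so adding them is ordinary gradient descent: it moves the parameters in the direction that reduces the loss.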

### 5. Testing
- **`testing_inputs`:** A new set of inputs is defined to evaluate the trained model.
- **Forward Pass for Testing:** The `testing_inputs` are passed through the *trained* network (using the updated `w1, b1, w2, b2`) to generate `predictions`.
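- The `predictions` are then printed, showing the model's output for a mix of inputs that also appear in the training data (`0`, `1`) and inputs it was not trained on (`-3`, `-0.5`, `0.75`, `3`).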

## Explanation of the PyTorch Implementation (Cell `tKUkhLKDG564`)

This code implements the same simple, single-hidden-layer neural network using the PyTorch framework. PyTorch simplifies the process by providing modules for layers, loss functions, and optimizers, automating much of the gradient calculation (autograd).

### 1. Imports
- **`torch`**: The main PyTorch library.
- **`torch.nn` (neural network)**: Contains modules for building neural network layers (e.g., `Linear`, `Sigmoid`).
- **`torch.optim` (optimizer)**: Provides optimization algorithms (e.g., `SGD`).

### 2. Data Preparation
- **Input and Output Data:** `x` and `y` are defined as `torch.tensor` objects, which are PyTorch's equivalent of NumPy arrays, but they can track gradients.

### 3. Define Neural Network (`SimpleNN` class)
- **Inherits `nn.Module`**: All PyTorch neural networks are defined as classes that inherit from `nn.Module`. This provides the basic functionality for tracking parameters and submodules.
- **`__init__(self)` constructor:**
  - `super(SimpleNN, self).__init__()`: Calls the constructor of the parent `nn.Module` class.
  - `self.hidden = nn.Linear(1, 1)`: Defines the hidden layer. `nn.Linear(in_features, out_features)` creates a linear transformation (weight multiplication + bias addition). Here, 1 input feature maps to 1 output feature (neuron).
  - `self.output = nn.Linear(1, 1)`: Defines the output layer, also a linear transformation.
  - `self.sigmoid = nn.Sigmoid()`: Initializes the sigmoid activation function.
- **`forward(self, x)` method:**
  - This method defines how data flows through the network.
  - `a1 = self.sigmoid(self.hidden(x))`: The input `x` passes through the hidden linear layer, and then the sigmoid activation is applied.
  - `out = self.sigmoid(self.output(a1))`: The output of the hidden layer (`a1`) passes through the output linear layer, followed by another sigmoid activation to produce the final `out`put.

### 4. Model, Loss, and Optimizer Initialization
- **`model = SimpleNN()`**: Creates an instance of our neural network.
- **`criterion = nn.MSELoss()`**: Defines the Mean Squared Error (MSE) loss function. This is equivalent to `np.mean((y - output)**2)` in the NumPy example.
- **`optimizer = optim.SGD(model.parameters(), lr=0.1)`**: Initializes the Stochastic Gradient Descent (SGD) optimizer. It takes `model.parameters()` (which are all the trainable weights and biases defined in `nn.Linear` layers) and the learning rate (`lr=0.1`) as arguments. This optimizer will handle updating the parameters based on gradients.

### 5. Training Loop (`for epoch in range(epochs):`)

#### a. Forward Pass
- `output = model(x)`: Feeds the input data `x` through the `model`. This implicitly calls the `forward` method of the `SimpleNN` class.
- `loss = criterion(output, y)`: Calculates the loss between the model's `output` and the true `y` using the defined `MSELoss` criterion.

#### b. Backpropagation and Parameter Update
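- **`optimizer.zero_grad()`**: Clears the gradients of all optimized `torch.Tensor`s. It's crucial to do this before every `loss.backward()` call, because PyTorch accumulates gradients by default and the gradients left over from the previous epoch would otherwise be added to the new ones.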
- **`loss.backward()`**: This is the magic of PyTorch's autograd. It automatically computes the gradients of the `loss` with respect to all parameters that have `requires_grad=True` (which `nn.Linear` layers automatically do).
- **`optimizer.step()`**: Performs a single optimization step (parameter update) based on the computed gradients and the `learning_rate`. This updates the model's weights and biases.

### 6. Testing
- **`testing_inputs = torch.tensor([...])`**: A new set of inputs is defined as a PyTorch tensor.
- **`predictions = model(testing_inputs)`**: The trained `model` is used to make predictions on the `testing_inputs`.
- **`print(predictions.detach())`**: The `.detach()` method is used here to get a version of the `predictions` tensor that is removed from the computation graph. This is good practice when you just want to view or use the tensor's value without needing to track its gradients further (e.g., for printing or converting to NumPy). Note that the graph was still built during the forward pass; to avoid the memory and computation of building it at all, run inference inside a `with torch.no_grad():` block.