In [8]:
pip install torch torchvision


Collecting torch
  Downloading torch-2.2.2-cp311-cp311-win_amd64.whl.metadata (26 kB)
Collecting torchvision
  Downloading torchvision-0.17.2-cp311-cp311-win_amd64.whl.metadata (6.6 kB)
Downloading torch-2.2.2-cp311-cp311-win_amd64.whl (198.6 MB)
   ---------------------------------------- 0.0/198.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/198.6 MB 640.0 kB/s eta 0:05:11
   ---------------------------------------- 0.1/198.6 MB 1.3 MB/s eta 0:02:34
   ---------------------------------------- 0.2/198.6 MB 1.9 MB/s eta 0:01:46
   ---------------------------------------- 0.4/198.6 MB 2.1 MB/s eta 0:01:33
   ---------------------------------------- 0.5/198.6 MB 2.4 MB/s eta 0:01:23
   ---------------------------------------- 0.7/198.6 MB 2.4 MB/s eta 0:01:22
   ---------------------------------------- 0.8/198.6 MB 2.5 MB/s eta 0:01:19
   ---------------------------------------- 1.0/198.6 MB 2.6 MB/s eta 0:01:16
   ---------------------------------------- 1.1/198.6 MB

In [11]:
import torch
import torch.nn as nn

class FNN(nn.Module):
    """Two-layer feedforward network trained with hand-written backpropagation.

    The forward pass is ``sigmoid(sigmoid(X @ w1) @ w2)``; there are no bias
    terms. Gradients are derived by hand in :meth:`backward` (autograd is not
    used) and applied with plain gradient descent.

    Args:
        input_dim: Number of input features, i.e. columns of ``X``.
        hidden_dim: Number of hidden units.
        output_dim: Number of output units.
        learning_rate: Step size applied to the gradients in :meth:`backward`.
    """

    def __init__(self, input_dim=2, hidden_dim=32, output_dim=1, learning_rate=0.001):
        super().__init__()
        # Dimensions for input, hidden, and output layers
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Step size for the manual gradient-descent update
        self.learning_rate = learning_rate

        # Weights are registered as buffers (not plain attributes) so that
        # state_dict() and .to(device) include them. They are deliberately not
        # nn.Parameters: the updates are computed by hand, not by autograd.
        self.register_buffer("w1", torch.randn(self.input_dim, self.hidden_dim))  # input_dim x hidden_dim
        self.register_buffer("w2", torch.randn(self.hidden_dim, self.output_dim))  # hidden_dim x output_dim

    def sigmoid(self, s):
        """Return the element-wise logistic sigmoid ``1 / (1 + exp(-s))`` of ``s``."""
        return torch.sigmoid(s)

    def forward(self, X):
        """Run the forward pass and cache the intermediates needed by :meth:`backward`.

        Args:
            X: Input tensor whose last dimension is ``input_dim``.

        Returns:
            The network output ``y4``, with values in (0, 1).
        """
        # First linear layer
        self.y1 = torch.matmul(X, self.w1)
        # Hidden activation
        self.y2 = self.sigmoid(self.y1)
        # Second linear layer
        self.y3 = torch.matmul(self.y2, self.w2)
        # Output activation
        y4 = self.sigmoid(self.y3)
        return y4

    def backward(self, X, labels, y4):
        """Compute the weight gradients by hand and take one gradient-descent step.

        Must be called after :meth:`forward` on the same ``X``, because it reads
        the cached ``self.y2`` and ``self.y3``.

        Note on the loss: the output error signal used here is
        ``(y4 - labels) * sigmoid'(y3)``, which is the gradient of the half
        squared error ``0.5 * sum((y4 - labels) ** 2)`` with respect to ``y3``.
        (For binary cross-entropy on a sigmoid output the error signal would be
        ``y4 - labels`` alone, without the sigmoid-derivative factor.)

        Args:
            X: The input batch that was passed to :meth:`forward`.
            labels: Target values, broadcastable against ``y4``.
            y4: The output returned by :meth:`forward`.
        """
        # Derivative of the half squared error w.r.t. the final output y4
        dC_dy4 = y4 - labels

        # Gradient for w2: chain through the output sigmoid, then the linear layer.
        # The output activation is recomputed once from the cached pre-activation.
        out = self.sigmoid(self.y3)
        dy4_dy3 = out * (1 - out)
        y4_delta = dC_dy4 * dy4_dy3
        dC_dw2 = torch.matmul(torch.t(self.y2), y4_delta)

        # Gradient for w1: push the output error back through w2 and the hidden sigmoid
        dy3_dy2 = self.w2
        dy2_dy1 = self.y2 * (1 - self.y2)
        y2_delta = torch.matmul(y4_delta, torch.t(dy3_dy2)) * dy2_dy1
        dC_dw1 = torch.matmul(torch.t(X), y2_delta)

        # Both gradients are computed before either weight is modified; update in place.
        self.w1.sub_(self.learning_rate * dC_dw1)
        self.w2.sub_(self.learning_rate * dC_dw2)

    def fit(self, X, labels, num_epochs=1000):
        """Run ``num_epochs`` full-batch forward/backward passes over ``(X, labels)``."""
        for _ in range(num_epochs):
            # Forward propagation
            y4 = self.forward(X)
            # Backward propagation and weight update
            self.backward(X, labels, y4)

    def train(self, X=True, labels=None, num_epochs=1000):
        """Fit the network, or switch training mode like ``nn.Module.train``.

        This name shadows ``nn.Module.train(mode)``, which ``nn.Module.eval()``
        calls as ``self.train(False)``. To keep both usages working:

        * ``train(X, labels, num_epochs=...)`` fits the model (see :meth:`fit`)
          and returns ``None``.
        * ``train()`` / ``train(True)`` / ``train(False)`` delegates to
          ``nn.Module.train`` and returns ``self``.

        Raises:
            TypeError: If ``X`` is not a bool and ``labels`` is omitted.
        """
        if labels is None:
            if not isinstance(X, bool):
                raise TypeError("train() requires `labels` when `X` is input data")
            return super().train(X)
        self.fit(X, labels, num_epochs=num_epochs)

# Example usage: fit the network on two toy samples.
# FNN draws its initial weights with torch.randn, so seed the global RNG first
# to make the run reproducible under Restart Kernel -> Run All.
torch.manual_seed(0)

X_train = torch.tensor([[0.1, 0.2], [0.3, 0.4]])  # Input data: 2 samples x 2 features
labels = torch.tensor([[0.8], [0.9]])  # Ground truth targets: one value per sample (2 x 1)

model = FNN()
model.train(X_train, labels)
