#### Sequential
- Simple and concise for linear (layer-by-layer) architectures.
- Limited to stacking layers in a fixed order.
#### Functional API
- Flexible, but requires more code (a custom `nn.Module` subclass with its own `forward` method).
- Allows any custom computation logic.

# nn.Sequential

In [1]:
import torch
import torch.nn as nn

In [3]:
# Build a small feed-forward network with nn.Sequential.
#
# Input:  20 features per sample (X)
# Output: 5 target variables / classes (y)

layers = [
    nn.Linear(20, 64),  # input layer (20 -> 64)
    nn.ReLU(),
    nn.Linear(64, 32),  # hidden layer (64 -> 32)
    nn.ReLU(),
    nn.Linear(32, 5),   # output layer (32 -> 5)
]
# Unpacking keeps the modules positional (indices 0..4), exactly as when they
# are passed to nn.Sequential one by one.
sequential_model = nn.Sequential(*layers)

print(sequential_model)

# Random batch: 10 rows, each with 20 features.
input_data = torch.randn(10, 20)
print(input_data)
output = sequential_model(input_data)
print(output)


Sequential(
  (0): Linear(in_features=20, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=5, bias=True)
)
tensor([[ 1.3792, -0.8050,  0.8202,  1.0857, -1.1947, -0.2747,  1.2555, -1.4607,
         -1.1871, -0.4453, -0.3349, -0.5531, -1.4094,  0.3556, -1.9384,  0.6041,
         -1.2085, -0.4252,  0.6783, -1.3416],
        [ 0.8821,  0.1486, -0.8599, -0.0909, -0.3296,  1.6787, -0.2116, -0.9837,
          0.1286,  1.0199, -0.5117, -1.3837, -1.3781, -0.7491,  0.3319, -0.3084,
          0.8719, -0.2913, -0.4170,  0.6776],
        [ 1.7515,  0.9799,  1.2805,  0.3414, -0.3885, -1.4075, -0.0491, -0.1477,
         -0.2375,  1.8623, -0.2085, -0.4303,  2.3390, -2.7953,  0.7896, -0.3738,
          1.0465, -0.8128,  2.2683, -0.3204],
        [-1.1445,  0.8609,  1.7220,  1.2364,  0.8231, -1.3193, -0.4343, -0.9922,
         -2.0522, -1.3626,  0.0422,  0.8576,  0.6826,  0.5363,  0.4497,  0.9144,

In [4]:
# Inspect the learnable parameters of the first module (index 0) of the
# sequential model.
first_layer = sequential_model[0]
print(f"Model weights {first_layer.weight.data}")
print(f"Model bias {first_layer.bias.data}")


Model weights tensor([[ 0.1148,  0.1979, -0.1780,  ...,  0.0629,  0.0211,  0.1893],
        [-0.0113, -0.0232, -0.0880,  ..., -0.2119,  0.0171,  0.1187],
        [-0.2016,  0.1326, -0.2087,  ..., -0.0729, -0.0370,  0.0326],
        ...,
        [ 0.0356,  0.0332, -0.2096,  ...,  0.0180, -0.0584,  0.0862],
        [-0.0782,  0.1004,  0.1364,  ..., -0.1227,  0.1765,  0.0531],
        [ 0.1520, -0.0781, -0.0729,  ..., -0.1474, -0.1418,  0.1402]])
Model bias tensor([-0.1639,  0.1367,  0.1932,  0.1807, -0.2230, -0.0543, -0.0113,  0.1362,
        -0.0735,  0.0396, -0.0729,  0.0746,  0.0559, -0.0980, -0.1120,  0.1159,
        -0.2035,  0.1752, -0.1363, -0.1008, -0.1052,  0.1611, -0.0720, -0.1502,
        -0.2020,  0.1059,  0.1743,  0.1956, -0.1695,  0.1237,  0.1045,  0.0031,
         0.1091, -0.1208, -0.0582, -0.0797,  0.1736,  0.0232, -0.0249,  0.1967,
        -0.0964, -0.1426, -0.1628, -0.1456, -0.0014, -0.1502,  0.1106,  0.2087,
         0.0194,  0.0561, -0.0482, -0.0713,  0.1363,  0.0793,

# Functional API

In [5]:
class FunctionalModel(nn.Module):
    """Three-layer fully connected network (20 -> 64 -> 32 -> 5).

    Same architecture as the nn.Sequential example, but written as an
    nn.Module subclass so the computation is spelled out in ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in this order (fc1, fc2, fc3, relu),
        # which is also the order shown when the model is printed.
        self.fc1 = nn.Linear(20, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 5)
        self.relu = nn.ReLU()

    def forward(self, X):
        """Return the output of the last linear layer for input ``X``.

        ``X`` is fed to ``fc1``, which expects 20 input features; ReLU is
        applied after ``fc1`` and ``fc2`` but not after ``fc3``.
        """
        hidden = self.relu(self.fc1(X))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the nn.Module subclass and show its registered sub-modules.
functional_model = FunctionalModel()

print(functional_model)

# Run one forward pass on a random batch and print both input and output.
input_data = torch.randn(10, 20) # random batch: 10 rows, 20 features per row
print(input_data)
output = functional_model(input_data)
print(output)

FunctionalModel(
  (fc1): Linear(in_features=20, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=5, bias=True)
  (relu): ReLU()
)
tensor([[-1.3070,  0.1032,  0.5568, -0.4147,  1.0392,  0.3329,  0.0384, -1.6081,
          0.1327,  0.3418,  1.1015,  0.9654,  2.3364,  0.2639, -0.6385,  0.1314,
         -0.3309, -1.6550,  0.0688, -1.3912],
        [ 0.2445,  1.1699,  0.2018,  0.3214,  0.5044, -0.5608,  0.0649, -0.6566,
         -1.4461, -0.5227, -0.0259,  0.0677,  1.2339,  0.8213, -2.8294,  0.6835,
          0.1291,  0.7535, -0.1602, -0.5180],
        [ 1.1399,  0.8217,  0.7584, -0.1130, -1.9548, -0.3504,  1.8159, -0.6329,
          1.1205,  0.3743,  0.0072,  0.3930, -0.3645, -0.9858, -0.3790, -0.2466,
          0.2417,  1.0879, -0.6716,  1.0289],
        [ 0.6977, -1.4034, -0.5170, -1.1629, -1.3438,  0.0726, -0.5969,  2.1744,
          0.7602, -0.6274, -0.1978,  0.6048,  2.6544,  0.0487,  2.1333,  0.2672,

# Multi-Class Classification with PyTorch

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [7]:
# Pick the compute device: a CUDA GPU when PyTorch can see one, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [8]:
# Load the Iris dataset and separate it into features and labels
iris = load_iris()
X, y = iris.data, iris.target  # X: numerical features, y: labels (3 classes)

In [9]:
# Standardize the features with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset here, before the
# train/test split, so test-set statistics influence the scaling. Consider
# fitting on the training rows only.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [10]:
# Turn the arrays into tensors created directly on the selected device
X = torch.tensor(X, dtype=torch.float32, device=device)
# Class-index targets are stored as int64 (torch.long) for multi-class classification
y = torch.tensor(y, dtype=torch.long, device=device)

In [11]:
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# split repeatable. No `stratify` argument is passed.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [12]:
separator = "=" * 60

print(X_test.shape)

print(separator)

# Whole test split: features first, then labels
print(X_test)
print(y_test)

print(separator)
# First test sample and its label
print(X_test[0])
print(y_test[0])

torch.Size([30, 4])
tensor([[ 3.1100e-01, -5.9237e-01,  5.3541e-01,  8.7755e-04],
        [-1.7367e-01,  1.7096e+00, -1.1697e+00, -1.1838e+00],
        [ 2.2497e+00, -1.0528e+00,  1.7858e+00,  1.4488e+00],
        [ 1.8983e-01, -3.6218e-01,  4.2173e-01,  3.9577e-01],
        [ 1.1592e+00, -5.9237e-01,  5.9225e-01,  2.6414e-01],
        [-5.3718e-01,  7.8881e-01, -1.2834e+00, -1.0522e+00],
        [-2.9484e-01, -3.6218e-01, -8.9803e-02,  1.3251e-01],
        [ 1.2803e+00,  9.8217e-02,  7.6276e-01,  1.4488e+00],
        [ 4.3217e-01, -1.9736e+00,  4.2173e-01,  3.9577e-01],
        [-5.2506e-02, -8.2257e-01,  8.0709e-02,  8.7755e-04],
        [ 7.9567e-01,  3.2841e-01,  7.6276e-01,  1.0539e+00],
        [-1.2642e+00, -1.3198e-01, -1.3402e+00, -1.4471e+00],
        [-4.1601e-01,  1.0190e+00, -1.3971e+00, -1.3154e+00],
        [-1.1430e+00,  9.8217e-02, -1.2834e+00, -1.4471e+00],
        [-9.0068e-01,  1.7096e+00, -1.2834e+00, -1.1838e+00],
        [ 5.5333e-01,  5.5861e-01,  5.3541e-01,  5

In [13]:
# Minimal classifier: one hidden layer
class NeuralNetBasic(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.relu = nn.ReLU()                           # non-linearity
        self.fc2 = nn.Linear(hidden_size, num_classes)  # hidden -> class scores

    def forward(self, x):
        """Return the output of ``fc2`` for ``x``; no activation is applied to it."""
        return self.fc2(self.relu(self.fc1(x)))

# Model parameters
input_size = X_train.shape[1]  # Number of features (4 for Iris)
hidden_size = 16  # Arbitrary hidden layer size
num_classes = 3  # Number of output classes (3 for Iris)

# Instantiate the model
# NOTE(review): `model` is rebound by the later cell that instantiates
# NeuralNetAdvance, so this basic model is not the one that gets trained
# when the notebook is run top to bottom.
model = NeuralNetBasic(input_size, hidden_size, num_classes).to(device)

In [14]:
# Deeper classifier: three hidden layers, with dropout after the first two
class NeuralNetAdvance(nn.Module):
    """Fully connected classifier with three hidden layers and dropout.

    Args:
        input_size: number of input features per sample.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        hidden_size3: width of the third hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)    # input -> hidden 1
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)  # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)  # hidden 2 -> hidden 3
        self.fc4 = nn.Linear(hidden_size3, num_classes)   # hidden 3 -> class scores
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)  # regularization, shared by both dropout sites

    def forward(self, x):
        """Return raw class scores (logits) for ``x``.

        Dropout follows the first and second hidden layers only. No softmax
        is applied to the output; nn.CrossEntropyLoss takes unnormalized
        logits and applies log-softmax itself.
        """
        h1 = self.dropout(self.relu(self.fc1(x)))
        h2 = self.dropout(self.relu(self.fc2(h1)))
        h3 = self.relu(self.fc3(h2))
        return self.fc4(h3)

# Layer sizes for the deeper network
input_size = X_train.shape[1]  # number of features (4 for Iris)
hidden_size1, hidden_size2, hidden_size3 = 32, 64, 32  # hidden layer widths
num_classes = 3  # number of output classes (3 for Iris)

# Build the model, then move its parameters to the selected device
model = NeuralNetAdvance(
    input_size, hidden_size1, hidden_size2, hidden_size3, num_classes
)
model = model.to(device)


In [15]:
# Loss function: cross-entropy, suitable for multi-class classification
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over all model parameters, learning rate 0.001
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [16]:
# Training hyperparameters
num_epochs: int = 100  # Number of passes over the training data (epochs)
# NOTE(review): the `train_model(X_train, y_train)` call in this notebook never
# passes this value, so every epoch runs on the full training set.
batch_size: int = 16  # Batch size for training

In [18]:
def train_model(X_train, y_train, epochs=None, batch_size=None):
    """Train the notebook-level ``model`` using ``criterion`` and ``optimizer``.

    The model, loss function and optimizer are read from the notebook's
    global namespace. The loss is printed after every 10th epoch.

    Args:
        X_train: Feature tensor; the first dimension indexes samples.
        y_train: Target tensor aligned with ``X_train`` along the first
            dimension.
        epochs: Number of passes over the training data. ``None`` (default)
            uses the notebook-level ``num_epochs``.
        batch_size: Number of samples per optimizer step. ``None`` (default)
            feeds the whole training set as one batch per epoch, which is the
            original behavior. With an integer, the samples are reshuffled
            every epoch and processed in mini-batches of that size (the last
            batch may be smaller).

    Raises:
        ValueError: If ``X_train`` has no samples or ``batch_size`` is not
            positive.
    """
    total_epochs = num_epochs if epochs is None else epochs
    n_samples = X_train.shape[0]
    if n_samples == 0:
        raise ValueError("X_train must contain at least one sample")
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # One step covers everything in full-batch mode.
    step = n_samples if batch_size is None else batch_size

    model.train()
    for epoch in range(total_epochs):
        # Shuffle only in mini-batch mode so the default path consumes no
        # extra random numbers and stays identical to plain full-batch training.
        order = None
        if batch_size is not None:
            order = torch.randperm(n_samples, device=X_train.device)

        epoch_loss = 0.0
        for start in range(0, n_samples, step):
            if order is None:
                inputs, targets = X_train, y_train
            else:
                batch_idx = order[start:start + step]
                inputs, targets = X_train[batch_idx], y_train[batch_idx]

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()  # Clear gradients
            loss.backward()  # Backpropagation
            optimizer.step()  # Update model parameters

            # Sample-weighted mean of the batch losses; with a single full
            # batch the weight is exactly 1.0, so this equals loss.item().
            epoch_loss += loss.item() * (inputs.shape[0] / n_samples)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{total_epochs}], Loss: {epoch_loss:.4f}')

# How Does the Model Handle Batches?

In the forward pass, PyTorch automatically handles multiple inputs (batches). This works because operations such as matrix multiplication, addition, and activation functions are all vectorized, meaning they are applied to the entire batch at once. In this notebook, the `train_model(X_train, y_train)` call passes the whole training set to the model as a single batch on every epoch.

In [19]:
# Train the model on the training data (the loss is printed every 10th epoch)
train_model(X_train, y_train)

Epoch [10/100], Loss: 1.0639
Epoch [20/100], Loss: 1.0031
Epoch [30/100], Loss: 0.8829
Epoch [40/100], Loss: 0.7744
Epoch [50/100], Loss: 0.6520
Epoch [60/100], Loss: 0.5312
Epoch [70/100], Loss: 0.4683
Epoch [80/100], Loss: 0.4226
Epoch [90/100], Loss: 0.3878
Epoch [100/100], Loss: 0.3309


In [20]:
# Evaluate the model
# eval() only changes layer behavior (e.g. it disables the model's dropout);
# it does NOT turn off gradient tracking. Use torch.no_grad() for that.
model.eval()  # Set model to evaluation mode

NeuralNetAdvance(
  (fc1): Linear(in_features=4, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=3, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)

In [21]:
with torch.no_grad():  # inference only, so autograd bookkeeping is skipped
    test_outputs = model(X_test)
    # Predicted class = index of the largest logit in each row
    predicted = test_outputs.argmax(dim=1)
    n_correct = (predicted == y_test).sum().item()
    accuracy = n_correct / y_test.size(0)
    print(f'Accuracy on the test set: {accuracy * 100:.2f}%')

Accuracy on the test set: 93.33%


In [22]:
# Example of prediction on new data: two unseen samples with 4 raw features each
raw_measurements = [[5.1, 3.5, 1.4, 0.2], [6.5, 3.0, 5.5, 1.8]]
new_data = torch.tensor(raw_measurements, dtype=torch.float32).to(device)

# Apply the already-fitted scaler; scikit-learn works on CPU data, so the
# tensor is moved to the CPU first and the result is sent back to `device`.
scaled_measurements = scaler.transform(new_data.cpu())
new_data = torch.tensor(scaled_measurements, dtype=torch.float32).to(device)
print(new_data)

tensor([[-0.9007,  1.0190, -1.3402, -1.3154],
        [ 0.7957, -0.1320,  0.9901,  0.7907]])


* scaler.transform(new_data.cpu()) scales the data and returns a NumPy array.
* torch.tensor(...) converts the NumPy array back to a PyTorch tensor.
* .to(device) moves the tensor to the appropriate device (CPU or GPU).

In [29]:
with torch.no_grad():  # inference only
    predictions = model(new_data)  # raw class scores (logits), one row per sample
    print(predictions)
    # Predicted class = index of the largest score in each row
    predicted_classes = predictions.argmax(dim=1)
    print("Predicted classes for new data:", predicted_classes.cpu().numpy())

tensor([[ 4.9945, -3.1014, -3.4917],
        [-2.8206,  2.0504,  3.0794]])
Predicted classes for new data: [0 2]
