## Problem 1: The "Sequential" Architecture


**Your Challenge:**
Implement the architecture from **Figure 9** of the article.
*   **Input:** 50 features.
*   **Hidden 1:** 30 neurons + ReLU.
*   **Hidden 2:** 20 neurons + ReLU.
*   **Output:** 3 neurons (for a 3-class classification problem).

```python
import torch
import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Exercise scaffold: a feed-forward classifier held in one nn.Sequential.

    The layer stack is intentionally left empty for the reader to fill in.

    Args:
        num_inputs: Width of each input feature vector (in_features of the
            first Linear layer described in the TASK comment).
        num_outputs: Number of output neurons (out_features of the last
            Linear layer described in the TASK comment).
    """
    def __init__(self, num_inputs: int, num_outputs: int):
        super().__init__()
        # TASK: Use nn.Sequential to stack:
        # Linear(num_inputs, 30) -> ReLU -> Linear(30, 20) -> ReLU -> Linear(20, num_outputs)
        # Note that no activation follows the last Linear in the target stack.
        self.layers = nn.Sequential(
            # Your code here
        )

    def forward(self, x):
        # Delegate entirely to the stacked layers.
        return self.layers(x)


```

In [14]:
import torch
import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Feed-forward network with two fixed-width hidden layers.

    Layout:
        Linear(num_inputs, 30) -> ReLU -> Linear(30, 20) -> ReLU
        -> Linear(20, num_outputs)

    No activation is applied after the last Linear layer, so ``forward``
    returns that layer's raw outputs.

    Args:
        num_inputs: Number of features in each input sample.
        num_outputs: Number of output neurons.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        # Layers are created in network order, so they occupy the same
        # indices (0..4) inside the Sequential container.
        first_hidden = [nn.Linear(num_inputs, 30), nn.ReLU()]
        second_hidden = [nn.Linear(30, 20), nn.ReLU()]
        output_layer = [nn.Linear(20, num_outputs)]
        self.layers = nn.Sequential(*first_hidden, *second_hidden, *output_layer)

    def forward(self, x):
        """Pass ``x`` through every layer in order and return the result."""
        outputs = self.layers(x)
        return outputs

# NOTE(review): this demo instance uses 10 inputs and 2 outputs, whereas the
# challenge statement asks for 50 input features and 3 output neurons. A model
# built this way cannot consume the 50-feature X_train created in Problem 2;
# rebuild it with num_inputs=50 before running the training loop.
model = NeuralNetwork(num_inputs=10, num_outputs=2)
print(f'Model: {model}')

# Count only the parameters that will receive gradients during training.
trainable_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print(f"Number of trainable parameters: {trainable_params}")

Model: NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=2, bias=True)
  )
)
Number of trainable parameters: 992


## Problem 2: Custom Dataset

**Your Challenge:**
Implement the `CustomDataset` from the article, but add a **First Principle** check.

```python
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Exercise scaffold: a Dataset that pairs feature rows with labels.

    ``__len__`` and ``__getitem__`` are left as ``pass`` for the reader to
    implement; until then both return None.

    Args:
        X: Collection of samples, stored as ``self.features``.
        y: Collection of labels, stored as ``self.labels``.
    """
    def __init__(self, X, y):
        # Only references are stored; nothing is copied or validated here.
        self.features = X
        self.labels = y

    def __len__(self):
        # Must return the number of samples
        pass

    def __getitem__(self, index):
        # Must return (features[index], labels[index])
        pass

# Setup Data
# Random stand-in data for the exercise; values differ on every run unless a
# seed is set beforehand.
X_train = torch.randn(100, 50) # 100 samples, 50 features
y_train = torch.randint(0, 3, (100,)) # 3 classes: 0, 1, 2

train_ds = CustomDataset(X_train, y_train)

# TASK: Create a DataLoader
# Set batch_size=10, shuffle=True, and drop_last=True
# With 100 samples and batch_size=10 there are exactly 10 full batches, so
# drop_last=True has nothing to discard in this configuration.
train_loader = DataLoader(train_ds, batch_size=10, shuffle=True, drop_last=True)
```


In [17]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Dataset that pairs each feature row with the label at the same index.

    Args:
        X: Indexable, sized collection of samples (for example a tensor whose
            first dimension is the sample dimension).
        y: Indexable, sized collection of labels, one per sample.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of entries.
    """

    def __init__(self, X, y):
        # First-principles check: every sample needs exactly one label.
        # Without it, a mismatch only surfaces later as an IndexError in the
        # middle of an epoch, or as labels that are silently never used.
        num_features, num_labels = len(X), len(y)
        if num_features != num_labels:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {num_features} and {num_labels}"
            )
        self.features = X
        self.labels = y

    def __len__(self):
        """Return the number of samples."""
        # len() equals shape[0] for tensors and also works for plain lists.
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.features[idx], self.labels[idx]
    

# Random stand-in data: 100 samples with 50 features each, labels drawn from
# {0, 1}.
# NOTE(review): the task statement asks for 3 classes (randint(0, 3, ...)) and
# batch_size=10; this cell uses 2 classes and batch_size=16 instead.
X_train = torch.randn(100, 50)
y_train = torch.randint(0, 2, (100,))

train_dataset = CustomDataset(X_train, y_train)

# drop_last=True discards the final short batch: 100 samples at 16 per batch
# gives 6 full batches and leaves the remaining 4 samples unused each epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    drop_last=True,
)

# Walk through one epoch and report the shape of every batch.
print(f"length of dataset: {len(train_dataset)} \n")
for batch_idx, (features, labels) in enumerate(train_loader):
    print(f"Batch no: {batch_idx+1}")
    print(f"Features shape: {features.shape} \nLabels shape: {labels.shape}\n ")

length of dataset: 100 

Batch no: 1
Features shape: torch.Size([16, 50]) 
Labels shape: torch.Size([16])
 
Batch no: 2
Features shape: torch.Size([16, 50]) 
Labels shape: torch.Size([16])
 
Batch no: 3
Features shape: torch.Size([16, 50]) 
Labels shape: torch.Size([16])
 
Batch no: 4
Features shape: torch.Size([16, 50]) 
Labels shape: torch.Size([16])
 
Batch no: 5
Features shape: torch.Size([16, 50]) 
Labels shape: torch.Size([16])
 
Batch no: 6
Features shape: torch.Size([16, 50]) 
Labels shape: torch.Size([16])
 


## Problem 3: The Standard Training Loop
This is where everything we learned (Autograd, Modules, Data) comes together. 

**Your Challenge:**
Train the model for 5 epochs.

```python
import torch.nn.functional as F

# Exercise scaffold for the training loop: steps 1-4 are left for the reader.
# As written, `loss` is never assigned, so the print at the bottom fails until
# step 2 is implemented.
# NOTE(review): `model` must accept the 50-feature batches that train_loader
# yields; the instance created in Problem 1 was built with num_inputs=10.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(5):
    model.train() # Principle: Set to training mode
    
    for batch_idx, (features, labels) in enumerate(train_loader):
        # 1. Forward Pass: Get logits (the model's raw outputs for `features`)
        
        # 2. Loss: Use F.cross_entropy(logits, labels) and store it in `loss`
        # Why CrossEntropy? (Hint: it applies log-softmax followed by negative
        # log-likelihood internally, so pass raw logits, not probabilities)
        
        # 3. Backward Pass: Calculate gradients
        
        # 4. Update: Step and Zero_grad
        # Gradients accumulate across backward() calls, so they must be
        # cleared once per batch.
        
        # Log every 5th batch (batch_idx 0, 5, 10, ...).
        if batch_idx % 5 == 0:
            print(f"Epoch {epoch} | Batch {batch_idx} | Loss {loss.item():.4f}")
```