# Basic Neural Network with PyTorch â€” Iris Dataset

1. Load and inspect data  
2. Train / validation / test split  
3. Data preprocessing (scaling)  
4. Build a basic neural network (MLP)  
5. Training loop  
6. Testing and evaluation  

The Iris dataset is fully numeric and clean, so preprocessing focuses on **scaling and splitting**.

In [1]:
import numpy as np
import pandas as pd 

# sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

## 1) Load and Inspect the Iris Dataset

In [2]:
iris = load_iris()
X = iris.data
y = iris.target

feature_names = iris.feature_names
target_names = iris.target_names

df = pd.DataFrame(X, columns=feature_names)
df["label"] = y

df.head(), df["label"].value_counts()

(   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
 0                5.1               3.5                1.4               0.2   
 1                4.9               3.0                1.4               0.2   
 2                4.7               3.2                1.3               0.2   
 3                4.6               3.1                1.5               0.2   
 4                5.0               3.6                1.4               0.2   
 
    label  
 0      0  
 1      0  
 2      0  
 3      0  
 4      0  ,
 label
 0    50
 1    50
 2    50
 Name: count, dtype: int64)

## 2) Train / Validation / Test Split

We split first to avoid data leakage.

In [3]:
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp
)

print("Train size:", len(X_train))
print("Val size:  ", len(X_val))
print("Test size: ", len(X_test))

Train size: 105
Val size:   22
Test size:  23


## 3) Data Preprocessing: Feature Scaling

Neural networks are sensitive to feature scales.
We **fit the scaler on training data only**, then apply it to validation and test sets.

In [4]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)

X_train_scaled.mean(axis=0), X_train_scaled.std(axis=0)

(array([ 2.38327876e-15, -1.12145742e-15, -1.37456184e-16, -6.97854473e-17]),
 array([1., 1., 1., 1.]))

## 4) Convert to PyTorch Tensors and DataLoaders

In [5]:
X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
X_val_t   = torch.tensor(X_val_scaled,   dtype=torch.float32)
X_test_t  = torch.tensor(X_test_scaled,  dtype=torch.float32)

y_train_t = torch.tensor(y_train, dtype=torch.long)
y_val_t   = torch.tensor(y_val,   dtype=torch.long)
y_test_t  = torch.tensor(y_test,  dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_t, y_train_t),
                          batch_size=16, shuffle=True)
val_loader   = DataLoader(TensorDataset(X_val_t, y_val_t),
                          batch_size=32, shuffle=False)
test_loader  = DataLoader(TensorDataset(X_test_t, y_test_t),
                          batch_size=32, shuffle=False)

next(iter(train_loader))

[tensor([[-0.0854, -0.7807,  0.1775, -0.2658],
         [ 0.4968,  0.5379,  1.2476,  1.6726],
         [-0.9005,  0.9775, -1.3431, -1.2996],
         [-0.7840,  2.2961, -1.2868, -1.4289],
         [-0.9005,  0.7577, -1.2868, -1.2996],
         [ 1.0790,  0.3181,  1.1913,  1.4141],
         [-0.6676,  1.4170, -1.2868, -1.2996],
         [-0.2018, -1.0005, -0.1604, -0.2658],
         [ 1.5448,  0.3181,  1.2476,  0.7680],
         [-0.3183, -0.5609,  0.6281,  1.0264],
         [-0.2018, -0.1214,  0.2339, -0.0074],
         [-1.1333, -0.1214, -1.3431, -1.2996],
         [ 2.3598,  1.6368,  1.4729,  1.0264],
         [ 0.9626, -0.1214,  0.7971,  1.4141],
         [-0.5511,  1.8565, -1.1742, -1.0412],
         [-1.0169,  0.7577, -1.2305, -1.0412]]),
 tensor([1, 2, 0, 0, 0, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 0])]

## 5) Define a Basic Neural Network (MLP)

In [6]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        return self.net(x)

model = MLP(input_dim=4, hidden_dim=32, num_classes=3)
model

MLP(
  (net): Sequential(
    (0): Linear(in_features=4, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=3, bias=True)
  )
)

## 6) Training Setup

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

device

device(type='cpu')

## 7) Training Loop (with Validation Accuracy)

In [8]:
@torch.no_grad()
def evaluate_accuracy(loader):
    model.eval()
    correct, total = 0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        pred = logits.argmax(dim=1)
        correct += (pred == yb).sum().item()
        total += yb.size(0)
    return correct / total

def train_one_epoch(loader):
    model.train()
    total_loss = 0.0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

n_epochs = 100
for epoch in range(1, n_epochs + 1):
    loss = train_one_epoch(train_loader)
    val_acc = evaluate_accuracy(val_loader)
    if epoch % 20 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val Acc: {val_acc:.3f}")

Epoch 001 | Loss: 0.8642 | Val Acc: 0.818
Epoch 020 | Loss: 0.0338 | Val Acc: 0.909
Epoch 040 | Loss: 0.0072 | Val Acc: 0.909
Epoch 060 | Loss: 0.0017 | Val Acc: 0.909
Epoch 080 | Loss: 0.0010 | Val Acc: 0.909
Epoch 100 | Loss: 0.0007 | Val Acc: 0.909


## 8) Final Test Evaluation

In [9]:
@torch.no_grad()
def predict(loader):
    model.eval()
    y_true, y_pred = [], []
    for xb, yb in loader:
        xb = xb.to(device)
        logits = model(xb)
        pred = logits.argmax(dim=1)
        y_true.append(yb.numpy())
        y_pred.append(pred.cpu().numpy())
    return np.concatenate(y_true), np.concatenate(y_pred)

y_true, y_pred = predict(test_loader)

print("Test accuracy:", accuracy_score(y_true, y_pred))
print("Classification report:", classification_report(y_true, y_pred, target_names=target_names))
print("Confusion matrix:", confusion_matrix(y_true, y_pred))

Test accuracy: 0.9565217391304348
Classification report:               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00         7
  versicolor       0.89      1.00      0.94         8
   virginica       1.00      0.88      0.93         8

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

Confusion matrix: [[7 0 0]
 [0 8 0]
 [0 1 7]]
