# Load libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Set the Device

You should determine if a GPU is available and set your device accordingly.

In [2]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

Using cuda device


In [3]:
# Sanity check: display whether PyTorch reports a usable CUDA device.
torch.cuda.is_available()

True

# Load the Iris dataset

In [4]:
# Fetch scikit-learn's bundled Iris dataset and unpack it into
# the feature matrix (X) and the class labels (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Split the dataset into a training set and a test set

In [5]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Convert datasets to PyTorch tensors

In PyTorch, the .to(device) method is used to explicitly move tensors or models to a specific device, either the CPU or a GPU. When you're training neural networks, especially deep ones, computational requirements can be high, and utilizing a GPU can significantly speed up the training process.

In [6]:
# Convert the splits to tensors and move them to the selected device.
# torch.as_tensor accepts NumPy arrays as well as existing tensors, so this cell
# can be re-run safely even after the names already hold tensors on the GPU
# (the legacy torch.FloatTensor / torch.LongTensor constructors fail in that case).
# Features become float32 (the dtype of the nn.Linear weights) and labels become
# int64, which is what CrossEntropyLoss expects for class-index targets.
X_train = torch.as_tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.as_tensor(y_train, dtype=torch.long).to(device)
X_test = torch.as_tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.as_tensor(y_test, dtype=torch.long).to(device)


# Define the neural network structure

Creating a class is a recommended way to define models in PyTorch. The class-based structure allows for organized, modular, and scalable code. You can create more straightforward models using just functions, but using classes provides greater flexibility, especially for complex architectures.

### Notes:
- 12 and 8 are somewhat arbitrary numbers. In practice, choosing the number of neurons and layers often involves experimentation.
- `nn.Linear` denotes fully connected layers, where each neuron from the previous layer connects to every neuron in the current layer.
- ReLU (Rectified Linear Unit) is a popular choice for hidden layers due to its simplicity and effectiveness.
- The last layer has no activation function because the loss function chosen in the next step (`criterion`) applies it internally.
  - For classification tasks with multiple classes, `CrossEntropyLoss` in PyTorch expects the raw output scores (logits): it combines a `LogSoftmax` activation with a negative log-likelihood loss, so adding a SoftMax layer to the model would apply it twice.

In [7]:
class IrisNet(nn.Module):
    """Fully connected classifier with two ReLU hidden layers and a linear output layer.

    The default sizes reproduce the original 4 -> 12 -> 8 -> 3 network, so
    ``IrisNet()`` behaves exactly as before and previously saved ``state_dict``
    files (keys ``fc1``, ``fc2``, ``fc3``) still load. The sizes are exposed as
    arguments so the architecture can be tuned without editing the class.

    Args:
        in_features: Number of input features per sample.
        hidden1: Number of neurons in the first hidden layer.
        hidden2: Number of neurons in the second hidden layer.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, in_features: int = 4, hidden1: int = 12,
                 hidden2: int = 8, num_classes: int = 3):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)   # First hidden layer
        self.fc2 = nn.Linear(hidden1, hidden2)       # Second hidden layer
        self.fc3 = nn.Linear(hidden2, num_classes)   # Output layer: one score per class

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (logits) for a batch of inputs.

        Args:
            x: Tensor whose last dimension has ``in_features`` entries.

        Returns:
            Tensor whose last dimension has ``num_classes`` entries. No softmax
            is applied because ``CrossEntropyLoss`` works on raw scores.
        """
        x = torch.relu(self.fc1(x))  # ReLU after the first hidden layer
        x = torch.relu(self.fc2(x))  # ReLU after the second hidden layer
        return self.fc3(x)           # No activation on the output layer

In [8]:
# Seed PyTorch's random number generator before building the network so the
# randomly initialised weights are identical on every Restart & Run All.
# 42 matches the random_state used for the train/test split.
torch.manual_seed(42)
model = IrisNet().to(device)

# Define loss function and optimizer

In [9]:
# Loss: cross-entropy computed from the raw network outputs (the softmax step
# is handled inside the loss itself).
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every parameter of the model, with a step size of 0.01.
learning_rate = 0.01
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop

In [10]:
# Training hyperparameters, gathered in one place so they are easy to tune.
max_epochs = 500  # Upper bound on epochs; kept high to ensure convergence with raw data
patience = 10     # How many epochs to wait without validation improvement
log_every = 50    # Print the training loss every `log_every` epochs

best_val_loss = float('inf')  # Best validation loss seen so far (plain Python float)
best_state = None             # Copy of the weights that produced best_val_loss
counter = 0                   # Consecutive epochs without improvement

# NOTE(review): the held-out test split (X_test / y_test) doubles as the validation
# set for early stopping and checkpoint selection, so the accuracy measured on it
# afterwards is optimistic. Carve a separate validation split out of the training
# data if an unbiased test score is needed.

for epoch in range(max_epochs):
    # --- Training step on the full training set (one batch per epoch) ---
    model.train()          # Training mode (matters once dropout / batch-norm layers are added)
    optimizer.zero_grad()  # Clear out the gradients from the last step
    out = model(X_train)   # Forward pass
    loss = criterion(out, y_train)
    loss.backward()        # Backward pass: gradients of the loss w.r.t. the parameters
    optimizer.step()       # Update model parameters

    # --- Validation step: no gradients needed, which saves computation and memory ---
    model.eval()
    with torch.no_grad():
        val_out = model(X_test)
        # .item() stores a Python float instead of keeping a tensor alive across epochs
        val_loss = criterion(val_out, y_test).item()

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # state_dict() returns references to the live parameters, so clone them;
        # otherwise the snapshot would keep changing as training continues.
        best_state = {name: tensor.detach().clone()
                      for name, tensor in model.state_dict().items()}
        counter = 0  # Reset the counter since validation loss improved
    else:
        counter += 1  # No improvement this epoch

    # Stop once `patience` consecutive epochs have passed without improvement.
    if counter >= patience:
        print("Early stopping due to no improvement!")
        break  # Exit the training loop

    if (epoch+1) % log_every == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item()}")

# Roll back to the weights with the lowest validation loss. Without this, early
# stopping would leave the model `patience` epochs past its best state.
if best_state is not None:
    model.load_state_dict(best_state)

Epoch 50, Loss: 0.5409395098686218
Epoch 100, Loss: 0.08089379221200943
Epoch 150, Loss: 0.0559065006673336
Epoch 200, Loss: 0.05308171734213829
Epoch 250, Loss: 0.05149996280670166
Epoch 300, Loss: 0.050328973680734634
Epoch 350, Loss: 0.0493655726313591
Epoch 400, Loss: 0.04863674193620682
Epoch 450, Loss: 0.048053428530693054
Epoch 500, Loss: 0.047649189829826355


# Evaluate the model

In [11]:
# Score the trained network on the held-out split. Autograd is switched off
# because inference needs no gradients, which saves memory and time.
with torch.no_grad():
    test_out = model(X_test)            # Raw class scores, one row per test sample
    predicted = test_out.argmax(dim=1)  # Index of the largest score = predicted class

# scikit-learn operates on NumPy arrays, so bring both label vectors back to the CPU first.
accuracy = accuracy_score(y_test.cpu().numpy(), predicted.cpu().numpy())
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 100.00%


# Deployment
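To deploy a PyTorch model, save its learned parameters (the `state_dict`) to disk, then load them into a fresh instance of the same model class and switch it to evaluation mode before making predictions.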

In [12]:
# Save the model: write only the learned parameters (the state_dict) to disk
weights = model.state_dict()
torch.save(weights, "iris_model.pth")

In [13]:
# Load the model for inference
model = IrisNet().to(device)
# map_location remaps the saved tensors onto the current device, so a checkpoint
# written on a GPU machine also loads on a CPU-only one.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted
# source (recent PyTorch versions also offer weights_only=True for this).
state_dict = torch.load("iris_model.pth", map_location=device)
model.load_state_dict(state_dict)
model.eval()  # Set the model to evaluation mode

IrisNet(
  (fc1): Linear(in_features=4, out_features=12, bias=True)
  (fc2): Linear(in_features=12, out_features=8, bias=True)
  (fc3): Linear(in_features=8, out_features=3, bias=True)
)

In [14]:
# Make a prediction on new data

# New samples to classify, given as a nested list: one row of four measurements each
new_data = [[5.1, 3.5, 1.4, 0.2],  # Some iris measurements
            [6.7, 3.0, 5.2, 2.3]]  # Another set of iris measurements

# Convert the data to a float32 tensor and move it to the same `device` the model
# lives on. Reusing `device` (instead of hard-coding 'cuda') keeps the input and
# the model together on both GPU and CPU-only machines.
input_tensor = torch.tensor(new_data, dtype=torch.float32).to(device)

# Get the model's predictions
with torch.no_grad():  # This ensures that the operation is not tracked by PyTorch's autograd
    outputs = model(input_tensor)

# The predicted class is the index of the largest output score in each row
predicted_classes = outputs.argmax(dim=1).tolist()

print(predicted_classes)  # This will give you the indices of the predicted classes for each input

[0, 2]


# TODO

Next time we will learn about hyperparameters in the context of neural networks.

Things we will be looking at:
- Learning rate
- Batch size
- Epochs
- Optimizer
- Network architecture
- Dropout
- Regularization
- Momentum