# Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchensemble import VotingClassifier
# Extras
import time
# Torch Ensemble imports
from torchensemble import VotingClassifier  # voting is a classic ensemble strategy

In [2]:
# params
# Set training parameters
batch_size = 64
learning_rate = 0.001
weight_decay = 1e-4
epochs = 15

base_estimator = nn.Sequential(
    nn.Flatten(),            # Flatten the 28x28 input images to a vector of 784
    nn.Linear(28 * 28, 256), # First fully connected layer
    nn.ReLU(),               # Activation function
    nn.Linear(256, 128),     # Second fully connected layer
    nn.ReLU(),               # Activation function
    nn.Linear(128, 10)       # Output layer for 10 classes
)
# If using GPU, transfer data to the GPU inside fit() and evaluate() loops
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Set the base estimator to the device
base_estimator.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=128, bias=True)
  (4): ReLU()
  (5): Linear(in_features=128, out_features=10, bias=True)
)

In [3]:
# Load data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Normalize using MNIST mean and std
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
# Define the ensemble
ensemble = VotingClassifier(
    estimator=base_estimator,  # here is your deep learning model
    n_estimators=5,           # number of base estimators
    cuda=torch.cuda.is_available()  # Enable GPU if available
)

# Set the criterion
criterion = nn.CrossEntropyLoss()  # training objective
ensemble.set_criterion(criterion)

# Set the optimizer
ensemble.set_optimizer(
    "Adam",                       # type of parameter optimizer
    lr=learning_rate,             # learning rate of parameter optimizer
    weight_decay=weight_decay,    # weight decay of parameter optimizer
)

# Set the learning rate scheduler
ensemble.set_scheduler(
    "CosineAnnealingLR",          # type of learning rate scheduler
    T_max=epochs,                 # number of epochs for decay
)

# Train the ensemble

In [5]:
start_time = time.time()
ensemble.fit(
    train_loader,
    epochs=epochs,                # number of training epochs
)
end_time = time.time()
print(f"Time taken: {end_time-start_time:.3f} seconds")

Estimator: 000 | Epoch: 000 | Batch: 000 | Loss: 2.31229 | Correct: 3/64
Estimator: 000 | Epoch: 000 | Batch: 100 | Loss: 0.18821 | Correct: 62/64
Estimator: 000 | Epoch: 000 | Batch: 200 | Loss: 0.24857 | Correct: 59/64
Estimator: 000 | Epoch: 000 | Batch: 300 | Loss: 0.18057 | Correct: 58/64
Estimator: 000 | Epoch: 000 | Batch: 400 | Loss: 0.22935 | Correct: 59/64
Estimator: 000 | Epoch: 000 | Batch: 500 | Loss: 0.30720 | Correct: 58/64
Estimator: 000 | Epoch: 000 | Batch: 600 | Loss: 0.02711 | Correct: 64/64
Estimator: 000 | Epoch: 000 | Batch: 700 | Loss: 0.19426 | Correct: 58/64
Estimator: 000 | Epoch: 000 | Batch: 800 | Loss: 0.14643 | Correct: 62/64
Estimator: 000 | Epoch: 000 | Batch: 900 | Loss: 0.13720 | Correct: 61/64
Estimator: 001 | Epoch: 000 | Batch: 000 | Loss: 2.32213 | Correct: 3/64
Estimator: 001 | Epoch: 000 | Batch: 100 | Loss: 0.32895 | Correct: 59/64
Estimator: 001 | Epoch: 000 | Batch: 200 | Loss: 0.41220 | Correct: 58/64
Estimator: 001 | Epoch: 000 | Batch: 300

In [17]:
# Evaluate the ensemble
acc = ensemble.evaluate(test_loader)  # testing accuracy
print(f"Test Accuracy: {acc:.2f}%")


ZeroDivisionError: division by zero