In [3]:
!pip install --upgrade pytorch-lightning torchmetrics
!pip install 'ray[tune]' 'ray[default]'

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.0.post0-py3-none-any.whl (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.9/800.9 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchmetrics
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.4/840.4 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.10.1 pytorch-lightning-2.2.0.post0 torchmetrics-1.3.1
Collecting ray[tune]
  Downloading ray-2.9.2-cp310-cp310-manylinux2014_x86_64.whl (64.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboardX>=1.9 (from ray[tun

In [None]:
import pandas as pd
from sklearn.datasets import load_iris

# Load Iris dataset
iris = load_iris()

# Create a DataFrame
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

# Display the DataFrame
iris_df.sample(3)

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Load and prepare the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Preprocess the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert arrays to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
X_test_tensor = torch.FloatTensor(X_test)
y_train_tensor = torch.LongTensor(y_train)
y_test_tensor = torch.LongTensor(y_test)

# Create datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

# Vanilla Pytorch




In [36]:


# Define a 3-layer Perceptron
class ThreeLayerPerceptron(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(ThreeLayerPerceptron, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.relu(self.layer1(x))
        out = self.relu(self.layer2(out))
        return self.layer3(out)

model = ThreeLayerPerceptron(input_size=4, hidden_size=100, num_classes=3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop with accuracy calculation
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Calculate average loss and accuracy
    avg_loss = total_loss / len(train_loader)
    accuracy = 100 * correct / total

    # Print every 10 epochs (or adjust to your preference)
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

print("Training complete")


Epoch [10/100], Loss: 0.1913, Accuracy: 94.17%
Epoch [20/100], Loss: 0.0817, Accuracy: 97.50%
Epoch [30/100], Loss: 0.0529, Accuracy: 98.33%
Epoch [40/100], Loss: 0.0433, Accuracy: 98.33%
Epoch [50/100], Loss: 0.0408, Accuracy: 98.33%
Epoch [60/100], Loss: 0.0404, Accuracy: 98.33%
Epoch [70/100], Loss: 0.0324, Accuracy: 98.33%
Epoch [80/100], Loss: 0.0437, Accuracy: 98.33%
Epoch [90/100], Loss: 0.0309, Accuracy: 99.17%
Epoch [100/100], Loss: 0.0268, Accuracy: 99.17%
Training complete


# Pytorch Lightning for Equivalent as Above


In [38]:
import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer
from torchmetrics.classification.accuracy import Accuracy

class LightningThreeLayerPerceptron(LightningModule):
    def __init__(self, input_size=4, hidden_size=100, num_classes=3):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task='multiclass', num_classes=num_classes, average='macro')

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        return self.layer3(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = self.accuracy(preds, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer

    def train_dataloader(self):
        return train_loader

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        x, _ = batch  # Assuming your DataLoader yields a tuple (inputs, labels)
        logits = self(x)  # Forward pass to get logits
        # Optionally apply softmax for probabilities (not needed for just getting the class predictions)
        # probabilities = torch.softmax(logits, dim=1)
        return logits  # Or return `probabilities` if you applied softmax

# Initialize the PyTorch Lightning model
lightning_model = LightningThreeLayerPerceptron(input_size=4, hidden_size=100, num_classes=3)

# Set up the trainer
trainer = Trainer(max_epochs=10)

# Train the model
trainer.fit(lightning_model)

# This will automatically call your `predict_step()` for each batch in `test_loader`
predictions = trainer.predict(model=lightning_model, dataloaders=test_loader)

predicted_classes = [torch.argmax(batch_logits, dim=1) for batch_logits in predictions]
# Optionally, concatenate the predictions from all batches if you want a single tensor
predicted_classes = torch.cat(predicted_classes)


# Now calculate the accuracy using tensors
correct_predictions = torch.eq(predicted_classes, y_test_tensor).sum().item()
total_predictions = y_test_tensor.size(0)
accuracy = correct_predictions / total_predictions

print(f'Accuracy: {accuracy*100:.2f}%')

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type               | Params
-------------------------------------------------
0 | layer1    | Linear             | 500   
1 | layer2    | Linear             | 10.1 K
2 | layer3    | Linear             | 303   
3 | criterion | CrossEntropyLoss   | 0     
4 | accuracy  | MulticlassAccuracy | 0     
-------------------------------------------------
10.9 K    Trainable params
0         Non-trainable params
10.9 K    Total params
0.044     Total estimated model params size (MB)
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (8) is smaller than the log

Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


Predicting: |          | 0/? [00:00<?, ?it/s]

Accuracy: 96.67%


In [42]:
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.integration.pytorch_lightning import TuneReportCallback

class LightningThreeLayerPerceptron(LightningModule):
    def __init__(self, input_size=4, hidden_size=100, num_classes=3, learning_rate=0.001):
        super().__init__()
        self.save_hyperparameters()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task='multiclass', num_classes=num_classes, average='macro')

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        return self.layer3(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = self.accuracy(preds, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer

    def train_dataloader(self):
        # Make sure you have your DataLoader set up here
        return train_loader

def train_iris_tune(config, num_epochs=10, num_gpus=0):
    # Data preparation
    iris = load_iris()
    X, y = iris.data, iris.target
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.LongTensor(y_train)
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

    # Model initialization with config from Ray Tune
    model = LightningThreeLayerPerceptron(
        input_size=4,
        hidden_size=config["hidden_size"],
        num_classes=3,
        learning_rate=config["lr"]
    )

    trainer = pl.Trainer(
        max_epochs=num_epochs,
        gpus=num_gpus,
        callbacks=[TuneReportCallback({"loss": "train_loss_epoch", "acc": "train_acc_epoch"}, on="epoch_end")],
    )
    trainer.fit(model, train_dataloader=train_loader)

def tune_iris_model(num_samples=10, num_epochs=10, gpus_per_trial=0):
    config = {
        "lr": tune.loguniform(1e-4, 1e-1),
        "hidden_size": tune.choice([50, 100, 150])
    }

    scheduler = ASHAScheduler(
        metric="acc",  # Keep this here
        mode="max",  # And keep this here
        max_t=num_epochs,
        grace_period=1,
        reduction_factor=2
    )

    analysis = tune.run(
        tune.with_parameters(
            train_iris_tune,
            num_epochs=num_epochs,
            num_gpus=gpus_per_trial
        ),
        resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
        # Remove the metric and mode arguments from here
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=tune.CLIReporter(
            parameter_columns=["lr", "hidden_size"],
            metric_columns=["loss", "acc", "training_iteration"]
        )
    )

    best_trial = analysis.best_trial
    best_acc = best_trial.last_result["acc"]
    print(f'Best trial final accuracy: {best_acc*100:.2f}%')

    print("Best hyperparameters found were: ", analysis.best_config)

# Ensure all necessary imports and variables are defined
# Ensure the train_iris_tune function is correctly reporting "acc" to TuneReportCallback
tune_iris_model(num_samples=10, num_epochs=10, gpus_per_trial=0)


2024-02-18 21:43:28,766	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+------------------------------------------------------------------------+
| Configuration for experiment     train_iris_tune_2024-02-18_21-43-28   |
+------------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator                 |
| Scheduler                        AsyncHyperBandScheduler               |
| Number of trials                 10                                    |
+------------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_iris_tune_2024-02-18_21-43-28
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/train_iris_tune_2024-02-18_21-43-28`

Trial status: 10 PENDING
Current time: 2024-02-18 21:43:28. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+----------------------------------------------------------------------+
| Trial name                    status              lr     hidden_

Resume experiment with: tune.run(..., resume=True)
- train_iris_tune_c0b2b_00000: FileNotFoundError('Could not fetch metrics for train_iris_tune_c0b2b_00000: both result.json and progress.csv were not found at /root/ray_results/train_iris_tune_2024-02-18_21-43-28/train_iris_tune_c0b2b_00000_0_hidden_size=50,lr=0.0049_2024-02-18_21-43-28')
- train_iris_tune_c0b2b_00001: FileNotFoundError('Could not fetch metrics for train_iris_tune_c0b2b_00001: both result.json and progress.csv were not found at /root/ray_results/train_iris_tune_2024-02-18_21-43-28/train_iris_tune_c0b2b_00001_1_hidden_size=150,lr=0.0068_2024-02-18_21-43-28')
- train_iris_tune_c0b2b_00002: FileNotFoundError('Could not fetch metrics for train_iris_tune_c0b2b_00002: both result.json and progress.csv were not found at /root/ray_results/train_iris_tune_2024-02-18_21-43-28/train_iris_tune_c0b2b_00002_2_hidden_size=150,lr=0.0008_2024-02-18_21-43-28')
- train_iris_tune_c0b2b_00003: FileNotFoundError('Could not fetch metrics for

Trial status: 10 PENDING
Current time: 2024-02-19 00:01:35. Total running time: 2hr 18min 6s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+----------------------------------------------------------------------+
| Trial name                    status              lr     hidden_size |
+----------------------------------------------------------------------+
| train_iris_tune_c0b2b_00000   PENDING    0.00485003               50 |
| train_iris_tune_c0b2b_00001   PENDING    0.0068225               150 |
| train_iris_tune_c0b2b_00002   PENDING    0.000822753             150 |
| train_iris_tune_c0b2b_00003   PENDING    0.00129466              150 |
| train_iris_tune_c0b2b_00004   PENDING    0.00352092              150 |
| train_iris_tune_c0b2b_00005   PENDING    0.0033268               150 |
| train_iris_tune_c0b2b_00006   PENDING    0.00277441               50 |
| train_iris_tune_c0b2b_00007   PENDING    0.000472166             100 |
| train_iris_tune_c0b2b_00008   PENDING    0.0121337         

ValueError: To fetch the `best_trial`, pass a `metric` and `mode` parameter to `tune.run()`. Alternatively, use the `get_best_trial(metric, mode)` method to set the metric and mode explicitly.