In [1]:
#torch and lightning (deep learning/model creation and training)
import torch
from torch import nn
import lightning as L
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

#ray (tune/grid search)
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray.tune.schedulers import ASHAScheduler
from functools import partial


#matplotlib and seaborn (plotting)
import matplotlib.pyplot as plt
import seaborn as sns

# #pyOD (outlier detection)
# from pyod.models.iforest import IForest

#mlflow (loggind/tracking)
import mlflow

#general purpose
import numpy as np
import pandas as pd
import os

#split data into train and test sets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold


  from .autonotebook import tqdm as notebook_tqdm
2025-01-21 21:28:02,710	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-01-21 21:28:02,856	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
df = pd.read_parquet('data.parquet')
df.head(4)

Unnamed: 0,X86_LOCAL_APIC,OPENVSWITCH,TEXTSEARCH_FSM,NETFILTER_XT_MATCH_TCPMSS,MPLS,NFC_HCI,NETFILTER_XT_MATCH_TIME,NET_MPLS_GSO,NFC_SHDLC,NETFILTER_XT_MATCH_U32,...,ARCH_SUPPORTS_INT128,SLABINFO,MICROCODE_AMD,ISDN_DRV_HISAX,CHARGER_BQ24190,SND_SOC_NAU8825,BH1750,NETWORK_FILESYSTEMS,active_options,perf
0,1,0,0,0,1,0,0,1,0,0,...,1,0,0,0,1,0,0,0,1435,50222120
1,1,0,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,1382,16660024
2,1,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,0,0,1626,43080856
3,1,0,0,0,0,0,0,0,0,0,...,1,1,1,0,1,0,1,0,2140,27261672


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92562 entries, 0 to 92561
Columns: 9469 entries, X86_LOCAL_APIC to perf
dtypes: int64(9469)
memory usage: 6.5 GB


# Float point transformation

+ Pass the data to float
+ Get used on pytorch models

In [4]:
df = df.astype('float32')

In [5]:
#df.describe()

In [6]:
df = df.sample(500)
df

Unnamed: 0,X86_LOCAL_APIC,OPENVSWITCH,TEXTSEARCH_FSM,NETFILTER_XT_MATCH_TCPMSS,MPLS,NFC_HCI,NETFILTER_XT_MATCH_TIME,NET_MPLS_GSO,NFC_SHDLC,NETFILTER_XT_MATCH_U32,...,ARCH_SUPPORTS_INT128,SLABINFO,MICROCODE_AMD,ISDN_DRV_HISAX,CHARGER_BQ24190,SND_SOC_NAU8825,BH1750,NETWORK_FILESYSTEMS,active_options,perf
8398,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2368.0,187356352.0
17569,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1766.0,22655192.0
7609,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1210.0,23184328.0
19975,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1423.0,18807096.0
61009,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1421.0,19454496.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45088,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1436.0,43939520.0
5260,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1902.0,29591920.0
23131,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1822.0,47054048.0
51233,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,2388.0,33176640.0


# sample and split the dataset

In [7]:
target_column = 'perf'

In [8]:
y = df[target_column]
X = df.drop(columns=[target_column])

In [9]:
print("X Shape: ",X.shape,"\n Y Shape: ", y.shape)

X Shape:  (500, 9468) 
 Y Shape:  (500,)


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [13]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)


# Create a pytorch data loader

In [14]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]
        

        
        

In [15]:
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=512, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=512, shuffle=False)

# Create the models, using pytorch lightningmodule

In [16]:
number_of_features = X_train.shape[1]

In [17]:
# model_sequence_high_dropout = torch.nn.Sequential(nn.Linear(number_of_features, number_of_features/2),
#                                      nn.ReLU(),
#                                      nn.Dropout(0.8),
#                                      nn.Linear(number_of_features/2, number_of_features/2),
#                                      nn.ReLU(),
#                                      nn.Dropout(0.5),
#                                      nn.Linear(number_of_features/2, 1))

# model_sequence_low_dropout = torch.nn.Sequential(nn.Linear(number_of_features, number_of_features/2),
#                                      nn.ReLU(),
#                                      nn.Dropout(0.3),
#                                      nn.Linear(number_of_features/2, number_of_features/2),
#                                      nn.ReLU(),
#                                      nn.Dropout(0.3),
#                                      nn.Linear(number_of_features/2, 1))

In [18]:
class LightningModel(L.LightningModule):
    def __init__(self, num_features, activation="ReLU", optimizer_name="Adam", loss_name="MSELoss"):
        super().__init__()
        self.num_features = num_features
        self.activation = activation
        self.optimizer_name = optimizer_name
        self.loss_name = loss_name
        self.model = self.build_model()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, _ = batch
        x = x.view(x.size(0), -1)
        z = self(x)
        loss = self.get_loss_function()(z, x)
        self.log('train_loss', loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, _ = batch
        x = x.view(x.size(0), -1)
        z = self(x)
        loss = self.get_loss_function()(z, x)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        return self.get_optimizer()

    def get_optimizer(self):
        optimizers = {
            "Adam": torch.optim.Adam(self.parameters(), lr=0.001),
            "SGD": torch.optim.SGD(self.parameters(), lr=0.001),
            "RMSprop": torch.optim.RMSprop(self.parameters(), lr=0.001)
        }
        return optimizers[self.optimizer_name]

    def get_loss_function(self):
        loss_functions = {
            "MSELoss": nn.MSELoss(),
            "L1Loss": nn.L1Loss(),
            "SmoothL1Loss": nn.SmoothL1Loss(),
            "CrossEntropyLoss": nn.CrossEntropyLoss()
        }
        return loss_functions[self.loss_name]

    def get_activation(self):
        activations = {
            "ReLU": nn.ReLU(),
            "PReLU": nn.PReLU(),
            "ELU": nn.ELU()
        }
        return activations[self.activation]

    def build_model(self):
        hidden_size = self.num_features // 2
        hidden_size2 = hidden_size // 2
        return nn.Sequential(
            nn.Linear(self.num_features, hidden_size),
            self.get_activation(),
            nn.Dropout(0.3),
            nn.Linear(hidden_size, hidden_size2),
            self.get_activation(),
            nn.Dropout(0.3),
            nn.Linear(hidden_size2, 1)
        )

change the max epochs if your upper limmit is greater than 100 <br>
`def train_model_tune(config, num_features, train_dataloader, val_dataloader, max_epochs=100):`

In [19]:
def train_model_tune(config, num_features, train_dataloader, val_dataloader, max_epochs=100):
    model = LightningModel(
        num_features=num_features,
        activation=config["activation"],
        optimizer_name=config["optimizer"],
        loss_name=config["loss_function"]
    )

    metrics = {"loss": "val_loss"}
    callbacks = [TuneReportCallback(metrics, on="validation_end")]
    
    trainer = L.Trainer(
        max_epochs=max_epochs,
        accelerator='auto',
        devices=1,
        callbacks=callbacks,
        enable_progress_bar=False  # Disable progress bar for cleaner Ray Tune output
    )
    
    trainer.fit(model, train_dataloader, val_dataloader)


In [20]:
def tune_hyperparameters(num_features, train_dataloader, val_dataloader, num_samples=10):
    # Define the search space
    config = {
        "optimizer": tune.choice(["Adam", "AdamW"]),
        "loss_function": tune.choice(["MAPE", "MSE","SmoothL1Loss"]),
        "activation": tune.choice(["ReLU", "PReLU", "ELU"]),
    }
    
    search_algo = OptunaSearch(
        metric="loss",
        mode="min"
    )
    
    scheduler = ASHAScheduler(
        time_attr='training_iteration',
        max_t=30,
        grace_period=10,
        reduction_factor=2
    )
    
    tuner = tune.Tuner(
        tune.with_resources(
            partial(
                train_model_tune,
                num_features=num_features,
                train_dataloader=train_dataloader,
                val_dataloader=val_dataloader
            ),
            resources={"cpu": 1, "gpu": 0.5}  # Adjust based on your hardware
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            search_alg=search_algo,
            scheduler=scheduler,
            num_samples=num_samples
        ),
        param_space=config
    )
    
    results = tuner.fit()
    
    # Get best trial
    best_result = results.get_best_result(metric="loss", mode="min")
    best_trial_config = best_result.config
    best_trial_loss = best_result.metrics['loss']
    print(f"Best trial config: {best_trial_config}")
    print(f"Best trial final validation loss: {best_trial_loss}")
    
    return best_trial_config

In [21]:
num_features = X.shape[1]
num_features

9468

In [22]:
best_config = tune_hyperparameters(
        num_features=num_features,
        train_dataloader=train_dataloader,
        val_dataloader=test_dataloader,
        num_samples=10  # Number of trials to run
    )

final_model = LightningModel(
        num_features=num_features,
        **best_config
    )

final_trainer = L.Trainer(
        max_epochs=100,
        accelerator='auto',
        devices=1
    )

final_trainer.fit(final_model, train_dataloader, test_dataloader)

0,1
Current time:,2025-01-21 21:28:49
Running for:,00:00:40.96
Memory:,26.4/30.5 GiB

Trial name,# failures,error file
train_model_tune_b13f0445,1,"/tmp/ray/session_2025-01-21_21-28-05_447780_244198/artifacts/2025-01-21_21-28-08/train_model_tune_2025-01-21_21-28-05/driver_artifacts/train_model_tune_b13f0445_1_activation=ELU,loss_function=SmoothL1Loss,optimizer=AdamW_2025-01-21_21-28-08/error.txt"
train_model_tune_d175faa3,1,"/tmp/ray/session_2025-01-21_21-28-05_447780_244198/artifacts/2025-01-21_21-28-08/train_model_tune_2025-01-21_21-28-05/driver_artifacts/train_model_tune_d175faa3_3_activation=ReLU,loss_function=MSE,optimizer=Adam_2025-01-21_21-28-13/error.txt"
train_model_tune_71d4ebfc,1,"/tmp/ray/session_2025-01-21_21-28-05_447780_244198/artifacts/2025-01-21_21-28-08/train_model_tune_2025-01-21_21-28-05/driver_artifacts/train_model_tune_71d4ebfc_4_activation=ELU,loss_function=MAPE,optimizer=AdamW_2025-01-21_21-28-15/error.txt"

Trial name,status,loc,activation,loss_function,optimizer,iter,total time (s),loss
train_model_tune_fe544ee3,RUNNING,192.168.0.176:246005,PReLU,SmoothL1Loss,Adam,26.0,34.5949,3.11031
train_model_tune_8f4cd759,RUNNING,192.168.0.176:246324,ReLU,SmoothL1Loss,Adam,20.0,27.2251,0.258359
train_model_tune_20913f54,PENDING,,ReLU,MSE,AdamW,,,
train_model_tune_b13f0445,ERROR,192.168.0.176:245922,ELU,SmoothL1Loss,AdamW,,,
train_model_tune_d175faa3,ERROR,192.168.0.176:246127,ReLU,MSE,Adam,,,
train_model_tune_71d4ebfc,ERROR,192.168.0.176:246236,ELU,MAPE,AdamW,,,




[36m(func pid=245922)[0m /home/pedro/Documents/dl4spl2/venv/lib/python3.10/site-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(func pid=245922)[0m GPU available: True (cuda), used: True
[36m(func pid=245922)[0m TPU available: False, using: 0 TPU cores
[36m(func pid=245922)[0m HPU available: False, using: 0 HPUs
[36m(func pid=245922)[0m You are using a CUDA device ('NVIDIA GeForce RTX 4060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
[36m(func pid=245922)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2025-01-21 21:28:11,625	ERROR tune_controlle

Best trial config: {'optimizer': 'Adam', 'loss_function': 'SmoothL1Loss', 'activation': 'ReLU'}
Best trial final validation loss: 0.2583586573600769


TypeError: LightningModel.__init__() got an unexpected keyword argument 'optimizer'