In [2]:
import os
import pandas as pd
import ast
pd.set_option('display.max_columns', None)

import numpy as np

In [3]:
# CONSTANTS

data_path = r"./data/interm/data_to_train_meantime.csv"

In [4]:
# LOAD THE ESSENTIALS

df = pd.read_csv(data_path)

In [5]:
df = df.drop(columns=["Unnamed: 0","submit_time"])

In [6]:
df

Unnamed: 0,submit_day,submit_hour,group_id,num_tasks,cores_per_task,num_nodes_req,num_cores_req,req_nodes,req_switch,num_gpus_req,mem_req,job_mean_power_consumption,job_min_power_consumption,job_max_power_consumption
0,31,22,25200,64.0,4,16,256,0,0,64,475,519.930556,465.00,531.250
1,31,22,25200,64.0,4,16,256,0,0,64,475,510.301724,363.75,533.125
2,31,22,25200,64.0,4,16,256,0,0,64,475,512.069444,362.50,531.875
3,31,23,25200,0.0,32,1,32,0,0,4,59,630.377358,520.00,860.000
4,31,23,25200,0.0,32,1,32,0,0,4,59,860.208333,860.00,870.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231233,7,5,25200,4.0,32,1,128,0,0,4,237,920.000000,920.00,920.000
231234,7,7,25200,4.0,1,1,4,0,0,4,7,863.000000,860.00,870.000
231235,7,9,25200,8.0,16,1,128,0,0,4,234,877.745455,610.00,1000.000
231236,7,6,25200,4.0,32,1,128,0,0,4,237,860.000000,860.00,860.000


In [7]:
columns_set = set(df.columns.values.tolist())
target_set = set(["job_mean_power_consumption","job_min_power_consumption","job_max_power_consumption"])
Y_columns = list(target_set)
X_columns = list(columns_set - target_set)

In [8]:
df[X_columns]

Unnamed: 0,cores_per_task,mem_req,num_cores_req,req_nodes,group_id,submit_day,num_gpus_req,submit_hour,num_tasks,req_switch,num_nodes_req
0,4,475,256,0,25200,31,64,22,64.0,0,16
1,4,475,256,0,25200,31,64,22,64.0,0,16
2,4,475,256,0,25200,31,64,22,64.0,0,16
3,32,59,32,0,25200,31,4,23,0.0,0,1
4,32,59,32,0,25200,31,4,23,0.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
231233,32,237,128,0,25200,7,4,5,4.0,0,1
231234,1,7,4,0,25200,7,4,7,4.0,0,1
231235,16,234,128,0,25200,7,4,9,8.0,0,1
231236,32,237,128,0,25200,7,4,6,4.0,0,1


In [9]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from torch.utils.data import TensorDataset, DataLoader

In [25]:
# MAKE THE SPLITS

X,y = df[X_columns], df[Y_columns]

### FOR CLASSIC MODELS

# Assuming you have your data in X (features) and y (labels)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.7, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Fit only on train!
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

y_train = np.array(y_train)
y_val = np.array(y_val)
y_test = np.array(y_test)


### FOR TORCH MODELS

# Convert to tensors (on CPU)
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)  # Use float32 for regression

X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 1028
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [27]:
print(f"Number of batches - Train: {len(train_loader)}, Val: {len(val_loader)}, Test: {len(test_loader)}")

Number of batches - Train: 68, Val: 79, Test: 79


In [28]:
import sys
sys.path.append('./models/code_models/')

import sklearn_models
import torch_models
import training_utils

# SKLEARN

In [18]:
rf_model = sklearn_models.get_random_forest()

rf_trainer = training_utils.SklearnTrainer(
    model=rf_model,
    model_name="RandomForest_Default",
    project_name="Test",
    entity="iqbalch-universidad-carlos-iii-de-madrid" 
)

rf_model, rf_metrics = rf_trainer.train(
    X_train_scaled, y_train,
    X_val_scaled, y_val,
    config= rf_model.get_params()
)

print("\nValidation Metrics:")
for key, value in rf_metrics.items():
    if 'val' in key:
        print(f"{key}: {value:.4f}")

test_metrics, test_predictions = training_utils.evaluate_model(
    rf_model, X_test_scaled, y_test, model_type="sklearn"
)

print("\nTest Set Metrics:")
for key, value in test_metrics.items():
    print(f"{key}: {value:.4f}")


wandb: Currently logged in as: iqbalch (iqbalch-universidad-carlos-iii-de-madrid) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


Training RandomForest_Default...


0,1
train_mae_max_power,▁
train_mae_mean,▁
train_mae_mean_power,▁
train_mae_min_power,▁
train_r2_max_power,▁
train_r2_mean,▁
train_r2_mean_power,▁
train_r2_min_power,▁
train_rmse_max_power,▁
train_rmse_mean,▁

0,1
best_val_mae,126.27648
best_val_r2,0.31887
best_val_rmse,180.87846
train_mae_max_power,123.25696
train_mae_mean,100.22107
train_mae_mean_power,91.2246
train_mae_min_power,86.18164
train_r2_max_power,0.56261
train_r2_mean,0.54462
train_r2_mean_power,0.51828



Validation Metrics:
val_rmse_mean_power: 161.7926
val_mae_mean_power: 113.5575
val_r2_mean_power: 0.3041
val_rmse_min_power: 159.8107
val_mae_min_power: 108.7766
val_r2_min_power: 0.3294
val_rmse_max_power: 221.0320
val_mae_max_power: 156.4954
val_r2_max_power: 0.3231
val_rmse_mean: 180.8785
val_mae_mean: 126.2765
val_r2_mean: 0.3189

Test Set Metrics:
test_rmse_mean_power: 161.3757
test_mae_mean_power: 113.4599
test_r2_mean_power: 0.3050
test_mape_mean_power: 15.8715
test_rmse_min_power: 160.0210
test_mae_min_power: 109.1074
test_r2_min_power: 0.3190
test_mape_min_power: 22.8296
test_rmse_max_power: 221.0219
test_mae_max_power: 156.7295
test_r2_max_power: 0.3255
test_mape_max_power: 19.9669
test_rmse_mean: 180.8062
test_mae_mean: 126.4323
test_r2_mean: 0.3165
test_mape_mean: 19.5560


# TORCH

In [31]:
input_dim = X_train_scaled.shape[1]
siple_mlp_model = torch_models.SimpleMLP(input_dim=input_dim)

resnet_trainer = training_utils.PyTorchTrainer(
    model=siple_mlp_model,
    model_name="MLP Model",
    project_name="Test",
    entity="iqbalch-universidad-carlos-iii-de-madrid"
)

resnet_model, resnet_best_metrics = resnet_trainer.train(
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=150,
    lr=0.001,
    weight_decay=1e-5,
    patience=15,
)

# Evaluate on test
resnet_test_metrics, _ = training_utils.evaluate_model(
    resnet_model, X_test_scaled, y_test, model_type="pytorch"
)

print("\nTest Set Metrics:")
print(f"RMSE: {resnet_test_metrics['test_rmse_mean']:.4f}")
print(f"MAE: {resnet_test_metrics['test_mae_mean']:.4f}")
print(f"R2: {resnet_test_metrics['test_r2_mean']:.4f}")



Early stopping at epoch 127


0,1
epoch,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
lr,██████████████████████████▄▄▃▃▃▃▃▃▃▂▂▂▂▁
train_loss,███▇▆▄▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▇▆▆▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae_max_power,███▇▆▅▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae_mean_power,██▇▇▇▆▅▄▄▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae_min_power,███▆▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_r2_max_power,▁▂▃▃▄▄▅▅▅▆▆▇▇▇██████████████████████████
val_r2_mean_power,▁▂▂▄▄▆▆▇▇▇██████████████████████████████

0,1
best_epoch,112
best_val_loss,45731.27255
best_val_rmse,213.84871
epoch,127
lr,3e-05
train_loss,47647.11437
train_rmse,218.28219
val_loss,46053.23353
val_mae_max_power,196.73834
val_mae_mean_power,140.24409



Test Set Metrics:
RMSE: 211.1253
MAE: 156.9213
R2: 0.0703
