In [None]:
!pip install -U ray

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

from ray import tune
from ray.air import session
from ray.tune.schedulers import ASHAScheduler


In [None]:
def get_data():
    np.random.seed(0)
    X = np.random.rand(1000, 5)
    y = 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(1000) * 0.1

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32).unsqueeze(1),
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
    )


In [None]:
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

def get_data():
    np.random.seed(0)
    X = np.random.rand(1000, 5)  # 1000 samples, 5 features
    y = 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(1000) * 0.1  # linear with noise

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=True)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Convert to torch tensors; regression targets as floats with shape (N,1)
    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32).unsqueeze(1),
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.float32).unsqueeze(1),
    )

def plot_data(X_train, y_train):
    # Move to numpy for plotting
    X = X_train.numpy()
    y = y_train.squeeze(1).numpy()

    print(f"Training set size: {X.shape[0]} samples, {X.shape[1]} features")
    print(f"Validation set size: {len(y)} samples")
    print(f"Target y mean: {y.mean():.4f}, std: {y.std():.4f}")

    # Scatter: y vs first feature
    plt.figure()
    plt.scatter(X[:, 0], y, s=10)
    plt.xlabel("Feature X[:,0]")
    plt.ylabel("Target y")
    plt.title("y vs X[:,0]")
    plt.tight_layout()
    plt.show()

    # Scatter: y vs second feature
    plt.figure()
    plt.scatter(X[:, 1], y, s=10)
    plt.xlabel("Feature X[:,1]")
    plt.ylabel("Target y")
    plt.title("y vs X[:,1]")
    plt.tight_layout()
    plt.show()

    # Histogram of y
    plt.figure()
    plt.hist(y, bins=30)
    plt.xlabel("Target y")
    plt.title("Distribution of y (training)")
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    X_train, y_train, X_val, y_val = get_data()
    plot_data(X_train, y_train)


Dataset sizes:

Training: 800 samples × 5 features

Validation: 200 samples × 5 features

In [None]:
data[0].shape

In [None]:
data[1].shape

In [None]:
class SimpleRegressor(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        return self.net(x)


In [None]:
import os
import tempfile

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from ray.air import session
#from ray.air.checkpoint import Checkpoint


def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0.0
    count = 0
    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        preds = model(xb)  # (N, 1)
        loss = criterion(preds, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * xb.size(0)
        count += xb.size(0)

    avg_loss = total_loss / count
    return avg_loss


def r2_score(preds, targets):
    ss_res = torch.sum((targets - preds) ** 2)
    ss_tot = torch.sum((targets - torch.mean(targets)) ** 2)
    return 1 - ss_res / ss_tot


def validate(model, X_val, y_val, criterion, device):
    model.eval()
    with torch.no_grad():
        X_val = X_val.to(device)
        y_val = y_val.to(device)

        preds = model(X_val)
        val_loss = criterion(preds, y_val).item()
        val_r2 = r2_score(preds, y_val).item()
    return val_loss, val_r2


def train_model(config):
    X_train, y_train, X_val, y_val = get_data()  # your regression data

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = SimpleRegressor(input_dim=X_train.shape[1], hidden_dim=config["hidden_dim"])
    model = model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(
        train_dataset,
        batch_size=int(config["batch_size"]),
        shuffle=True,
    )

    for epoch in range(int(config["max_num_epochs"])):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_r2 = validate(model, X_val, y_val, criterion, device)

        # Save checkpoint and report to Ray AIR session
        """
        with tempfile.TemporaryDirectory() as td:
            path = os.path.join(td, "model.pt")
            torch.save(model.state_dict(), path)
            ckpt = Checkpoint.from_directory(td)

            session.report(
                {
                    "train_loss": train_loss,
                    "val_loss": val_loss,
                    "val_r2": val_r2,
                    "epoch": epoch,
                },
                checkpoint=ckpt,
            )
        """


In [None]:
# conservative sizes: limit concurrency and shrink object store
ray.init(num_cpus=4, object_store_memory=2 * 1024**3)

In [None]:
#import ray

#ray.init(object_store_memory=500 * 1024 * 1024)  # 500 MB


In [None]:
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def main(search_config, gpus_per_trial=1):
    scheduler = ASHAScheduler(
        time_attr="training_iteration",
        max_t=search_config["max_num_epochs"],
        grace_period=1,
        reduction_factor=2,
    )

    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_model),  # train_model accepts config
            resources={"cpu": 2, "gpu": gpus_per_trial},
        ),
        tune_config=tune.TuneConfig(
            metric="val_loss",          # optimize validation loss
            mode="min",
            scheduler=scheduler,
            num_samples=search_config["num_trials"],
        ),
        param_space=search_config,
    )

    results = tuner.fit()
    best_result = results.get_best_result("val_loss", "min")

    print(f"\n✅ Best trial config: {best_result.config}")
    print(f"✅ Best trial final validation loss: {best_result.metrics['val_loss']:.4f}")
    print(f"✅ Best trial final validation R²: {best_result.metrics['val_r2']:.4f}")

    test_best_model(best_result, smoke_test=search_config.get("smoke_test", False))
    return results


In [None]:
search_config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "hidden_dim": tune.choice([16, 32, 64]),
    "batch_size": tune.choice([16, 32, 64]),
    "max_num_epochs": 10,
    "num_trials": 10,
    "smoke_test": False,
    "log_tb": True,  # keep if you use TensorBoard logging
}

r = main(search_config, gpus_per_trial=1 if torch.cuda.is_available() else 0)



In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

from ray import tune
from ray.air import session
from ray.tune.schedulers import ASHAScheduler

# Step 1: Generate synthetic regression data
def get_data():
    np.random.seed(0)
    X = np.random.rand(1000, 5)
    y = 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(1000) * 0.1

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32).unsqueeze(1),
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
    )

# Step 2: Define the model
class SimpleRegressor0(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        return self.net(x)


class SimpleRegressor(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

    def forward(self, x):
        return self.net(x)




# Step 3: Training helper
def train_one_epoch(model, train_loader, criterion, optimizer):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Step 4: Validation helper
def validate(model, X_val, y_val, criterion):
    model.eval()
    with torch.no_grad():
        val_pred = model(X_val)
        val_loss = criterion(val_pred, y_val).item()
        val_r2 = 1 - val_loss / torch.var(y_val).item()  # pseudo R²
    return val_loss, val_r2

# Step 5: Training function for Ray Tune
def train_model(config):
    X_train, y_train, X_val, y_val = get_data()
    model = SimpleRegressor(input_dim=5, hidden_dim=config["hidden_dim"])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=int(config["batch_size"]),
        shuffle=True
    )

    for epoch in range(config["max_num_epochs"]):
        train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_r2 = validate(model, X_val, y_val, criterion)
        session.report({"loss": val_loss, "accuracy": val_r2})

# Step 6: Test best model
def test_best_model(best_result, smoke_test=False):
    X_train, y_train, X_val, y_val = get_data()
    model = SimpleRegressor(input_dim=5, hidden_dim=best_result.config["hidden_dim"])
    # No checkpoint loading here since checkpointing is off; retrain model on full data if needed.
    model.eval()
    with torch.no_grad():
        preds = model(X_val)
        mse = nn.MSELoss()(preds, y_val).item()
        r2 = 1 - mse / torch.var(y_val).item()
        print(f"[TEST] Final MSE: {mse:.4f}, R²: {r2:.4f}")

# Step 7: Main function
def main(config, gpus_per_trial=1):
    scheduler = ASHAScheduler(
        time_attr="training_iteration",
        max_t=config["max_num_epochs"],
        grace_period=1,
        reduction_factor=2,
    )

    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_model),
            resources={"cpu": 2, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=config["num_trials"],
        ),
        param_space=config,
    )

    results = tuner.fit()
    best_result = results.get_best_result("loss", "min")

    print(f"\n✅ Best trial config: {best_result.config}")
    print(f"✅ Best trial final validation loss: {best_result.metrics['loss']:.4f}")
    print(f"✅ Best trial final validation R²: {best_result.metrics['accuracy']:.4f}")

    test_best_model(best_result, smoke_test=config.get("smoke_test", False))

    return results

# Step 8: Run tuning
search_config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "hidden_dim": tune.choice([16, 32, 64]),
    "batch_size": tune.choice([16, 32, 64]),
    "max_num_epochs": 10,
    "num_trials": 5, #10
    "smoke_test": False,
}

r = main(search_config, gpus_per_trial=1 if torch.cuda.is_available() else 0)


So negative R² means your model fits worse than a naive baseline that predicts the average target.

If you get negative R² consistently, it usually means:

Your model is underfitting badly.

Or your training process has a bug (bad labels, wrong predictions, data leakage, etc.).

In practice, an R² near 1 is great, around 0 means “no better than mean,” and below 0 is poor.

In [None]:
r

# with train loss

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

from ray import tune
from ray.air import session
from ray.tune.schedulers import ASHAScheduler


def get_data():
    np.random.seed(0)
    X = np.random.rand(1000, 5)
    y = 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(1000) * 0.1

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32).unsqueeze(1),
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.float32).unsqueeze(1),
    )


class SimpleRegressor(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
        )

    def forward(self, x):
        return self.net(x)


def train_one_epoch(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0.0
    count = 0
    for xb, yb in train_loader:
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * xb.size(0)
        count += xb.size(0)
    return total_loss / count


def r2_score(preds, targets):
    ss_res = torch.sum((targets - preds) ** 2)
    ss_tot = torch.sum((targets - torch.mean(targets)) ** 2)
    return 1 - ss_res / ss_tot


def validate(model, X_val, y_val, criterion):
    model.eval()
    with torch.no_grad():
        val_pred = model(X_val)
        val_loss = criterion(val_pred, y_val).item()
        val_r2 = r2_score(val_pred, y_val).item()
    return val_loss, val_r2


def train_model(config):
    X_train, y_train, X_val, y_val = get_data()
    model = SimpleRegressor(input_dim=5, hidden_dim=config["hidden_dim"])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=int(config["batch_size"]),
        shuffle=True,
    )

    for epoch in range(config["max_num_epochs"]):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_r2 = validate(model, X_val, y_val, criterion)

        session.report(
            {
                "train_loss": train_loss,
                "loss": val_loss,    # Tune optimizes this metric
                "val_r2": val_r2,
                "epoch": epoch,
            }
        )


def test_best_model(best_result, smoke_test=False):
    X_train, y_train, X_val, y_val = get_data()
    model = SimpleRegressor(input_dim=5, hidden_dim=best_result.config["hidden_dim"])
    model.eval()
    with torch.no_grad():
        preds = model(X_val)
        mse = nn.MSELoss()(preds, y_val).item()
        r2 = 1 - mse / torch.var(y_val).item()
        print(f"[TEST] Final MSE: {mse:.4f}, R²: {r2:.4f}")


def main(config, gpus_per_trial=1):
    scheduler = ASHAScheduler(
        time_attr="training_iteration",
        max_t=config["max_num_epochs"],
        grace_period=1,
        reduction_factor=2,
    )

    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_model),
            resources={"cpu": 2, "gpu": gpus_per_trial},
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=config["num_trials"],
        ),
        param_space=config,
    )

    results = tuner.fit()
    best_result = results.get_best_result("loss", "min")

    print(f"\n✅ Best trial config: {best_result.config}")
    print(f"✅ Best trial final validation loss: {best_result.metrics['loss']:.4f}")
    print(f"✅ Best trial final validation R²: {best_result.metrics['val_r2']:.4f}")

    test_best_model(best_result, smoke_test=config.get("smoke_test", False))

    return results


search_config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "hidden_dim": tune.choice([16, 32, 64]),
    "batch_size": tune.choice([16, 32, 64]),
    "max_num_epochs": 10,
    "num_trials": 5,
    "smoke_test": False,
}

r = main(search_config, gpus_per_trial=1 if torch.cuda.is_available() else 0)


### with checkpoints

In [None]:
#pip install --upgrade ray
!pip install -U ray


In [1]:
import ray
print(ray.__version__)


2.48.0


In [2]:
import ray.air.checkpoint as checkpoint
print(checkpoint)


ModuleNotFoundError: No module named 'ray.air.checkpoint'

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

from ray import tune




#from ray.air import session



from ray.tune.schedulers import ASHAScheduler


#from ray.air.checkpoint import Checkpoint
# 2 -okfrom ray.train import Checkpoint
from ray.tune import Checkpoint




import tempfile


def get_data():
    np.random.seed(0)
    X = np.random.rand(1000, 5)
    y = 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(1000) * 0.1

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32).unsqueeze(1),
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.float32).unsqueeze(1),
    )


class SimpleRegressor(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
        )

    def forward(self, x):
        return self.net(x)


def train_one_epoch(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0.0
    count = 0
    for xb, yb in train_loader:
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * xb.size(0)
        count += xb.size(0)
    return total_loss / count


def r2_score(preds, targets):
    ss_res = torch.sum((targets - preds) ** 2)
    ss_tot = torch.sum((targets - torch.mean(targets)) ** 2)
    return 1 - ss_res / ss_tot


def validate(model, X_val, y_val, criterion):
    model.eval()
    with torch.no_grad():
        val_pred = model(X_val)
        val_loss = criterion(val_pred, y_val).item()
        val_r2 = r2_score(val_pred, y_val).item()
    return val_loss, val_r2


def train_model(config):
    X_train, y_train, X_val, y_val = get_data()
    model = SimpleRegressor(input_dim=5, hidden_dim=config["hidden_dim"])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=int(config["batch_size"]),
        shuffle=True,
    )

    for epoch in range(config["max_num_epochs"]):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_r2 = validate(model, X_val, y_val, criterion)

        # Save checkpoint to temp directory
        with tempfile.TemporaryDirectory() as tmpdir:
            path = f"{tmpdir}/model.pth"
            torch.save(model.state_dict(), path)
            ckpt = Checkpoint.from_directory(tmpdir)

            #session.report(
            tune.report(
                {
                    "train_loss": train_loss,
                    "loss": val_loss,
                    "val_r2": val_r2,
                    "epoch": epoch,
                },
                checkpoint=ckpt,
            )


def test_best_model(best_result, smoke_test=False):
    X_train, y_train, X_val, y_val = get_data()
    model = SimpleRegressor(input_dim=5, hidden_dim=best_result.config["hidden_dim"])

    # Load checkpoint weights
    checkpoint_path = best_result.checkpoint.to_directory()
    state_dict = torch.load(f"{checkpoint_path}/model.pth")
    model.load_state_dict(state_dict)

    model.eval()
    with torch.no_grad():
        preds = model(X_val)
        mse = nn.MSELoss()(preds, y_val).item()
        r2 = 1 - mse / torch.var(y_val).item()
        print(f"[TEST] Final MSE: {mse:.4f}, R²: {r2:.4f}")


def main(config, gpus_per_trial=1):
    scheduler = ASHAScheduler(
        time_attr="training_iteration",
        max_t=config["max_num_epochs"],
        grace_period=1,
        reduction_factor=2,
    )

    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_model),
            resources={"cpu": 2, "gpu": gpus_per_trial},
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=config["num_trials"],
        ),
        param_space=config,
    )

    results = tuner.fit()
    best_result = results.get_best_result("loss", "min")

    print(f"\n✅ Best trial config: {best_result.config}")
    print(f"✅ Best trial final validation loss: {best_result.metrics['loss']:.4f}")
    print(f"✅ Best trial final validation R²: {best_result.metrics['val_r2']:.4f}")

    test_best_model(best_result, smoke_test=config.get("smoke_test", False))

    return results


search_config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "hidden_dim": tune.choice([16, 32, 64]),
    "batch_size": tune.choice([16, 32, 64]),
    "max_num_epochs": 10,
    "num_trials": 5,
    "smoke_test": False,
}

r = main(search_config, gpus_per_trial=1 if torch.cuda.is_available() else 0)


0,1
Current time:,2025-07-31 19:40:17
Running for:,00:00:24.70
Memory:,8.6/9.6 GiB

Trial name,status,loc,batch_size,hidden_dim,lr,iter,total time (s),train_loss,loss,val_r2
train_model_4687b_00000,TERMINATED,200.40.20.243:479073,16,16,0.000135114,1,2.44844,7.55799,7.60163,-5.67088
train_model_4687b_00001,TERMINATED,200.40.20.243:479074,32,64,0.0189052,10,2.76466,0.0119686,0.0152264,0.986638
train_model_4687b_00002,TERMINATED,200.40.20.243:479227,32,16,0.000142155,1,1.91692,7.33057,7.38299,-5.47901
train_model_4687b_00003,TERMINATED,200.40.20.243:479226,16,32,0.00448687,2,2.00379,0.101661,0.0786322,0.930996
train_model_4687b_00004,TERMINATED,200.40.20.243:479343,16,16,0.022203,8,1.4622,0.0179951,0.0144127,0.987352


[36m(train_model pid=479073)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00000_0_batch_size=16,hidden_dim=16,lr=0.0001_2025-07-31_19-39-53/checkpoint_000000)
[36m(train_model pid=479074)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00001_1_batch_size=32,hidden_dim=64,lr=0.0189_2025-07-31_19-39-53/checkpoint_000000)
[36m(train_model pid=479074)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00001_1_batch_size=32,hidden_dim=64,lr=0.0189_2025-07-31_19-39-53/checkpoint_000001)
[36m(train_model pid=479074)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00001_1_batch_size=32,hidden_


✅ Best trial config: {'lr': 0.022203016839658106, 'hidden_dim': 16, 'batch_size': 16, 'max_num_epochs': 10, 'num_trials': 5, 'smoke_test': False}
✅ Best trial final validation loss: 0.0144
✅ Best trial final validation R²: 0.9874
[TEST] Final MSE: 0.0144, R²: 0.9874


  state_dict = torch.load(f"{checkpoint_path}/model.pth")
[36m(train_model pid=479343)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00004_4_batch_size=16,hidden_dim=16,lr=0.0222_2025-07-31_19-39-53/checkpoint_000007)


In [6]:
r

ResultGrid<[
  Result(
    metrics={'train_loss': 7.557986307144165, 'loss': 7.601632118225098, 'val_r2': -5.6708807945251465, 'epoch': 0},
    path='/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00000_0_batch_size=16,hidden_dim=16,lr=0.0001_2025-07-31_19-39-53',
    filesystem='local',
    checkpoint=Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00000_0_batch_size=16,hidden_dim=16,lr=0.0001_2025-07-31_19-39-53/checkpoint_000000)
  ),
  Result(
    metrics={'train_loss': 0.011968582347035408, 'loss': 0.01522635668516159, 'val_r2': 0.9866379499435425, 'epoch': 9},
    path='/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00001_1_batch_size=32,hidden_dim=64,lr=0.0189_2025-07-31_19-39-53',
    filesystem='local',
    checkpoint=Checkpoint(filesystem=local, path=/home/marcos/ray_results/train_model_2025-07-31_19-39-53/train_model_4687b_00001_1_batch_size=32,hidden_dim=64,l