In [None]:
import os
from pathlib import Path
import sys
sys.path.append(str(Path().resolve().parent))  # Add the project root to sys.path
import os
import json
import numpy as np
from pathlib import Path
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
from utils.oct_dataset import OCTDataset
from utils.models import UnetNoPretraining, MedSAM, ResNetUNetWithAttention
from utils.lossfunctions import DiceLoss, DiceBCELoss
from utils.helper_methods import trial_dirname_creator

In [None]:
import numpy as np
import os
import tempfile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from filelock import FileLock
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
from typing import Dict
import ray
from ray import train, tune
from ray.train import Checkpoint
from ray.tune.schedulers import ASHAScheduler
from ray import tune, air
from ray.air import session
from ray.tune.search.optuna import OptunaSearch
from ray.train import RunConfig, CheckpointConfig
from tqdm import tqdm
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import neptune
import segmentation_models_pytorch as smp
from segment_anything import sam_model_registry
os.environ["TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S"] = "0"

def train_model(config):
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    if config["model"] == "Unet":
        # Initialize model with the hyperparameters from the config
        net = smp.Unet(
            encoder_name="resnet34",        # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
            encoder_weights="imagenet",     # use `imagenet` pre-trained weights for encoder initialization
            in_channels=3,                  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
            classes=1,                      # model output channels (number of classes in your dataset)
            activation="sigmoid",           # output activation (sigmoid for binary segmentation)
        )
        if config["freeze_encoder"]:
            for param in net.encoder.parameters():
                param.requires_grad = False

    elif config["model"] == "DeepLabV3+":
        net = smp.DeepLabV3Plus(
            encoder_name="resnet34",
            encoder_weights="imagenet",
            in_channels=3,
            classes=1,
            activation="sigmoid",
        )
        if config["freeze_encoder"]:
            for param in net.encoder.parameters():
                param.requires_grad = False 
    
    elif config["model"] == "MedSam":
        MedSAM_CKPT_PATH = r"D:\OneDrive - Aarhus Universitet\9. Semester\Deep Learning\medsam\medsam_vit_b.pth"
        sam_model = sam_model_registry['vit_b'](checkpoint=MedSAM_CKPT_PATH)
        net = MedSAM(
                image_encoder=sam_model.image_encoder,
                mask_decoder=sam_model.mask_decoder,
                prompt_encoder=sam_model.prompt_encoder,
            ).to(device)
        
        if config["freeze_encoder"]:
            # Freeze the image encoder
            for param in net.image_encoder.parameters():
                param.requires_grad = False

    elif config["model"] == "AttentionUnet":
        net = ResNetUNetWithAttention()

        if(config["freeze_encoder"]):
            net = ResNetUNetWithAttention(freeze_entire_backbone=True)


    net.to(device)

    # Select optimizer based on the configuration
    if config["optimizer"] == "AdamW":
        optimizer = optim.AdamW(net.parameters(), lr=config["lr"])
    elif config["optimizer"] == "SGD":
        optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)
    elif config["optimizer"] == "RMSprop":
        optimizer = optim.RMSprop(net.parameters(), lr=config["lr"])

    # Select loss function based on the configuration
    if config["loss_function"] == "DiceLoss":
        criterion = DiceLoss()
    elif config["loss_function"] == "DiceBCELoss":
        criterion = DiceBCELoss()
    elif config["loss_function"] == "BCELoss":
        criterion = nn.BCELoss()

    transform = transforms.Compose([
        transforms.Resize((1024, 1024), interpolation=Image.NEAREST),
        transforms.ToTensor(),
    ])

    # Load existing checkpoint through `get_checkpoint()` API.
    if train.get_checkpoint():
        loaded_checkpoint = train.get_checkpoint()
        with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
            model_state, optimizer_state = torch.load(
                os.path.join(loaded_checkpoint_dir, "checkpoint.pt")
            )
            net.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

    root_dir = config["root_dir"]
    folds= config["folds"]
    
    with open(os.path.join(root_dir, "metadata.csv"), "r") as f:
        metadata_df = pd.read_csv(f)
        skf = StratifiedKFold(n_splits=folds)
        splits = list(skf.split(metadata_df, metadata_df["unique_id"]))

    train_and_validate_cv(root_dir, config, splits, folds, transform, optimizer, criterion, net, device)


def train_and_validate_cv(root_dir, config, splits, folds, transform, optimizer, criterion, net, device, medsam=False):
    for fold in range(folds):
        # Train and validate the model
        print(f"Training on fold {fold+1} out of {folds}")
        
        # Initialize Neptune run
        run = neptune.init_run(
            project="OCTAA/OCTSegmenter",
            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2MGU2NGNjMi0yNWE0LTRjNzgtOGNlNS1hZDdkMjJhYzYxMWUifQ==",
            name="training_and_validation",
            tags="terumo",
        )  # your credentials

        run["sys/group_tags"].add([
            str(config["model"]),
            str(config["freeze_encoder"]),
            str(config["loss_function"]), 
            str(config["optimizer"]), 
            f"Fold: {str(fold)}"
        ])  # Group tags

        # Log configuration parameters
        run["parameters"] = config

        train_indices, val_indices = splits[fold]

        train_dataset = OCTDataset(root_dir, indices=train_indices, transform=transform)
        val_dataset = OCTDataset(root_dir, indices=val_indices, transform=transform)

        trainloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
        valloader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)

        best_val_loss = float("inf")
        epochs = config["epochs"]
        no_improvement_epochs = 0
        patience = config["patience"]
        scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10)
        
        for epoch in range(epochs):
            net.train()
            running_loss = 0.0
            epoch_steps= 0

            for i, data in enumerate(trainloader):
                if config["model"] == "MedSam":
                    images, masks, _, _ = data
                    images, masks = images.to(device), masks.to(device)

                     # Get image dimensions
                    batch_size, _, height, width = images.size()

                    # Create bounding boxes that cover the whole image
                    bboxes = torch.tensor([[0, 0, width, height]] * batch_size, dtype=torch.float32).unsqueeze(1).to(device)

                    optimizer.zero_grad()
                    outputs = net(images, bboxes)
                    loss = criterion(outputs, masks)
                    loss.backward()
                    optimizer.step()

                else:
                    images, masks, _, _ = data
                    images, masks = images.to(device), masks.to(device)

                    optimizer.zero_grad()
                    outputs = net(images)
                    loss = criterion(outputs, masks)
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * images.size(0)

                epoch_steps += 1
                if i % 10 == 9:  # print every 10 mini-batches
                    print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                    loss.item()))


            # Calculate training loss and accuracy for the epoch
            train_loss = running_loss / len(trainloader.dataset)
            run["train_loss"].append(train_loss)  # Log training loss to neptune
            print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {train_loss:.4f}")

            # Validation phase
            net.eval()
            val_loss = 0.0
            dice_loss = 0.0

            with torch.no_grad():  # No need to calculate gradients during validation
                for data in valloader:
                    if config["model"] == "MedSam":
                        images, masks, _, _ = data
                        images, masks = images.to(device), masks.to(device)

                        # Get image dimensions
                        batch_size, _, height, width = images.size()

                        # Create bounding boxes that cover the whole image
                        bboxes = torch.tensor([[0, 0, width, height]] * batch_size, dtype=torch.float32).unsqueeze(1).to(device)

                        outputs = net(images, bboxes)
                        loss = criterion(outputs, masks)
                        val_loss += loss.item() * images.size(0)

                    else:
                        images, masks, _, _ = data
                        images, masks = images.to(device), masks.to(device)

                        outputs = net(images)
                        loss = criterion(outputs, masks)
                        val_loss += loss.item() * images.size(0)

                    # Calculate Dice loss
                    dice = DiceLoss()
                    loss = dice(outputs, masks)
                    dice_loss += loss.item() * images.size(0)
            
            # Calculate validation loss and accuracy
            val_loss = val_loss / len(valloader.dataset)
            avg_dice_loss = dice_loss / len(valloader.dataset)
            scheduler.step(val_loss) # Adjust learning rate based on validation loss
            run["val_loss"].append(val_loss)  # Log validation loss
            run["dice_loss"].append(avg_dice_loss)  # Log Dice loss
            print(f"Epoch [{epoch+1}/{epochs}], Validation Loss: {val_loss:.4f}")

            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                path = os.path.join(temp_checkpoint_dir, "checkpoint.pt")
                torch.save(
                    (net.state_dict(), optimizer.state_dict()), path
                )
                checkpoint = Checkpoint.from_directory(temp_checkpoint_dir)
                train.report(
                    {"loss": val_loss, "accuracy": 1 - avg_dice_loss, "dice_loss": avg_dice_loss, "fold": fold},
                    checkpoint=checkpoint,
                )

            # Check if validation loss improves
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                no_improvement_epochs = 0
                print(f"Validation loss improved to {val_loss:.4f}. Saving checkpoint.")
                
            else:
                no_improvement_epochs += 1
                print(f"Validation loss did not improve. Best so far: {best_val_loss:.4f}")
            
            if no_improvement_epochs >= patience:
                print(f"Stopping early. No improvement in {patience} epochs.")
                run["early_stopping"] = True
                break

        run.stop()
        print("Finished Training")

    
def test_best_model(best_result):
    
    # Initialize Neptune run
    run = neptune.init_run(
        project="OCTAA/OCTSegmenter",
        api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2MGU2NGNjMi0yNWE0LTRjNzgtOGNlNS1hZDdkMjJhYzYxMWUifQ==",
        name="best_model_test",
        tags="gentuity"  
    )  # your credentials

    # Log configuration parameters
    run["parameters"] = best_result.config

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    best_trained_model = UnetNoPretraining().to(device)

    checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

    model_state, optimizer_state = torch.load(checkpoint_path, weights_only=True)
    best_trained_model.load_state_dict(model_state)

    transform = transforms.Compose([
        transforms.Resize((1024, 1024), interpolation=Image.NEAREST),
        transforms.ToTensor(),
    ])

    root_dir = r"D:\OneDrive - Aarhus Universitet\9. Semester\Deep Learning\data_gentuity"

    test_dataset = OCTDataset(root_dir, transform=transform, train=False, is_gentuity=True)
    testloader = DataLoader(test_dataset, batch_size=best_result.config["batch_size"], shuffle=False)

    criterion = DiceLoss()

    total_loss = 0.0
    with torch.no_grad():  # Disable gradient calculation
        for data in testloader:
            if best_result.config["model"] == "MedSam":
                images, masks, _, _ = data
                images, masks = images.to(device), masks.to(device)

                # Get image dimensions
                batch_size, _, height, width = images.size()

                # Create bounding boxes that cover the whole image
                bboxes = torch.tensor([[0, 0, width, height]] * batch_size, dtype=torch.float32).unsqueeze(1).to(device)

                outputs = best_trained_model(images, bboxes)
                predicted = (outputs > 0.5).float()
                loss = criterion(predicted, masks)
                total_loss += loss.item() * images.size(0)

            else:
                images, masks, _, _ = data
                images, masks = images.to(device), masks.to(device)

                outputs = best_trained_model(images)
                predicted = (outputs > 0.5).float()
                loss = criterion(predicted, masks)
                total_loss += loss.item() * images.size(0)

    # Calculate average loss and accuracy
    total_loss /= len(testloader.dataset)
    accuracy = 1 - loss

    run["test_loss"] = total_loss
    run.stop()
    print(f"Test Loss: {total_loss:.4f}, Test Accuracy: {accuracy:.4f}")

In [None]:
def main(num_samples, gpus_per_trial, epochs, smoke_test, folds):
    if smoke_test:
        root_dir = r"D:\OneDrive - Aarhus Universitet\9. Semester\Deep Learning\data_terumo_smoke_test"

    else:
        print("Using full dataset")
    
    config = {
        "root_dir": root_dir,
        "lr": tune.loguniform(1e-6, 1e-2),
        "epochs": epochs,
        "smoke_test": smoke_test,
        "batch_size": tune.choice([4]),
        "optimizer": tune.grid_search(["AdamW", "SGD", "RMSprop"]),
        "folds": folds,
        "patience": 20,
        "loss_function": tune.grid_search(["DiceLoss", "BCELoss", "DiceBCELoss"]),
        "model": tune.grid_search(["AttentionUnet", "Unet", "DeepLabV3+", "MedSam"]),
        "freeze_encoder": tune.grid_search([True, False]),
    }

    # ASHA SCHEDULER, BUT WILL NOT BE USED
    # scheduler = ASHAScheduler(
    #     max_t=5,
    #     grace_period=5,
    #     reduction_factor=2
    # )

    # Define your checkpoint configuration
    checkpoint_config = CheckpointConfig(
        num_to_keep=1,  # Only keep the best checkpoint
        checkpoint_score_attribute="loss",  # The metric used to determine the best checkpoint
        checkpoint_score_order="min",  # Keep the checkpoint with the lowest loss
    )

    # Define the run config with the checkpoint config
    run_config = RunConfig(checkpoint_config=checkpoint_config)

    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_model),
            resources={"cpu": 2, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="dice_loss",
            mode="min",
            num_samples=num_samples,
            trial_dirname_creator=trial_dirname_creator,
        ),
        param_space=config,
        run_config=run_config,
    )
    results = tuner.fit()
    
    best_result = results.get_best_result("dice_loss", "min")

    print("Best trial config: {}".format(best_result.config))
    print("Best trial final validation loss: {}".format(
        best_result.metrics["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_result.metrics["accuracy"]))

    test_best_model(best_result)

main(num_samples=2, gpus_per_trial=1, epochs=2, smoke_test=True, folds=5)

0,1
Current time:,2024-11-28 22:50:21
Running for:,00:00:32.11
Memory:,10.4/15.9 GiB

Trial name,# failures,error file
train_model_cv_b0d04_00000,1,C:/Users/johan/AppData/Local/Temp/ray/session_2024-11-28_22-49-39_609521_2844/artifacts/2024-11-28_22-49-49/train_model_cv_2024-11-28_22-49-38/driver_artifacts/trial_b0d04_00000_lr=6.9e-06_opt=AdamW_bs=4_model=AttentionUnet_freeze=True_loss=DiceLoss/error.txt

Trial name,status,loc,batch_size,freeze_encoder,loss_function,lr,model,optimizer
train_model_cv_b0d04_00001,PENDING,,4,False,DiceLoss,5.7109e-06,AttentionUnet,AdamW
train_model_cv_b0d04_00002,PENDING,,4,True,BCELoss,0.000110082,AttentionUnet,AdamW
train_model_cv_b0d04_00003,PENDING,,4,False,BCELoss,0.000315215,AttentionUnet,AdamW
train_model_cv_b0d04_00004,PENDING,,4,True,DiceBCELoss,0.000550301,AttentionUnet,AdamW
train_model_cv_b0d04_00005,PENDING,,4,False,DiceBCELoss,9.06622e-06,AttentionUnet,AdamW
train_model_cv_b0d04_00006,PENDING,,4,True,DiceLoss,1.01953e-05,Unet,AdamW
train_model_cv_b0d04_00007,PENDING,,4,False,DiceLoss,0.00252428,Unet,AdamW
train_model_cv_b0d04_00008,PENDING,,4,True,BCELoss,0.00830767,Unet,AdamW
train_model_cv_b0d04_00009,PENDING,,4,False,BCELoss,0.00192881,Unet,AdamW
train_model_cv_b0d04_00010,PENDING,,4,True,DiceBCELoss,1.31017e-05,Unet,AdamW


[36m(train_model_cv pid=13784)[0m Training on fold 1 out of 5
[36m(train_model_cv pid=13784)[0m [neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/OCTAA/OCTSegmenter/e/OCT-192


2024-11-28 22:50:17,360	ERROR tune_controller.py:1331 -- Trial task failed for trial train_model_cv_b0d04_00000
Traceback (most recent call last):
  File "c:\Users\johan\anaconda3\envs\dl\Lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "c:\Users\johan\anaconda3\envs\dl\Lib\site-packages\ray\_private\auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\johan\anaconda3\envs\dl\Lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\johan\anaconda3\envs\dl\Lib\site-packages\ray\_private\worker.py", line 2656, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\johan\anaco

In [5]:
import os
from ray import tune
from ray.train import Result

storage_path = r"C:\Users\johan\ray_results"
exp_name = "train_model_cv_2024-11-26_18-30-35"
experiment_path = os.path.join(storage_path, exp_name)
print(f"Loading results from {experiment_path}...")

restored_tuner = tune.Tuner.restore(experiment_path, trainable=train_model_cv)
result_grid = restored_tuner.get_results()

# Check if there have been errors
if result_grid.errors:
    print("One of the trials failed!")
else:
    print("No errors!")
    
# Get the result with the maximum test set `mean_accuracy`
best_result: Result = result_grid.get_best_result()

# Load the best checkpoint if it exists
if best_result.checkpoint:
    checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")
else:
    raise ValueError("No checkpoint found for the best result.")

Loading results from C:\Users\johan\ray_results\train_model_cv_2024-11-26_18-30-35...
One of the trials failed!


OSError: [WinError 112] Failed copying 'C:/Users/johan/ray_results/train_model_cv_2024-11-26_18-30-35/trial_2b60a_00035_lr=1.0e-04_opt=RMSprop_bs=4/checkpoint_000004/checkpoint.pt' to 'C:/Users/johan/AppData/Local/Temp/checkpoint_tmp_eb1ac5460b9145e2847271a019179487/checkpoint.pt'. Detail: [Windows error 112] Der er ikke tilstrækkelig plads på disken.


In [None]:
import os
from ray import tune
from ray.train import Result

storage_path = r"C:\Users\johan\ray_results"
exp_name = "train_model_cv_2024-11-26_16-31-22"
experiment_path = os.path.join(storage_path, exp_name)
print(f"Loading results from {experiment_path}...")

restored_tuner = tune.Tuner.restore(experiment_path, trainable=train_model_cv)
result_grid = restored_tuner.get_results()

# Check if there have been errors
if result_grid.errors:
    print("One of the trials failed!")
else:
    print("No errors!")

num_results = len(result_grid)
print("Number of results:", num_results)

# Iterate over results
for i, result in enumerate(result_grid):
    if result.error:
        print(f"Trial #{i} had an error:", result.error)
        continue

    print(f"Trial #{i} finished successfully with a loss of: {result.metrics['loss']}")

results_df = result_grid.get_dataframe()
results_df[["training_iteration", "loss"]]

print("Shortest training time:", results_df["time_total_s"].min())
print("Longest training time:", results_df["time_total_s"].max())

ax = None
for result in result_grid:
    label = f"lr={result.config['lr']:.4f}, batch_size={result.config['batch_size']}, optimizer={result.config['optimizer']}, fold={result.config['fold']}"
    if ax is None:
        ax = result.metrics_dataframe.plot("training_iteration", "loss", label=label)
    else:
        result.metrics_dataframe.plot("training_iteration", "loss", ax=ax, label=label)
ax.set_title("Loss vs. Training Iteration for All Trials")
ax.set_ylabel("Loss")

# Get the result with the maximum test set `mean_accuracy`
best_result: Result = result_grid.get_best_result()

print("Best trial config: {}".format(best_result.config))

# Get the best trial's final loss and accuracy
best_loss = best_result.metrics["loss"]
print("Best trial final test set loss: {}".format(best_loss))
best_accuracy = best_result.metrics["accuracy"]
print("Best trial final test set accuracy: {}".format(best_accuracy))

# Load the best model
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

best_trained_model = UnetNoPretraining().to(device)

# Load the best checkpoint
checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

model_state, optimizer_state = torch.load(checkpoint_path, weights_only=True)
best_trained_model.load_state_dict(model_state)

# Set the model to evaluation mode
best_trained_model.eval()

# Load a sample image from the test dataset
root_dir = r"D:\OneDrive - Aarhus Universitet\9. Semester\Deep Learning\data_gentuity"
transform = transforms.Compose([
    transforms.Resize((256, 256), interpolation=Image.NEAREST),
    transforms.ToTensor(),
])
test_dataset = OCTDataset(root_dir, transform=transform, train=False, is_gentuity=True)
random_indices = np.random.choice(len(test_dataset), 1, replace=False)
sample_image, sample_mask, _, _ = test_dataset[random_indices[0]]  # Change the index to load a different sample

# Move the sample image to the appropriate device
sample_image = sample_image.to(device).unsqueeze(0)  # Add batch dimension

# Make a prediction
with torch.no_grad():
    prediction = best_trained_model(sample_image)

# Convert the prediction to a binary mask
predicted_mask = (prediction > 0.5).float()

# Plot the sample image, ground truth mask, and predicted mask
fig, ax = plt.subplots(1, 3, figsize=(18, 6))

ax[0].imshow(sample_image.squeeze().permute(1, 2, 0).cpu().numpy())
ax[0].set_title("Sample Image")
ax[0].axis('off')

ax[1].imshow(sample_mask.squeeze().cpu().numpy(), cmap='gray')
ax[1].set_title("Ground Truth Mask")
ax[1].axis('off')

ax[2].imshow(predicted_mask.squeeze().cpu().numpy(), cmap='gray')
ax[2].set_title("Predicted Mask")
ax[2].axis('off')

plt.show()

- train_model_cv_83259_00005: FileNotFoundError('Could not fetch metrics for train_model_cv_83259_00005: both result.json and progress.csv were not found at C:/Users/johan/ray_results/train_model_cv_2024-11-26_16-31-22/trial_83259_00005_lr=1.0e-04_opt=Adam_bs=4')
- train_model_cv_83259_00022: FileNotFoundError('Could not fetch metrics for train_model_cv_83259_00022: both result.json and progress.csv were not found at C:/Users/johan/ray_results/train_model_cv_2024-11-26_16-31-22/trial_83259_00022_lr=1.0e-04_opt=SGD_bs=4')
- train_model_cv_83259_00017: FileNotFoundError('Could not fetch metrics for train_model_cv_83259_00017: both result.json and progress.csv were not found at C:/Users/johan/ray_results/train_model_cv_2024-11-26_16-31-22/trial_83259_00017_lr=1.0e-04_opt=SGD_bs=4')
- train_model_cv_83259_00033: FileNotFoundError('Could not fetch metrics for train_model_cv_83259_00033: both result.json and progress.csv were not found at C:/Users/johan/ray_results/train_model_cv_2024-11-26_

Loading results from C:\Users\johan\ray_results\train_model_cv_2024-11-26_16-31-22...
One of the trials failed!
Number of results: 45


KeyError: 'loss'