<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Setting-up-imports" data-toc-modified-id="Setting-up-imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setting up imports</a></span></li><li><span><a href="#Setting-up-Constant-Hyperparameters" data-toc-modified-id="Setting-up-Constant-Hyperparameters-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Setting up Constant Hyperparameters</a></span></li><li><span><a href="#Setting-up-Parameters-and-Functions-for-Training" data-toc-modified-id="Setting-up-Parameters-and-Functions-for-Training-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Setting up Parameters and Functions for Training</a></span><ul class="toc-item"><li><span><a href="#Hyperparameters-Search-Space" data-toc-modified-id="Hyperparameters-Search-Space-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Hyperparameters Search Space</a></span></li><li><span><a href="#Creating-the-training-function" data-toc-modified-id="Creating-the-training-function-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Creating the training function</a></span></li><li><span><a href="#Creating-the-evaluation-function" data-toc-modified-id="Creating-the-evaluation-function-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Creating the evaluation function</a></span></li></ul></li><li><span><a href="#Running-the-training" data-toc-modified-id="Running-the-training-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Running the training</a></span><ul class="toc-item"><li><span><a href="#Loading-data-for-training" data-toc-modified-id="Loading-data-for-training-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Loading data for training</a></span></li><li><span><a href="#Configuring-the-Tuner-with-a-Scheduler-and-a-Search-Algorithm" data-toc-modified-id="Configuring-the-Tuner-with-a-Scheduler-and-a-Search-Algorithm-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Configuring the Tuner with a Scheduler and a Search 
Algorithm</a></span></li><li><span><a href="#Running-the-Tuner" data-toc-modified-id="Running-the-Tuner-4.3"><span class="toc-item-num">4.3&nbsp;&nbsp;</span>Running the Tuner</a></span></li></ul></li><li><span><a href="#Evaluating-the-best-Results" data-toc-modified-id="Evaluating-the-best-Results-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Evaluating the best Results</a></span></li></ul></div>

# Setting up imports

In [1]:
import os

import torch
from torch.nn import CrossEntropyLoss
from torch.nn.functional import normalize
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
from torchvision.transforms import GaussianBlur
from torchvision.transforms.functional import invert

import ray
from ray import tune
from ray.air import session, RunConfig
from ray.air.checkpoint import Checkpoint
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.hyperopt import HyperOptSearch


from Dataset import POCDataReader, data_augment_, POCDataset
from metrics import Metrics, EvaluationMetrics
from models import UNet
from loss import *
from pipelines import *
from train import training_loop, validation_loop
from train_tqdm import evaluation_loop


# Setting up Constant Hyperparameters

In [2]:
# --- Search budget -----------------------------------------------------------
NUM_SAMPLES = 30      # number of trials requested from the tuner (TuneConfig.num_samples)
EPOCHS = 15           # epochs per trial; also the ASHA `max_t` and the basis for the LR schedule
NUM_MODEL_TEST = 10   # how many top-ranked trials `evaluate_df` re-evaluates on the test data

# --- Data --------------------------------------------------------------------
NUM_AUGMENT = 1           # `n` passed to `data_augment_`
LOAD_DATA_ON_GPU = True   # forwarded to POCDataset(load_on_gpu=...) and gates `inpip.to(device)`

# --- Resources reserved per trial ---------------------------------------------
CPUS_PER_TRIAL = 20   # half of these become DataLoader workers when data stays on the CPU
GPUS_PER_TRIAL = 1

##### Selecting the CUDA device

In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


# Setting up Parameters and Functions for Training

## Hyperparameters Search Space

In [4]:
# Hyperparameter search space handed to the tuner as `param_space`.
# Plain values are fixed for every trial; `tune.choice([...])` entries are sampled per trial.
search_space = {
    # Architecture and optimizer are fixed.
    "Network": UNet,
    "Optimizer": Adam,

    # Currently fixed; previously explored ranges are kept for reference:
    #   Learning Rate -> tune.qloguniform(1e-5, 1e-2, 5e-6)
    #   Batch Size    -> tune.qrandint(2, 8, 2)
    "Learning Rate": 1e-4,
    "Batch Size": 4,

    # Loss components; `train` combines them as Combine Loss(Pixel Loss, Volume Loss).
    "Pixel Loss": tune.choice([
        CrossEntropyLoss(weight=torch.tensor([.3, .7])),
        FocalLoss(weight=torch.tensor([.3, .7]), gamma=2),
    ]),
    "Volume Loss": tune.choice([
        JaccardLoss(),
        TverskyLoss(alpha=0.3, beta=0.7),
        FocalTverskyLoss(alpha=0.3, beta=0.7, gamma=2),
    ]),
    "Combine Loss": tune.choice([
        CombinedLoss,
        BorderedLoss,
        PixelLoss,
        VolumeLoss,
    ]),

    # Dataset options forwarded to POCDataset in `train`.
    "Negative Mining": tune.choice([True, False]),
    "Smooth Labeling": tune.choice([True, False]),

    # Input pipeline options forwarded to InputPipeline in `train`.
    "Input Filter": tune.choice([None, invert]),
    "Input Layer": tune.choice([
        None,
        LaplacianFilter(),
        SobelFilter(),
        # DINOFilter(),  # disabled candidate
    ]),
}

## Creating the training function

In [5]:
def train(config, train_data, val_data):
    """Train one Ray Tune trial and report validation metrics after every epoch.

    Args:
        config: Hyperparameter dict sampled from the search space. Keys read
            here: "Network", "Optimizer", "Learning Rate", "Batch Size",
            "Pixel Loss", "Volume Loss", "Combine Loss", "Negative Mining",
            "Smooth Labeling", "Input Filter" and "Input Layer".
        train_data: Training samples, handed to ``POCDataset``.
        val_data: Validation samples, handed to ``POCDataset``.

    Side effects:
        After each epoch, writes ``model/checkpoint.pt`` (relative to the
        current working directory) containing the tuple
        ``(model_state, optimizer_state, scheduler_state)`` and reports it,
        together with ``val_metrics.get_metrics(epoch)``, via ``session.report``.

    NOTE(review): the epoch counter is not part of the checkpoint, so a trial
    restored from a checkpoint restarts its loop at epoch 1.
    """
    # Function-scope import: only needed when resuming from a checkpoint.
    from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

    device = "cuda" if torch.cuda.is_available() else "cpu"

    input_filter = config["Input Filter"]
    inpip = InputPipeline(
        transformer=[normalize, input_filter] if input_filter is not None else normalize,
        layer_transformer=config["Input Layer"])
    if LOAD_DATA_ON_GPU:
        inpip = inpip.to(device)

    batch_size = int(config["Batch Size"])
    # Worker processes and pinned memory are only configured when the samples stay on the CPU.
    if LOAD_DATA_ON_GPU:
        loader_kwargs = {}
    else:
        loader_kwargs = dict(
            num_workers=CPUS_PER_TRIAL // 2,
            pin_memory=True,
            pin_memory_device=device)

    train_dataset = POCDataset(
        train_data,
        transform=inpip,
        target_transform=GaussianBlur(kernel_size=3, sigma=0.7) if config["Smooth Labeling"] else None,
        negative_mining=config["Negative Mining"],
        load_on_gpu=LOAD_DATA_ON_GPU)
    train_dataset.precompute_transform()
    training_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_dataset.sampler,
        **loader_kwargs)

    val_dataset = POCDataset(
        val_data,
        transform=inpip,
        target_transform=None,
        negative_mining=False,
        load_on_gpu=LOAD_DATA_ON_GPU)
    val_dataset.precompute_transform()
    validation_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        **loader_kwargs)

    # Keep a handle on the bare network: checkpoints are saved/loaded through it so
    # their keys never carry DataParallel's "module." prefix, which the evaluation
    # functions (which build an unwrapped network) could not load.
    network = config["Network"](n_channels=inpip.nb_channel, n_classes=2, bilinear=True, crop=False)
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        # `torch.nn` is spelled out because `nn` is not imported by name in this notebook.
        model = torch.nn.DataParallel(network)
    else:
        model = network
    model.to(device)

    loss_fn = config["Combine Loss"](config["Pixel Loss"], config["Volume Loss"]).to(device)
    optimizer = config["Optimizer"](model.parameters(), lr=config["Learning Rate"], betas=(0.9, 0.99))
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS // 2)

    # To restore a checkpoint, use `session.get_checkpoint()`.
    loaded_checkpoint = session.get_checkpoint()
    if loaded_checkpoint:
        with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
            # map_location lets a checkpoint written on another device be restored here.
            model_state, optimizer_state, scheduler_state = torch.load(
                os.path.join(loaded_checkpoint_dir, "checkpoint.pt"),
                map_location=device)
        # Accept older checkpoints that were saved from a DataParallel-wrapped model.
        consume_prefix_in_state_dict_if_present(model_state, "module.")
        network.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
        lr_scheduler.load_state_dict(scheduler_state)

    train_metrics = Metrics(
        buffer_size=len(training_dataloader),
        mode="Training",
        hyperparam=config,
        device=device)

    val_metrics = Metrics(
        buffer_size=len(validation_dataloader),
        mode="Validation",
        hyperparam=config,
        device=device)

    try:
        for epoch in range(1, EPOCHS + 1):  # loop over the dataset multiple times
            training_loop(epoch, training_dataloader, model, loss_fn, optimizer, lr_scheduler, train_metrics, device)
            validation_loop(epoch, validation_dataloader, model, loss_fn, val_metrics, device)

            # Here we save a checkpoint. It is automatically registered with
            # Ray Tune and can be accessed through `session.get_checkpoint()`
            # API in future iterations.
            os.makedirs("model", exist_ok=True)
            torch.save(
                (network.state_dict(), optimizer.state_dict(), lr_scheduler.state_dict()),
                "model/checkpoint.pt")
            checkpoint = Checkpoint.from_directory("model")
            session.report(metrics=val_metrics.get_metrics(epoch), checkpoint=checkpoint)
    finally:
        # Close the writers even if an epoch raises.
        train_metrics.close_tensorboard()
        val_metrics.close_tensorboard()


## Creating the evaluation function

In [51]:
# Development helper: re-import `metrics.metrics` so edits to that module are picked up
# without restarting the kernel, then rebind `EvaluationMetrics` to the fresh class.
#
# `import metrics.metrics` is required here: the `from metrics import ...` line in the
# imports cell does not bind the name `metrics`, so `reload(metrics.metrics)` would raise
# NameError on a fresh kernel (unless a star-import happened to provide that name).
from importlib import reload

import metrics.metrics

reload(metrics.metrics)
from metrics.metrics import EvaluationMetrics

In [52]:
def evaluate(test_data, result):
    """Run the checkpointed model of one tuning result over the test data.

    Args:
        test_data: Test samples, handed to ``POCDataset``.
        result: Tuning result; ``result.config``, ``result.checkpoint`` and
            ``result.metrics["Epoch"]`` are read.

    Returns:
        None. An ``EvaluationMetrics`` object is built and passed to
        ``evaluation_loop`` together with the model and the data loader.

    NOTE(review): unlike the datasets built in ``train``, the test dataset does
    not call ``precompute_transform()`` - confirm this is intended.
    """
    # Function-scope import: strips DataParallel's key prefix from saved weights if present.
    from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    config = result.config

    input_filter = config["Input Filter"]
    inpip = InputPipeline(
        transformer=[normalize, input_filter] if input_filter is not None else normalize,
        layer_transformer=config["Input Layer"])
    if LOAD_DATA_ON_GPU:
        inpip = inpip.to(device)

    test_dataset = POCDataset(
        test_data,
        transform=inpip,
        target_transform=None,
        negative_mining=False,
        load_on_gpu=LOAD_DATA_ON_GPU)

    # Worker processes and pinned memory are only configured when the samples stay on the CPU.
    if LOAD_DATA_ON_GPU:
        loader_kwargs = {}
    else:
        loader_kwargs = dict(num_workers=20, pin_memory=True, pin_memory_device=device)
    evaluation_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True, **loader_kwargs)

    best_trained_model = config["Network"](
        n_channels=inpip.nb_channel, n_classes=2, bilinear=True, crop=False).to(device)

    # `as_directory()` (also used in `train`) cleans up any temporary copy on exit,
    # whereas `to_directory()` left a new directory behind on every call.
    with result.checkpoint.as_directory() as checkpoint_dir:
        # map_location lets GPU-trained weights be loaded on whatever device is available now.
        model_state, _, _ = torch.load(
            os.path.join(checkpoint_dir, "checkpoint.pt"),
            map_location=device)
    # Weights saved from a DataParallel-wrapped model carry a "module." key prefix.
    consume_prefix_in_state_dict_if_present(model_state, "module.")
    best_trained_model.load_state_dict(model_state)

    test_metrics = EvaluationMetrics(
        buffer_size=len(evaluation_dataloader),
        hyperparam=config,
        epochs=result.metrics["Epoch"],
        device=device)

    evaluation_loop(dataloader=evaluation_dataloader, model=best_trained_model, metric=test_metrics, device=device)

def _resolve_config_value(value):
    """Turn one ``config/...`` cell of the results DataFrame into a usable object.

    Non-string cells (classes, functions, module instances, ``None``) are
    returned unchanged; a missing cell (``None``/NaN) yields ``None``. Only
    string cells are evaluated, which is what the original code did for every
    cell.
    """
    if value is None or (isinstance(value, float) and value != value):  # None or NaN
        return None
    if isinstance(value, str):
        # SECURITY(review): eval() executes arbitrary code. Only use this on result
        # tables produced by your own runs. It also needs the string to be a valid
        # expression naming something in scope; reprs such as "<class '...'>" are not.
        return eval(value)
    return value


def evaluate_df(test_data, results_df):
    """Evaluate the ``NUM_MODEL_TEST`` best trials of a results DataFrame on the test data.

    Args:
        test_data: Test samples, handed to ``POCDataset``.
        results_df: One row per trial. Columns read: "CrackIoU", "Epoch",
            "logdir" and the ``config/<name>`` hyperparameter columns.
            The DataFrame is no longer sorted in place.

    Returns:
        None. For each selected trial an ``EvaluationMetrics`` object is built
        and passed to ``evaluation_loop``.
    """
    # Function-scope import: strips DataParallel's key prefix from saved weights if present.
    from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    hyperparam_names = (
        "Network", "Optimizer", "Learning Rate", "Batch Size",
        "Pixel Loss", "Volume Loss", "Combine Loss",
        "Negative Mining", "Smooth Labeling",
        "Input Filter", "Input Layer",
    )

    # Worker processes and pinned memory are only configured when the samples stay on the CPU.
    if LOAD_DATA_ON_GPU:
        loader_kwargs = {}
    else:
        loader_kwargs = dict(num_workers=20, pin_memory=True, pin_memory_device=device)

    # Sort a copy so the caller's DataFrame is left untouched.
    ranked = results_df.sort_values("CrackIoU", ascending=False)
    for _, res in ranked.head(NUM_MODEL_TEST).iterrows():

        # Same pipeline construction as `train`/`evaluate`: `normalize` is always applied,
        # optionally followed by the trial's input filter.
        input_filter = _resolve_config_value(res["config/Input Filter"])
        inpip = InputPipeline(
            transformer=[normalize, input_filter] if input_filter is not None else normalize,
            layer_transformer=_resolve_config_value(res["config/Input Layer"]))
        if LOAD_DATA_ON_GPU:
            inpip = inpip.to(device)

        test_dataset = POCDataset(
            test_data,
            transform=inpip,
            target_transform=None,
            negative_mining=False,
            load_on_gpu=LOAD_DATA_ON_GPU)
        evaluation_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True, **loader_kwargs)

        network_cls = _resolve_config_value(res["config/Network"])
        trained_model = network_cls(
            n_channels=inpip.nb_channel, n_classes=2, bilinear=True, crop=False).to(device)

        # `train` writes its checkpoint to "model/checkpoint.pt" relative to its working
        # directory; this assumes that directory is the trial's logdir.
        checkpoint_path = os.path.join(res["logdir"], "model/checkpoint.pt")
        model_state, _, _ = torch.load(checkpoint_path, map_location=device)
        consume_prefix_in_state_dict_if_present(model_state, "module.")
        trained_model.load_state_dict(model_state)

        # String form of every hyperparameter.
        hyperparam = {name: str(res[f"config/{name}"]) for name in hyperparam_names}

        test_metrics = EvaluationMetrics(
            buffer_size=len(evaluation_dataloader),
            hyperparam=hyperparam,
            epochs=int(res["Epoch"]),  # passed for parity with `evaluate`
            device=device)

        evaluation_loop(dataloader=evaluation_dataloader, model=trained_model, metric=test_metrics, device=device)


# Running the training

## Loading data for training

In [7]:
# Read the dataset from disk into host memory (not onto the GPU).
data_reader = POCDataReader(
    root_dir="../data",
    load_on_gpu=False,
    verbose=True,
)

# Split with the fractions 0.7 / 0.1 / 0.2, unpacked as train / validation / test.
train_data, val_data, test_data = data_reader.split([0.7, 0.1, 0.2])

# Augment the training split only; the trailing underscore suggests this happens in place.
data_augment_(
    train_data,
    n=NUM_AUGMENT,
    load_on_gpu=False,
    verbose=True,
)

Loading dataset into RAM:   0%|          | 0/2744 [00:00<?, ?it/s]

	- Loading done, RAM used: 4.55GiB / free: 38.00GiB / total: 125.40GiB
	- Got a total of 2744 images.


Expending the dataset 1 more times:   0%|          | 0/1920 [00:00<?, ?it/s]

	- Augmentation done, RAM used: 7.31GiB / free: 35.25GiB / total: 125.40GiB
	- Got 1920 new images and a total of 3840 images.


## Configuring the Tuner with a Scheduler and a Search Algorithm

In [8]:
# ASHA scheduler: `max_t` caps a trial at EPOCHS reported iterations, and every trial
# gets at least `grace_period` iterations.
scheduler = ASHAScheduler(
    max_t=EPOCHS,
    grace_period=2,
    reduction_factor=2,
)

# HyperOpt proposes the configurations to try.
search_algo = HyperOptSearch()

# Maximise the validation CrackIoU reported by `train` over NUM_SAMPLES trials.
tune_config = tune.TuneConfig(
    metric="CrackIoU",
    mode="max",
    num_samples=NUM_SAMPLES,
    scheduler=scheduler,
    search_alg=search_algo,
)

# The datasets are attached with `with_parameters` and each trial reserves
# CPUS_PER_TRIAL CPUs and GPUS_PER_TRIAL GPUs.
tuner = tune.Tuner(
    tune.with_resources(
        tune.with_parameters(
            train,
            train_data=train_data,
            val_data=val_data,
        ),
        resources={"cpu": CPUS_PER_TRIAL, "gpu": GPUS_PER_TRIAL},
    ),
    tune_config=tune_config,
    param_space=search_space,
    run_config=RunConfig(local_dir="~/POC-Project/ray_results"),
)

## Running the Tuner

In [9]:
# Run the hyperparameter search configured above. The recorded run below took about
# 14 hours. `results` is queried later through `get_best_result` and `get_dataframe`.
results = tuner.fit()

2023-03-21 16:40:18,489	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Current time:,2023-03-22 06:41:27
Running for:,14:00:54.92
Memory:,32.4/125.4 GiB

Trial name,status,loc,Batch Size,Combine Loss,Input Filter,Input Layer,Learning Rate,Negative Mining,Network,Optimizer,Pixel Loss,Smooth Labeling,Volume Loss,iter,total time (s),Epoch,Loss,CrackIoU
train_062106c6,TERMINATED,141.223.108.122:37252,4,<class 'loss.lo_4e10,<function inver_7250,LaplacianFilter,0.0001,False,<class 'models._0380,<class 'torch.o_1b20,CrossEntropyLoss(),False,FocalTverskyLoss,4,1528.19,4,0.0101645,0.726125
train_fb648fde,TERMINATED,141.223.108.122:37338,4,<class 'loss.lo_5590,,LaplacianFilter,0.0001,False,<class 'models._0380,<class 'torch.o_1b20,CrossEntropyLoss(),False,FocalTverskyLoss,15,5665.79,15,0.0102724,0.780001
train_9fc5d37d,TERMINATED,141.223.108.122:37252,4,<class 'loss.lo_51d0,<function inver_7250,LaplacianFilter,0.0001,False,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,False,JaccardLoss,2,764.152,2,0.0016556,0.138095
train_ec537ad3,TERMINATED,141.223.108.122:37252,4,<class 'loss.lo_4a50,,SobelFilter,0.0001,True,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,True,FocalTverskyLoss,8,3021.86,8,0.00805477,0.782492
train_cae22dbf,TERMINATED,141.223.108.122:37252,4,<class 'loss.lo_5590,<function inver_7250,,0.0001,True,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,False,FocalTverskyLoss,15,5628.35,15,0.0190647,0.792872
train_853d0262,TERMINATED,141.223.108.122:37338,4,<class 'loss.lo_51d0,,LaplacianFilter,0.0001,True,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,False,TverskyLoss,2,766.324,2,0.00122544,0.163182
train_d41c8b41,TERMINATED,141.223.108.122:37338,4,<class 'loss.lo_4a50,<function inver_7250,,0.0001,False,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,True,TverskyLoss,2,765.478,2,0.104972,0.596321
train_d4177694,TERMINATED,141.223.108.122:37338,4,<class 'loss.lo_4a50,,,0.0001,True,<class 'models._0380,<class 'torch.o_1b20,CrossEntropyLoss(),True,TverskyLoss,4,1514.52,4,0.0680096,0.742846
train_516dd362,TERMINATED,141.223.108.122:37338,4,<class 'loss.lo_4a50,,LaplacianFilter,0.0001,False,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,False,JaccardLoss,15,5669.97,15,0.0458218,0.830887
train_e8fc8198,TERMINATED,141.223.108.122:37252,4,<class 'loss.lo_51d0,,LaplacianFilter,0.0001,True,<class 'models._0380,<class 'torch.o_1b20,FocalLoss,True,JaccardLoss,2,763.549,2,0.00123841,0.175681


Trial name,CrackIoU,Epoch,Loss,MeanIoU,Tversky,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_0445f4ab,0.819906,15,0.046882,0.908589,0.912501,2023-03-22_05-29-38,True,,dad235558d3f40dc91b788937bbb28f8,pirl-PowerEdge-T640,15,141.223.108.122,37338,True,5691.39,378.468,5691.39,1679430578,0,,15,0445f4ab,0.0266371
train_062106c6,0.726125,4,0.0101645,0.860695,0.876887,2023-03-21_17-06-03,True,,3c7874d0b49642229cf064d8db90072d,pirl-PowerEdge-T640,4,141.223.108.122,37252,True,1528.19,376.995,1528.19,1679385963,0,,4,062106c6,0.0286627
train_0717f785,0.814646,15,0.0940367,0.905963,0.893023,2023-03-22_00-13-12,True,,3c7874d0b49642229cf064d8db90072d,pirl-PowerEdge-T640,15,141.223.108.122,37252,True,5632.06,374.476,5632.06,1679411592,0,,15,0717f785,0.0286627
train_09e60520,0.826255,15,0.0881142,0.911886,0.90059,2023-03-22_02-20-27,True,,dad235558d3f40dc91b788937bbb28f8,pirl-PowerEdge-T640,15,141.223.108.122,37338,True,5656.1,376.436,5656.1,1679419227,0,,15,09e60520,0.0266371
train_0e8257a8,0.268461,2,0.211027,0.629011,0.336848,2023-03-21_20-21-07,True,,3c7874d0b49642229cf064d8db90072d,pirl-PowerEdge-T640,2,141.223.108.122,37252,True,765.901,376.09,765.901,1679397667,0,,2,0e8257a8,0.0286627
train_1b1add71,0.827104,15,0.0876815,0.912318,0.903761,2023-03-22_01-59-40,True,,3c7874d0b49642229cf064d8db90072d,pirl-PowerEdge-T640,15,141.223.108.122,37252,True,5631.36,374.681,5631.36,1679417980,0,,15,1b1add71,0.0286627
train_28702a06,0.821742,15,0.0903845,0.909616,0.895978,2023-03-22_03-54-47,True,,dad235558d3f40dc91b788937bbb28f8,pirl-PowerEdge-T640,15,141.223.108.122,37338,True,5659.68,376.193,5659.68,1679424887,0,,15,28702a06,0.0266371
train_29189cfb,0.809024,8,0.00736416,0.903141,0.907148,2023-03-21_22-39-20,True,,3c7874d0b49642229cf064d8db90072d,pirl-PowerEdge-T640,8,141.223.108.122,37252,True,3015.25,375.445,3015.25,1679405960,0,,8,29189cfb,0.0286627
train_29ebcf34,0.806877,8,0.0474492,0.901959,0.910966,2023-03-21_23-12-06,True,,dad235558d3f40dc91b788937bbb28f8,pirl-PowerEdge-T640,8,141.223.108.122,37338,True,3020.85,376.485,3020.85,1679407926,0,,8,29ebcf34,0.0266371
train_3a677d53,0.833933,15,0.0842459,0.915754,0.910119,2023-03-22_06-41-27,True,,3c7874d0b49642229cf064d8db90072d,pirl-PowerEdge-T640,15,141.223.108.122,37252,True,5642.19,375.624,5642.19,1679434887,0,,15,3a677d53,0.0286627


2023-03-21 17:06:03,588	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'Combine Loss': <class 'loss.loss.BorderedLoss'>, 'Input Filter': <function invert at 0x7f6000027250>, 'Input Layer': LaplacianFilter, 'Network': <class 'models.unet.UNet'>, 'Optimizer': <class 'torch.optim.adam.Adam'>, 'Pixel Loss': CrossEntropyLoss(), 'Volume Loss': FocalTverskyLoss}
2023-03-21 17:18:47,840	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'Combine Loss': <class 'loss.loss.PixelLoss'>, 'Input Filter': <function invert at 0x7f6000027250>, 'Input Layer': LaplacianFilter, 'Network': <class 'models.unet.UNet'>, 'Optimizer': <class 'torch.optim.adam.Adam'>, 'Pixel Loss': FocalLoss, 'Volume Loss': JaccardLoss}
2023-03-21 18:09:09,804	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'Combine Loss': <class 'loss.loss.CombinedLoss'>, 'Input Layer': 

2023-03-22 01:59:40,186	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'Combine Loss': <class 'loss.loss.VolumeLoss'>, 'Input Layer': LaplacianFilter, 'Network': <class 'models.unet.UNet'>, 'Optimizer': <class 'torch.optim.adam.Adam'>, 'Pixel Loss': FocalLoss, 'Volume Loss': JaccardLoss}
2023-03-22 02:20:27,269	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'Combine Loss': <class 'loss.loss.VolumeLoss'>, 'Input Layer': LaplacianFilter, 'Network': <class 'models.unet.UNet'>, 'Optimizer': <class 'torch.optim.adam.Adam'>, 'Pixel Loss': FocalLoss, 'Volume Loss': JaccardLoss}
2023-03-22 03:33:35,195	INFO tensorboardx.py:267 -- Removed the following hyperparameter values when logging to tensorboard: {'Combine Loss': <class 'loss.loss.CombinedLoss'>, 'Input Layer': SobelFilter, 'Network': <class 'models.unet.UNet'>, 'Optimizer': <class 'torch.optim.adam.Adam'>, 'Pixel Loss': CrossEntr

# Evaluating the best Results

In [54]:
# `best_result` is computed here so this cell runs on a fresh kernel; previously it was
# only defined by a cell placed further down, so running top to bottom raised NameError.
best_result = results.get_best_result(metric="CrackIoU", mode="max", scope="all")
print(best_result)

Result(metrics={'Epoch': 15, 'Loss': 0.04582178592681885, 'CrackIoU': 0.8308873176574707, 'MeanIoU': 0.9142576456069946, 'Tversky': 0.9118564128875732, 'should_checkpoint': True, 'done': True, 'trial_id': '516dd362', 'experiment_tag': '9_Batch_Size=4,Combine_Loss=class_loss_loss_CombinedLoss,Input_Filter=None,Input_Layer=LaplacianFilter,Learning_Rate=0.0001,Negative_Mining=False,Network=class_models_unet_UNet,Optimizer=class_torch_optim_adam_Adam,Pixel_Loss=FocalLoss,Smooth_Labeling=False,Volume_Loss=JaccardLoss'}, error=None, log_dir=PosixPath('/home/pirl/POC-Project/ray_results/train_2023-03-21_16-39-49/train_516dd362_9_Batch_Size=4,Combine_Loss=class_loss_loss_CombinedLoss,Input_Filter=None,Input_Layer=LaplacianFilter,Learning_Rat_2023-03-21_19-05-50'))


In [53]:
# Pick the best trial by validation CrackIoU and summarise it.
best_result = results.get_best_result(metric="CrackIoU", mode="max", scope="all")  # Get best result object
print(f"Best trial config: {best_result.config}")
print(f"Best trial final validation loss: {best_result.metrics['Loss']}")
print(f"Best trial final validation CrackIoU: {best_result.metrics['CrackIoU']}")

# Evaluate every trial on the test data.
for result in results:
    # `evaluate` dereferences `result.checkpoint`; a trial that never saved a checkpoint
    # would otherwise abort the whole loop.
    if result.checkpoint is None:
        print(f"Skipping trial without a checkpoint: {result.log_dir}")
        continue
    evaluate(test_data=test_data, result=result)

Best trial config: {'Network': <class 'models.unet.UNet'>, 'Optimizer': <class 'torch.optim.adam.Adam'>, 'Learning Rate': 0.0001, 'Batch Size': 4, 'Pixel Loss': FocalLoss, 'Volume Loss': JaccardLoss, 'Combine Loss': <class 'loss.loss.CombinedLoss'>, 'Negative Mining': False, 'Smooth Labeling': False, 'Input Filter': None, 'Input Layer': LaplacianFilter}
Best trial final validation loss: 0.04582178592681885
Best trial final validation CrackIoU: 0.8308873176574707


[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

[Evaluating]:   0%|          | 0/550 [00:00<?, ?it/s]

In [None]:
# Summarise the best trial by validation CrackIoU.
best_result = results.get_best_result(metric="CrackIoU", mode="max", scope="all")
print("Best trial config: {}".format(best_result.config))
print("Best trial final validation loss: {}".format(best_result.metrics["Loss"]))
print("Best trial final validation CrackIoU: {}".format(best_result.metrics["CrackIoU"]))

# One row per trial, ordered from highest to lowest CrackIoU.
results_df = (
    results
    .get_dataframe(filter_metric="CrackIoU", filter_mode="max")
    .sort_values("CrackIoU", ascending=False)
)

# Re-evaluate the top-ranked trials on the test data.
evaluate_df(test_data=test_data, results_df=results_df)