# -------------------------------------------  General info  -------------------------------------------

 - **What is this Jupyter notebook:** A script to test the preprocessed data loader
 - **How to run the script:** Follow the steps in the *Python code* section below

# -------------------------------------------  Python code  -------------------------------------------

# Steps to run the script

## 1. Set the config variables

Set the config variables in *config/data/preprocessed/loader/lidc_idri_preprocessed_data_loader.yaml*

## 2. Run the script below 
The output will be the loaded data info

In [1]:
import copy
import sys
from os.path import abspath, dirname, join

import hydra
from hydra.core.global_hydra import GlobalHydra

sys.path.append(abspath(join(dirname('.'), "../")))

from src.modules.data.dataloader.preprocessed_data_loader import LIDCIDRIPreprocessedKFoldDataLoader
from src.modules.data.metadata import LIDCIDRIPreprocessedMetaData
from src.modules.utils.paths import PYTHON_PROJECT_DIR_PATH

def _print_tensor_summary(tensor, space):
    """Print the type name, shape and min/max of ``tensor``, each line prefixed by ``space``."""
    print(f"{space}         - Type: {type(tensor).__name__}")
    print(f"{space}         - Shape: {tensor.shape}")
    print(f"{space}         - Min/max values: {tensor.min()}/{tensor.max()}")


def print_loaded_data_info(k_fold_data_loaders=False, load_mask=False, *, batch_index=None, data=None, label=None):
    """Print a short summary (type, shape, min/max) of one loaded batch.

    Args:
        k_fold_data_loaders: When true, every line is indented by four extra
            spaces (used by the K-fold demo, which prints a "Fold index" line
            above each group of batches).
        load_mask: Currently unused; kept so existing callers keep working.
        batch_index: Value printed on the "Batch index" line.
        data: Mapping whose ``'input_image'`` entry exposes ``.shape``,
            ``.min()`` and ``.max()``.
        label: Mapping whose ``['lnm']['mean']`` entry exposes ``.shape``,
            ``.min()`` and ``.max()``.

    Any of ``batch_index``, ``data`` or ``label`` left as ``None`` is looked up
    under the same name in the global scope this function was defined in. That
    keeps the original call style working, where the notebook loops define
    those names and call the function without passing them.

    Raises:
        NameError: If a value was not passed and no global of that name exists.
    """
    batch = {"batch_index": batch_index, "data": data, "label": label}
    for name, value in batch.items():
        if value is None:
            # Backward-compatible fallback to the notebook's loop variables.
            try:
                batch[name] = globals()[name]
            except KeyError:
                raise NameError(f"name '{name}' is not defined") from None

    space = "    " if k_fold_data_loaders else ""
    print(f"{space}    Batch index: {batch['batch_index']}")
    print(f"{space}        Data (Lung nodule CT image):")
    _print_tensor_summary(batch["data"]['input_image'], space)
    print(f"{space}        Label (Mean lung nodule malignancy)")
    _print_tensor_summary(batch["label"]['lnm']['mean'], space)
    
# Hydra keeps a process-wide singleton and refuses to be initialized twice, so
# re-running this cell in a live kernel would otherwise raise. Clearing the
# singleton first makes the cell safe to re-run; on a fresh kernel it is a no-op.
GlobalHydra.instance().clear()
hydra.initialize(config_path='../config', version_base=None)

# Compose the project config, swapping in the notebook-demo variants of the
# preprocessed data loader and metadata config groups.
config = hydra.compose(
    config_name="config", 
    overrides=[
        "data/preprocessed/loader=lidc_idri_preprocessed_data_loader_jn_demo",
        "metadata/preprocessed=lidc_idri_preprocessed_metadata_jn_demo"
    ]
)


  check_for_updates()


In [2]:
# Build the metadata object from the `metadata.preprocessed` section of the composed config.
# The demo cells below call its get_lung_nodule_image_metadataframe() method.
metadata = LIDCIDRIPreprocessedMetaData(config=config.metadata.preprocessed)

In [3]:
# Banner that separates this demo's output from the previous cells.
print("\n------------------------------------ Demonstrating the single-fold data loader ------------------------------------\n")

# Build the loader from the composed loader config and the lung nodule image metadata frame.
dataloader = LIDCIDRIPreprocessedKFoldDataLoader(
    config=config.data.preprocessed.loader,
    lung_nodule_image_metadataframe=metadata.get_lung_nodule_image_metadataframe(),
)
data_loaders_by_subset = dataloader.get_data_loaders_by_subset()

# NOTE: print_loaded_data_info() is called without arguments and picks up
# `batch_index`, `data` and `label` from the notebook's global scope, so the
# loop variable names below must stay exactly as they are.
for subset_type in ("train", "validation", "test"):
    print(f"Subset type: {subset_type.title()}")
    subset_data_loader = data_loaders_by_subset[subset_type]
    # Batches are numbered from 1 in the printed summary.
    for batch_index, (data, label) in enumerate(subset_data_loader, start=1):
        print_loaded_data_info()


------------------------------------ Demonstrating the single-fold data loader ------------------------------------

Subset type: Train
    Batch index: 1
        Data (Lung nodule CT image):
         - Type: Tensor
         - Shape: torch.Size([128, 1, 32, 32])
         - Min/max values: -1.0/1.0
        Label (Mean lung nodule malignancy)
         - Type: Tensor
         - Shape: torch.Size([128, 1])
         - Min/max values: 1.0/5.0
    Batch index: 2
        Data (Lung nodule CT image):
         - Type: Tensor
         - Shape: torch.Size([128, 1, 32, 32])
         - Min/max values: -1.0/1.0
        Label (Mean lung nodule malignancy)
         - Type: Tensor
         - Shape: torch.Size([128, 1])
         - Min/max values: 1.0/5.0
    Batch index: 3
        Data (Lung nodule CT image):
         - Type: Tensor
         - Shape: torch.Size([128, 1, 32, 32])
         - Min/max values: -1.0/1.0
        Label (Mean lung nodule malignancy)
         - Type: Tensor
         - Shape: torc

In [4]:
# Banner that separates this demo's output from the previous cells.
print("\n-------------------------------------- Demonstrating the K-fold data loader ---------------------------------------\n")

# Apply the K-fold overrides to a private copy of the loader config instead of
# mutating the shared `config` in place. With in-place mutation, re-running the
# single-fold cell after this one would pick up the K-fold settings.
k_fold_loader_config = copy.deepcopy(config.data.preprocessed.loader)
k_fold_loader_config.number_of_k_folds = 5
k_fold_loader_config.test_fraction_of_entire_dataset = None

dataloader = LIDCIDRIPreprocessedKFoldDataLoader(
    config=k_fold_loader_config, 
    lung_nodule_image_metadataframe=metadata.get_lung_nodule_image_metadataframe()
)
data_loaders_by_subset = dataloader.get_data_loaders_by_subset()

# NOTE: print_loaded_data_info() picks up `batch_index`, `data` and `label`
# from the notebook's global scope, so these loop variable names must not change.
for subset_type in ["train", "validation", "test"]:
    print(f"Subset type: {subset_type.title()}")
    # Each subset is indexed by fold here; fold numbers are printed from 1.
    for fold_index in range(k_fold_loader_config.number_of_k_folds):
        print(f"    Fold index: {fold_index + 1}")
        for batch_index, (data, label) in enumerate(iter(data_loaders_by_subset[subset_type][fold_index]), 1):
            print_loaded_data_info(k_fold_data_loaders=True)


-------------------------------------- Demonstrating the K-fold data loader ---------------------------------------

Subset type: Train
    Fold index: 1
        Batch index: 1
            Data (Lung nodule CT image):
             - Type: Tensor
             - Shape: torch.Size([128, 1, 32, 32])
             - Min/max values: -1.0/1.0
            Label (Mean lung nodule malignancy)
             - Type: Tensor
             - Shape: torch.Size([128, 1])
             - Min/max values: 1.0/5.0
        Batch index: 2
            Data (Lung nodule CT image):
             - Type: Tensor
             - Shape: torch.Size([128, 1, 32, 32])
             - Min/max values: -1.0/1.0
            Label (Mean lung nodule malignancy)
             - Type: Tensor
             - Shape: torch.Size([128, 1])
             - Min/max values: 1.0/5.0
        Batch index: 3
            Data (Lung nodule CT image):
             - Type: Tensor
             - Shape: torch.Size([128, 1, 32, 32])
             - Min/m