In [3]:
import numpy as np
import time
import csv
import os

from skimage import io

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint as tf_ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping

from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.trainer import Trainer
from pytorch_lightning.loggers import CSVLogger
from torch.utils.data import DataLoader

import sys
sys.path.append('/data/ihidalgo/MicroscoPy')        

from microscopy import datasets
from microscopy import utils
from microscopy import metrics
from microscopy import model_utils
from microscopy import optimizer_scheduler_utils
from microscopy import tensorflow_callbacks

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /home/ihidalgo/miniconda3/envs/microscopy/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]




Loading model from: /home/ihidalgo/miniconda3/envs/microscopy/lib/python3.9/site-packages/lpips/weights/v0.1/vgg.pth
Selecting OpenCL device: NVIDIA GeForce RTX 2080 Ti
2.8.4




In [24]:
import hydra
import os

from hydra import compose, initialize
from omegaconf import OmegaConf, DictConfig

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from omegaconf import DictConfig


In [27]:
def load_path(dataset_root, dataset_name, folder):
    if folder is not None:
        return os.path.join(dataset_root, dataset_name, folder)
    else:
        return None

def my_app(cfg: DictConfig) -> None:
    print(cfg)
    #"LiveFActinDataset", "EM", "MitoTracker_small", "F-actin", "ER", "MT", "MT-SMLM_all"
    for dataset_name in ["F-actin"]:  
        cfg.dataset_name = dataset_name
        train_lr, train_hr, val_lr, val_hr, test_lr, test_hr = cfg.used_dataset.data_paths

        dataset_root = "datasets" if os.path.exists("datasets") else "../datasets"
        train_lr_path = load_path(dataset_root, dataset_name, train_lr)
        train_hr_path = load_path(dataset_root, dataset_name, train_hr)
        val_lr_path = load_path(dataset_root, dataset_name, val_lr)
        val_hr_path = load_path(dataset_root, dataset_name, val_hr)
        test_lr_path = load_path(dataset_root, dataset_name, test_lr)
        test_hr_path = load_path(dataset_root, dataset_name, test_hr)

        # "unet", "rcan", "dfcan", "wdsr", "wgan", "esrganplus", "cddpm"
        model_name = 'cddpm'
        batch_size = 4
        num_epochs = 5
        lr = 0.001
        discriminator_lr = 0.001
        scheduler = 'ReduceOnPlateau'
        optimizer = 'adam'
        
        cfg.model_name = model_name
        cfg.hyperparam.batch_size = batch_size
        cfg.hyperparam.num_epochs = num_epochs
        cfg.hyperparam.lr = lr
        cfg.hyperparam.discriminator_lr = discriminator_lr

        cfg.hyperparam.scheduler = scheduler
        cfg.hyperparam.discriminator_lr_scheduler = scheduler
        cfg.hyperparam.optimizer = optimizer
        cfg.hyperparam.discriminator_optimizer = optimizer

        cfg.model.optim.early_stop.patience = num_epochs
        save_folder = "scale" + str(cfg.used_dataset.scale)
        if cfg.hyperparam.additional_folder is not None:
            save_folder += "_" + cfg.hyperparam.additional_folder

        saving_path = "./results/{}/{}/{}/epc{}_btch{}_lr{}_optim-{}_lrsched-{}_seed{}".format(
            cfg.dataset_name,
            cfg.model_name,
            save_folder,
            cfg.hyperparam.num_epochs,
            cfg.hyperparam.batch_size,
            cfg.hyperparam.lr,
            cfg.hyperparam.optimizer,
            cfg.hyperparam.scheduler,
            cfg.hyperparam.seed
        )

        return cfg, train_lr_path, train_hr_path, val_lr_path, val_hr_path, test_lr_path, test_hr_path, saving_path, 1

cfg = cfg = OmegaConf.load('../conf/config.yaml')
cfg, train_lr_path, train_hr_path, val_lr_path, val_hr_path, test_lr_path, test_hr_path, saving_path, verbose = my_app(cfg)

{'defaults': [{'dataset': 'default.yaml'}, {'model': 'default.yaml'}, {'hyperparam': 'default.yaml'}, '_self_'], 'model.optim.early_stop.patience': '${hyperparam.num_epochs}', 'dataset_name': 'EM', 'model_name': 'unet', 'used_dataset': '${dataset.${dataset_name}}', 'used_model': '${model.${model_name}}', 'used_optim': '${model.optim.${hyperparam.optimizer}}', 'used_optim_d': '${model.optim.${hyperparam.discriminator_optimizer}}', 'used_sched': '${model.optim.${hyperparam.scheduler}}', 'used_sched_d': '${model.optim.${hyperparam.discriminator_lr_scheduler}}'}


InterpolationKeyError: Interpolation key 'dataset.F-actin' not found
    full_key: used_dataset
    object_type=dict

usage: ipykernel_launcher.py [--help] [--hydra-help] [--version]
                             [--cfg {job,hydra,all}] [--resolve]
                             [--package PACKAGE] [--run] [--multirun]
                             [--shell-completion] [--config-path CONFIG_PATH]
                             [--config-name CONFIG_NAME]
                             [--config-dir CONFIG_DIR]
                             [--experimental-rerun EXPERIMENTAL_RERUN]
                             [--info [{all,config,defaults,defaults-tree,plugins,searchpath}]]
                             [overrides ...]
ipykernel_launcher.py: error: argument --shell-completion/-sc: ignored explicit argument '9002'


AttributeError: 'tuple' object has no attribute 'tb_frame'

In [10]:
data_name = 'F-actin'

train_lr_path = '../../datasets/F-actin/train/training_wf'
train_hr_path = '../../datasets/F-actin/train/training_gt'
val_lr_path = '../../datasets/F-actin/val/validate_wf'
val_hr_path = '../../datasets/F-actin/val/validate_gt'
test_lr_path = '../../datasets/F-actin/test/test_wf/level_01'
test_hr_path = '../../datasets/F-actin/test/test_gt'

train_extension_list = [
    os.path.splitext(e)[1] for e in os.listdir(train_hr_path)
]
train_extension = max(set(train_extension_list), key=train_extension_list.count)
train_filenames = sorted(
    [x for x in os.listdir(train_hr_path) if x.endswith(train_extension)]
)

validation_split = 0.1
if val_hr_path is None or val_lr_path is None:
    val_lr_path = train_lr_path
    val_hr_path = train_hr_path

    val_filenames = train_filenames[
        int(len(train_filenames) * (1 - validation_split )) :
    ]
    train_filenames = train_filenames[
        : int(len(train_filenames) * (1 - validation_split))
    ]
else:
    val_lr_path = val_lr_path
    val_hr_path = val_hr_path

    val_extension_list = [
        os.path.splitext(e)[1] for e in os.listdir(val_hr_path)
    ]
    val_extension = max(set(val_extension_list), key=val_extension_list.count)
    val_filenames = sorted(
        [x for x in os.listdir(val_hr_path) if x.endswith(val_extension)]
    )

test_lr_path = test_lr_path
test_hr_path = test_hr_path
test_extension_list = [
    os.path.splitext(e)[1] for e in os.listdir(test_hr_path)
]
test_extension = max(set(test_extension_list), key=test_extension_list.count)
test_filenames = sorted(
    [x for x in os.listdir(test_hr_path) if x.endswith(test_extension)]
)

print(len((os.listdir(train_hr_path))))
print(len((os.listdir(train_lr_path))))
print(len((os.listdir(val_hr_path))))
print(len((os.listdir(val_lr_path))))
print(len((os.listdir(test_hr_path))))
print(len((os.listdir(test_lr_path))))

372
372
60
60
15
15


In [None]:
crappifier_method = 'downsampleonly'
scale_factor = None
lr_patch_size_x = 64
lr_patch_size_y = 64
datagen_sampling_pdf = 0

rotation = True
horizontal_flip = True
vertical_flip = True

model_name = 'cddpm'
num_epochs = 3
batch_size = 4
learning_rate = 0.001
discriminator_learning_rate = 0.001
optimizer_name = 'OneCycle'
discriminator_optimizer = 'OneCycle'
lr_scheduler_name = 'adam'
discriminator_lr_scheduler = 'adam'

test_metric_indexes = [1,2,3,4,5]

additional_folder = ''
seed = 666

verbose = True

utils.set_seed(seed)

save_folder = "scale" + str(scale_factor)

if additional_folder:
    save_folder += "_" + additional_folder

saving_path = .

os.makedirs(saving_path, exist_ok=True)
utils.save_yaml(
    config,
    os.path.join(saving_path, "train_configuration.yaml"),
)


In [None]:


class ModelsTrainer:
    def __init__(
        self,
        config,
        train_lr_path,
        train_hr_path,
        val_lr_path,
        val_hr_path,
        test_lr_path,
        test_hr_path,
        saving_path,
        verbose=0,
    ):


        # To calculate the input and output shape and the actual scale factor 

        (
            _,
            train_input_shape,
            train_output_shape,
            actual_scale_factor,
        ) = datasets.TFDataset(
            filenames=self.train_filenames,
            hr_data_path=self.train_hr_path,
            lr_data_path=self.train_lr_path,
            scale_factor=self.scale_factor,
            crappifier_name=self.crappifier_method,
            lr_patch_shape=(self.lr_patch_size_x, self.lr_patch_size_y),
            datagen_sampling_pdf=self.datagen_sampling_pdf,
            validation_split=0.1,
            batch_size=self.batch_size,
            rotation=self.rotation,
            horizontal_flip=self.horizontal_flip,
            vertical_flip=self.vertical_flip,
        )

        self.input_data_shape = train_input_shape
        self.output_data_shape = train_output_shape

        if self.scale_factor is None or self.scale_factor != actual_scale_factor:
            self.scale_factor = actual_scale_factor
            utils.update_yaml(
                os.path.join(self.saving_path, "train_configuration.yaml"),
                "actual_scale_factor",
                actual_scale_factor,
            )
            if self.verbose:
                print(
                    "Actual scale factor that will be used is: {}".format(
                        self.scale_factor
                    )
                )


        print("\n" + "-" * 10)
        print(
            "{} model will be trained with the next configuration".format(
                self.model_name
            )
        )
        print("Dataset: {}".format(self.data_name))
        print("\tTrain wf path: {}".format(train_lr_path))
        print("\tTrain gt path: {}".format(train_hr_path))
        print("\tVal wf path: {}".format(val_lr_path))
        print("\tVal gt path: {}".format(val_hr_path))
        print("\tTest wf path: {}".format(test_lr_path))
        print("\tTest gt path: {}".format(test_hr_path))
        print("Preprocessing info:")
        print("\tScale factor: {}".format(self.scale_factor))
        print("\tCrappifier method: {}".format(self.crappifier_method))
        print("\tPatch size: {} x {}".format(self.lr_patch_size_x, self.lr_patch_size_y))
        print("Training info:")
        print("\tEpochs: {}".format(self.num_epochs))
        print("\tBatchsize: {}".format(self.batch_size))
        print("\tGen learning rate: {}".format(self.learning_rate))
        print("\tDisc learning rate: {}".format(self.discriminator_learning_rate))
        print("\tGen optimizer: {}".format(self.optimizer_name))
        print("\tDisc optimizer: {}".format(self.discriminator_optimizer))
        print("\tGen scheduler: {}".format(self.lr_scheduler_name))
        print("\tDisc scheduler: {}".format(self.discriminator_lr_scheduler))
        print("-" * 10)

    def launch(self):
        self.prepare_data()
        self.train_model()
        self.predict_images()
        self.eval_model()

        return self.history

    def prepare_data(self):
        raise NotImplementedError("prepare_data() not implemented.")

    def train_model(self):
        raise NotImplementedError("train_model() not implemented.")

    def predict_images(self):
        raise NotImplementedError("predict_images() not implemented")

    def eval_model(self):
        if self.verbose:
            utils.print_info("eval_model() - self.Y_test", self.Y_test)
            utils.print_info("eval_model() - self.predictions", self.predictions)
            utils.print_info("eval_model() - self.X_test", self.X_test)

        print("The predictions will be evaluated:")
        metrics_dict = metrics.obtain_metrics(
            gt_image_list=self.Y_test,
            predicted_image_list=self.predictions,
            wf_image_list=self.X_test,
            test_metric_indexes=self.test_metric_indexes,
        )

        os.makedirs(self.saving_path + "/test_metrics", exist_ok=True)

        for key in metrics_dict.keys():
            if len(metrics_dict[key]) > 0:
                print("{}: {}".format(key, np.mean(metrics_dict[key])))
                np.save(
                    self.saving_path + "/test_metrics/" + key + ".npy",
                    metrics_dict[key],
                )


class TensorflowTrainer(ModelsTrainer):
    def __init__(
        self,
        config,
        train_lr_path,
        train_hr_path,
        val_lr_path,
        val_hr_path,
        test_lr_path,
        test_hr_path,
        saving_path,
        verbose=0,
    ):
        super().__init__(
            config,
            train_lr_path,
            train_hr_path,
            val_lr_path,
            val_hr_path,
            test_lr_path,
            test_hr_path,
            saving_path,
            verbose=verbose,
        )

        self.library_name = "tensorflow"

    def prepare_data(self):
        train_generator, train_input_shape,train_output_shape, actual_scale_factor = datasets.TFDataset(
            filenames=self.train_filenames,
            hr_data_path=self.train_hr_path,
            lr_data_path=self.train_lr_path,
            scale_factor=self.scale_factor,
            crappifier_name=self.crappifier_method,
            lr_patch_shape=(self.lr_patch_size_x, self.lr_patch_size_y),
            datagen_sampling_pdf=self.datagen_sampling_pdf,
            validation_split=0.1,
            batch_size=self.batch_size,
            rotation=self.rotation,
            horizontal_flip=self.horizontal_flip,
            vertical_flip=self.vertical_flip,
        )

        val_generator, _, _, _ = datasets.TFDataset(
            filenames=self.val_filenames,
            hr_data_path=self.val_hr_path,
            lr_data_path=self.val_lr_path,
            scale_factor=self.scale_factor,
            crappifier_name=self.crappifier_method,
            lr_patch_shape=(self.lr_patch_size_x, self.lr_patch_size_y),
            datagen_sampling_pdf=self.datagen_sampling_pdf,
            validation_split=0.1,
            batch_size=self.batch_size,
            rotation=self.rotation,
            horizontal_flip=self.horizontal_flip,
            vertical_flip=self.vertical_flip,
        )

        self.input_data_shape = train_input_shape
        self.output_data_shape = train_output_shape

        if self.verbose:
            print("input_data_shape: {}".format(self.input_data_shape))
            print("output_data_shape: {}".format(self.output_data_shape))

        if self.scale_factor is None or self.scale_factor != actual_scale_factor:
            self.scale_factor = actual_scale_factor
            utils.update_yaml(
                os.path.join(self.saving_path, "train_configuration.yaml"),
                "actual_scale_factor",
                actual_scale_factor,
            )
            if self.verbose:
                print(
                    "Actual scale factor that will be used is: {}".format(
                        self.scale_factor
                    )
                )

        utils.update_yaml(
            os.path.join(self.saving_path, "train_configuration.yaml"),
            "input_data_shape",
            self.input_data_shape,
        )
        utils.update_yaml(
            os.path.join(self.saving_path, "train_configuration.yaml"),
            "output_data_shape",
            self.output_data_shape,
        )

        self.train_generator = train_generator
        self.val_generator = val_generator

    def train_model(self):

        callbacks = []

        lr_schedule = optimizer_scheduler_utils.select_lr_schedule(
                    library_name=self.library_name,
                    lr_scheduler_name=self.lr_scheduler_name,
                    data_len=self.input_data_shape[0] // self.batch_size,
                    num_epochs=self.num_epochs,
                    learning_rate=self.learning_rate,
                    monitor_loss='val_ssim_loss',
                    name=None,
                    optimizer=None,
                    frequency=None,
                    additional_configuration=self.config,
        )

        if self.lr_scheduler_name in ["CosineDecay", "MultiStepScheduler"]:
            self.optim = optimizer_scheduler_utils.select_optimizer(
                library_name=self.library_name,
                optimizer_name=self.optimizer_name,
                learning_rate=lr_schedule,
                check_point=None,
                parameters=None,
                additional_configuration=self.config
            )
        else:
            self.optim = optimizer_scheduler_utils.select_optimizer(
                library_name=self.library_name,
                optimizer_name=self.optimizer_name,
                learning_rate=self.learning_rate,
                check_point=None,
                parameters=None,
                additional_configuration=self.config
            )
            callbacks.append(lr_schedule)

        model = model_utils.select_model(
            model_name=self.model_name,
            input_shape=self.input_data_shape,
            output_channels=self.output_data_shape[-1],
            scale_factor=self.scale_factor,
            datagen_sampling_pdf=self.datagen_sampling_pdf,
            model_configuration=self.config.used_model,
            batch_size=self.batch_size,
        )

        loss_funct = tf.keras.losses.mean_absolute_error
        eval_metric = tf.keras.losses.mean_squared_error

        model.compile(
            optimizer=self.optim,
            loss=loss_funct,
            metrics=[eval_metric, utils.ssim_loss],
        )

        trainableParams = np.sum(
            [np.prod(v.get_shape()) for v in model.trainable_weights]
        )
        nonTrainableParams = np.sum(
            [np.prod(v.get_shape()) for v in model.non_trainable_weights]
        )
        totalParams = trainableParams + nonTrainableParams

        model_checkpoint = tf_ModelCheckpoint(
            os.path.join(self.saving_path, "weights_best.h5"),
            monitor="val_loss",
            verbose=1,
            save_best_only=True,
            save_weights_only=True,
        )
        callbacks.append(model_checkpoint)

        # callback for early stopping
        earlystopper = EarlyStopping(
            monitor=self.config.model.optim.early_stop.loss,
            patience=self.config.model.optim.early_stop.patience,
            min_delta=0.005,
            mode=self.config.model.optim.early_stop.mode,
            verbose=1,
            restore_best_weights=True,
        )
        callbacks.append(earlystopper)

        for x, y in self.val_generator:
            x_val = x
            y_val = y
            break

        plt_saving_path = os.path.join(self.saving_path, "training_images")
        os.makedirs(plt_saving_path, exist_ok=True)
        plot_callback = tensorflow_callbacks.PerformancePlotCallback(
            x_val, y_val, plt_saving_path, frequency=5, is_cddpm=self.model_name=="cddpm"
        )
        callbacks.append(plot_callback)

        if self.verbose:
            print("Model configuration:")
            print(f"\tModel_name: {self.model_name}")
            print(f"\tOptimizer: {self.optim}")
            print(f"\tLR scheduler: {lr_schedule}")
            print(f"\tLoss: {loss_funct}")
            print(f"\tEval: {eval_metric}")
            print(
                "Trainable parameteres: {} \nNon trainable parameters: {} \nTotal parameters: {}".format(
                    trainableParams, nonTrainableParams, totalParams
                )
            )

        if self.model_name == "cddpm":
            # calculate mean and variance of training dataset for normalization
            model.normalizer.adapt(self.train_generator.map(lambda x, y: x))

        start = time.time()

        print("Training is going to start:")
        history = model.fit(
            self.train_generator,
            validation_data=self.val_generator,
            epochs=self.num_epochs,
            callbacks=callbacks,
        )

        dt = time.time() - start
        mins, sec = divmod(dt, 60)
        hour, mins = divmod(mins, 60)
        print(
            "\nTime elapsed:", hour, "hour(s)", mins, "min(s)", round(sec), "sec(s)\n"
        )

        model.save_weights(os.path.join(self.saving_path, "weights_last.h5"))
        self.history = history

        os.makedirs(self.saving_path + "/train_metrics", exist_ok=True)

        for key in history.history:
            np.save(
                self.saving_path + "/train_metrics/" + key + ".npy",
                history.history[key],
            )
        np.save(self.saving_path + "/train_metrics/time.npy", np.array([dt]))

    def predict_images(self):
        ground_truths = []
        widefields = []
        predictions = []
        print("Prediction is going to start:")
        for test_filename in self.test_filenames:
            lr_images, hr_images, _ = datasets.extract_random_patches_from_folder(
                hr_data_path=self.test_hr_path,
                lr_data_path=self.test_lr_path,
                filenames=[test_filename],
                scale_factor=self.scale_factor,
                crappifier_name=self.crappifier_method,
                lr_patch_shape=None,
                datagen_sampling_pdf=1,
            )

            hr_images = np.expand_dims(hr_images, axis=-1)
            lr_images = np.expand_dims(lr_images, axis=-1)

            ground_truths.append(hr_images[0, ...])
            widefields.append(lr_images[0, ...])
            
            if self.model_name == "unet":
                if self.verbose:
                    print("Padding will be added to the images.")
                    print("LR images before padding:")
                    print(
                        "LR images - shape:{} dtype:{}".format(
                            lr_images.shape, lr_images.dtype
                        )
                    )

                height_padding, width_padding = utils.calculate_pad_for_Unet(
                    lr_img_shape=lr_images[0].shape,
                    depth_Unet=self.config.used_model.depth,
                    is_pre=True,
                    scale=self.scale_factor,
                )

                if self.verbose and (
                    height_padding == (0, 0) and width_padding == (0, 0)
                ):
                    print("No padding has been needed to be added.")

                lr_images = utils.add_padding_for_Unet(
                    lr_imgs=lr_images,
                    height_padding=height_padding,
                    width_padding=width_padding,
                )

            if self.verbose:
                print(
                    "HR images - shape:{} dtype:{}".format(
                        hr_images.shape, hr_images.dtype
                    )
                )
                print(
                    "LR images - shape:{} dtype:{}".format(
                        lr_images.shape, lr_images.dtype
                    )
                )

            if self.config.model.others.positional_encoding:
                lr_images = utils.concatenate_encoding(
                    lr_images,
                    self.config.model.others.positional_encoding_channels,
                )

            optim = optimizer_scheduler_utils.select_optimizer(
                library_name=self.library_name,
                optimizer_name=self.optimizer_name,
                learning_rate=self.learning_rate,
                check_point=None,
                parameters=None,
                additional_configuration=self.config,
            )

            model = model_utils.select_model(
                model_name=self.model_name,
                input_shape=lr_images.shape,
                output_channels=hr_images.shape[-1],
                scale_factor=self.scale_factor,
                datagen_sampling_pdf=self.datagen_sampling_pdf,
                model_configuration=self.config.used_model,
            )

            loss_funct = "mean_absolute_error"
            eval_metric = "mean_squared_error"

            model.compile(
                optimizer=optim, loss=loss_funct, metrics=[eval_metric, utils.ssim_loss]
            )

            # Load old weights
            model.load_weights(os.path.join(self.saving_path, "weights_best.h5"))

            aux_prediction = model.predict(lr_images, batch_size=1)

            if self.model_name == "unet":
                aux_prediction = utils.remove_padding_for_Unet(
                    pad_hr_imgs=aux_prediction,
                    height_padding=height_padding,
                    width_padding=width_padding,
                    scale=self.scale_factor,
                )

            aux_prediction = datasets.normalization(aux_prediction)

            predictions.append(aux_prediction[0, ...])

        self.Y_test = ground_truths
        self.predictions = predictions
        self.X_test = widefields

        assert np.max(self.Y_test) <= 1.0 and np.max(self.predictions) <= 1.0 and np.max(self.X_test) <= 1.0
        assert np.min(self.Y_test) >= 0.0 and np.min(self.predictions) >= 0.0 and np.min(self.X_test) >= 0.0 

        if self.verbose:
            utils.print_info("predict_images() - Y_test", self.Y_test)
            utils.print_info("predict_images() - predictions", self.predictions)
            utils.print_info("predict_images() - X_test", self.X_test)

        # Save the predictions
        os.makedirs(self.saving_path + "/predicted_images", exist_ok=True)

        for i, image in enumerate(predictions):
            tf.keras.preprocessing.image.save_img(
                self.saving_path + "/predicted_images/" + self.test_filenames[i],
                image,
                data_format=None,
                file_format=None,
            )
        print(
            "Predicted images have been saved in: "
            + self.saving_path
            + "/predicted_images"
        )

