In [None]:
import os
# MXNet/cuDNN switches, exported in the first cell so they are already in the
# process environment when the later `import mxnet` cell runs.
os.environ.update({
    "MXNET_CUDNN_LIB_CHECKING": "0",
    "MXNET_CUDNN_AUTOTUNE_DEFAULT": "0",
})

Set Experiment configuration

In [None]:
import const
from experimentConfig import ExperimentConfig

# Settings for the 2022 experiment, collected in one mapping so they can be
# inspected or tweaked before the config object is built.
settings_2022 = {
    "input_directory": const.images_2022,
    "year": 2022,
    "isVnir": True,
    "numberOfimages": 648,
    "all_bands": True,
    "use_hyperparameter_tuning": False,
    "use_augmentation": False,
    "use_mixup_cutmix": False,
}
config_2022 = ExperimentConfig(**settings_2022)

# Echo the resolved configuration, then create its output directory.
print(config_2022)
config_2022.makeOutputDir()

Load images and masks

In [None]:
from image_Dictionary import ImageDict
# Image loader; the boolean flag presumably selects image (False) vs. mask
# (True) mode -- confirm against ImageDict.
images_2022 = ImageDict(False)
image_dict_2022 = images_2022.load_tif_files(config_2022)

# Mask loader; it is additionally handed the already-loaded image dictionary.
masks_2022 = ImageDict(True)
mask_dict_2022 = masks_2022.load_tif_files(config_2022, image_dict_2022)

In [None]:
from train_val_datalaoder import TrainValDataLoader

# Batch size handed to both the training and the validation loader.
BATCH_SIZE = 4

# The third argument forwards the config's boolean data-augmentation switch.
trainValDataloader = TrainValDataLoader(
    image_dict_2022,
    mask_dict_2022,
    config_2022.use_augmentation,
)

# Build the train/validation loader pair.
train_loader, val_loader = trainValDataloader.prepare_data_loaders(batch_size=BATCH_SIZE)

# Train 2022 images

In [None]:
import time
import mxnet as mx
from mxnet import context
from mxnet.base import MXNetError

from mx_Train import myTrain
from my_Save import saveAsCSV, SaveModels
from myPlots import lossPlot

# Train on the default GPU context and drain any work already queued on it.
ctx = context.gpu()
mx.nd.waitall()

# Maximum number of attempts before the cell gives up.
max_retries = 5
retry_count = 0
# Kept so the final failure can be chained to the error that caused it
# (the `except ... as e` name is unbound once the handler exits).
last_cudnn_error = None

# Retry only on the transient cuDNN execution failure; any other MXNetError
# propagates immediately.
while retry_count < max_retries:
    try:
        mxTn = myTrain(train_loader, val_loader, config_2022)
        loss_each_epoch, model_list, epoch = mxTn.train(ctx, epochs=50)
        # Persist results only after a complete training run.
        saveAsCSV(["Current Epoch", "Training Loss", "Validation Loss"], config_2022.lossFile, loss_each_epoch)
        SaveModels(config_2022.output_models, model_list)
        lossPlot(loss_each_epoch, config_2022.output_directory)
        break
    except MXNetError as e:
        if 'CUDNN_STATUS_EXECUTION_FAILED' not in str(e):
            raise  # If it's another error, raise it
        last_cudnn_error = e
        print(f"cuDNN execution failed. Retrying... ({retry_count + 1}/{max_retries})")
        # waitall() blocks until all pending asynchronous operations have
        # finished; it does not release GPU memory.
        mx.nd.waitall()
        time.sleep(5)  # Wait for a few seconds before retrying
        retry_count += 1  # Increment the retry counter
else:
    # Reached only when the loop ended without `break`, i.e. every attempt
    # failed. Raise instead of printing so that "Run All" stops here rather
    # than running the prediction cells without a freshly trained model.
    raise RuntimeError(
        "Maximum retries reached. Training failed due to cuDNN error."
    ) from last_cudnn_error

# Apply the model to the 2022 images

In [None]:
from postprocessing import predictedImages
# Build the prediction helper from the 2022 config and the data loader's
# `val_ids` (presumably the validation image ids -- confirm in TrainValDataLoader).
vPredictedImages = predictedImages(config_2022, trainValDataloader.val_ids)
# NOTE(review): here the positional arguments after `ctx` are
# (0.6, 0.1, image_dict, mask_dict), whereas the 2010 call in this notebook
# passes (id_list, 0.6, 0.1, image_dict). Both cannot match one positional
# signature -- verify against predictedImages.save_predictions and consider
# keyword arguments. The meaning of 0.6 and 0.1 is not visible from here.
vPredictedImages.save_predictions(config_2022, ctx, 0.6, 0.1, image_dict_2022, mask_dict_2022)

# Apply the model to the 2010 images

In [None]:
# Configuration for the 2010 imagery, which is only used for inference here.
# NOTE(review): unlike config_2022, `all_bands` is not passed, so
# ExperimentConfig's default applies. If that default is not True the 2010
# inputs may not match what the 2022 model was trained on -- confirm.
config_2010 = ExperimentConfig(
    input_directory = const.images_2010,
    year=2010,
    isVnir=True,
    numberOfimages=648,
    use_hyperparameter_tuning=False,
    use_augmentation=False,
    use_mixup_cutmix=False
)
print(config_2010)
config_2010.makeOutputDir()
# Load the 2010 images only; no masks are loaded for this year.
images_2010 = ImageDict(False)
image_dict_2010 = images_2010.load_tif_files(config_2010)

# Reuses the prediction helper built for 2022, passing every 2010 image key.
# NOTE(review): the positional argument order here (id list before 0.6, 0.1;
# no mask dict) differs from the 2022 save_predictions call -- verify both
# against the method signature.
vPredictedImages.save_predictions(config_2010, ctx, list(image_dict_2010.keys()), 0.6, 0.1, image_dict_2010)