In [None]:
import os
import ast
import sys
import json
import random
import logging
import argparse
import numpy as np
from tqdm import tqdm
from os.path import dirname as up

# Select which GPU this process may see. Both variables are set before
# `import torch` below, so they are in place when CUDA is first initialised.
# NOTE(review): the device index "1" is machine-specific; presumably on a host
# without a second GPU no CUDA device is visible and main() falls back to the
# CPU through its torch.cuda.is_available() check - confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# import segmentation_models_pytorch as smp

import torch
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader

# Working directory of the kernel; it is also passed to main() as
# 'checkpoint_root_path' in the options cell.
path_cur = os.path.abspath(os.getcwd())

# Make the working directory importable so the local modules below resolve.
sys.path.append(path_cur)
from unet import UNet

from vims_dataloader import GenDEBRIS, RandomRotationTransform , class_distr, gen_weights, bands_mean, bands_std

# Make <two levels above the working directory>/utils importable.
sys.path.append(os.path.join(up(up(path_cur)), 'utils'))
from vims_metrics import Evaluation

# Two directory levels above the working directory; base of the 'logs_paper'
# folder used for the log file and the TensorBoard runs.
root_path = up(up(path_cur))

# logging.basicConfig(filename=os.path.join(root_path, 'logs_paper', 'log_unet.log'), filemode='a',level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')
# logging.info('*'*10)

In [None]:
# Display the resolved root path so the log location can be checked by eye.
root_path

In [None]:
"""
The CPU and GPU random number generators keep separate state, so seeding one does not seed the other.
To make runs reproducible, the GPU generator therefore has to be seeded explicitly as well.
Note that, because GPU architectures differ, running the same code on different GPUs is not guaranteed to produce the same random numbers.
Still, the code should not produce a different output every time it is run on the exact same hardware.

"""

def seed_all(seed):
    """Seed every random number generator used here and make cuDNN deterministic.

    Args:
        seed: integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU generator, current CUDA device and all CUDA devices).
    """
    # Python / NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators, in the same relative order as before
    for torch_seeder in (torch.manual_seed,
                         torch.cuda.manual_seed_all,
                         torch.cuda.manual_seed):
        torch_seeder(seed)

    # Deterministic cuDNN kernels, no auto-tuner
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
def seed_worker(worker_id):
    """Seed NumPy and ``random`` from the current PyTorch seed.

    Passed to ``DataLoader`` as ``worker_init_fn``. ``worker_id`` is accepted
    because that callback signature requires it; it is not used.
    """
    # torch.initial_seed() returns a Python int; fold it into 32 bits before
    # handing it to the other generators.
    seed32 = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(seed32)


In [None]:
# (TensorBoard tag, key in the Evaluation() result) pairs written after each evaluation.
_TENSORBOARD_SCORES = (
    ('Precision/test macroPrec', 'macroPrec'),
    ('Precision/test microPrec', 'microPrec'),
    ('Precision/test weightPrec', 'weightPrec'),
    ('Recall/test macroRec', 'macroRec'),
    ('Recall/test microRec', 'microRec'),
    ('Recall/test weightRec', 'weightRec'),
    ('F1/test macroF1', 'macroF1'),
    ('F1/test microF1', 'microF1'),
    ('F1/test weightF1', 'weightF1'),
    ('IoU/test MacroIoU', 'IoU'),
)


def _parse_lr_steps(lr_steps):
    """Return the MultiStepLR milestones as a list of ints.

    Accepts a real sequence (``[40]``), a single int, or the string form of
    either (``'[40]'``), which is how the options dict spells the value.
    """
    if isinstance(lr_steps, str):
        lr_steps = ast.literal_eval(lr_steps)
    if isinstance(lr_steps, int):
        lr_steps = [lr_steps]
    return [int(step) for step in lr_steps]


def _evaluate(model, loader, criterion, device, num_classes):
    """Run ``model`` over ``loader`` without gradients.

    Only pixels whose label is not -1 (the criterion's ignore index) are
    scored.

    Returns:
        (mean_loss, y_predicted, y_true): the batch losses averaged with the
        number of annotated pixels as weight, and two flat NumPy arrays with
        the predicted and the true class of every annotated pixel.
        ``mean_loss`` is NaN when the loader holds no annotated pixel.
    """
    model.eval()

    loss_sum = 0.0
    n_pixels = 0
    pred_chunks = []
    true_chunks = []

    with torch.no_grad():
        for batch in tqdm(loader, desc="testing"):
            image = batch['image'].to(device)
            target = batch['mask'].to(device)

            logits = model(image)
            loss = criterion(logits, target)

            # Move the class axis last, then flatten to one row per pixel.
            logits = torch.movedim(logits, 1, -1).reshape(-1, num_classes)
            target = target.reshape(-1)
            annotated = target != -1
            n_annotated = int(annotated.sum().item())

            # A batch without annotated pixels has an undefined (NaN) mean
            # loss; skipping it keeps NaN out of the running sum.
            if n_annotated == 0:
                continue

            loss_sum += loss.item() * n_annotated
            n_pixels += n_annotated

            # argmax over the logits equals argmax over their softmax.
            pred_chunks.append(logits[annotated].argmax(dim=1).cpu().numpy())
            true_chunks.append(target[annotated].cpu().numpy())

    if pred_chunks:
        y_predicted = np.concatenate(pred_chunks)
        y_true = np.concatenate(true_chunks)
    else:
        y_predicted = np.empty(0, dtype=np.int64)
        y_true = np.empty(0, dtype=np.int64)

    mean_loss = loss_sum / n_pixels if n_pixels else float('nan')
    return mean_loss, y_predicted, y_true


def _log_evaluation(header, mean_loss, scores):
    """Write one evaluation summary to the log file."""
    logging.info("\n")
    logging.info("Test loss was: " + str(mean_loss))
    logging.info(header)
    logging.info("Evaluation: " + str(scores))


def main(options):
    """Train the U-Net, or evaluate a saved checkpoint, as described by ``options``.

    Keys read from ``options``:
        mode: 'train' (train on the 'train' split, evaluate on 'val') or
            'test' (evaluate on the 'test' split only).
        lr, decay: Adam learning rate and weight decay (L2 penalty).
        data_source: label used in the run / checkpoint folder name.
        checkpoint_root_path: directory under which checkpoints are stored.
        agg_to_water: forwarded to GenDEBRIS.
        batch: batch size of every DataLoader.
        input_channels, output_channels, hidden_channels: U-Net layout.
        resume_from_epoch: epoch whose checkpoint is loaded before running;
            0 starts from freshly initialised weights.
        weight_param: forwarded to gen_weights as ``c``.
        reduce_lr_on_plateau: 1 selects ReduceLROnPlateau, anything else
            selects MultiStepLR with the ``lr_steps`` milestones.
        lr_steps: milestones as a list of ints or its string form ('[40]').
        epochs: last epoch to train.
        eval_every: evaluate and checkpoint every this many epochs (epoch 1
            is always evaluated).

    Raises:
        ValueError: if ``options['mode']`` is neither 'train' nor 'test'.
    """
    mode = options['mode']
    if mode not in ('train', 'test'):
        # A bare `raise` here has no active exception to re-raise and only
        # produces an unrelated RuntimeError; name the actual problem.
        raise ValueError("options['mode'] must be 'train' or 'test', got {!r}".format(mode))

    # Reproducibility: limit the sources of nondeterministic behaviour.
    seed_all(0)

    # Dedicated generator so the DataLoader shuffling is reproducible.
    g = torch.Generator()
    g.manual_seed(0)

    dest_tensorboard = 'unet_{}_{}'.format(options['lr'], options['data_source'])
    checkpoint_path = os.path.join(options['checkpoint_root_path'], dest_tensorboard)

    # Transformations
    transform_train = transforms.Compose([transforms.ToTensor(),
                                          RandomRotationTransform([-90, 0, 90, 180]),
                                          transforms.RandomHorizontalFlip()])
    transform_test = transforms.Compose([transforms.ToTensor()])

    # Standardization
    standardization = None  # transforms.Normalize(bands_mean, bands_std)

    def make_loader(split, transform, shuffle):
        """Build the DataLoader for one dataset split."""
        dataset = GenDEBRIS(split, transform=transform, standardization=standardization,
                            agg_to_water=options['agg_to_water'])
        return DataLoader(dataset,
                          batch_size=options['batch'],
                          shuffle=shuffle,
                          num_workers=0,
                          pin_memory=False,
                          worker_init_fn=seed_worker,
                          generator=g)

    if mode == 'train':
        train_loader = make_loader('train', transform_train, shuffle=True)
        test_loader = make_loader('val', transform_test, shuffle=False)
    else:
        test_loader = make_loader('test', transform_test, shuffle=False)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    num_classes = options['output_channels']
    model = UNet(input_bands=options['input_channels'],
                 output_classes=num_classes,
                 hidden_channels=options['hidden_channels'])
    model.to(device)

    # Load the weights of a specific epoch to continue training or to evaluate.
    # Checkpoints are written from epoch 1 on, so every positive value is a
    # valid resume point.
    resume_from_epoch = options['resume_from_epoch']
    if resume_from_epoch > 0:
        model_file = os.path.join(checkpoint_path, str(resume_from_epoch), 'model.pth')
        logging.info('Loading model files from folder: %s' % model_file)

        checkpoint = torch.load(model_file, map_location=device)
        model.load_state_dict(checkpoint)

        del checkpoint  # dereference
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Weighted cross-entropy loss; it measures how close the predicted output
    # is to the expected output. Pixels labelled -1 are ignored.
    # https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
    weight = gen_weights(class_distr, c=options['weight_param'])
    criterion = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction='mean', weight=weight.to(device))

    ###############################################################
    # Evaluation only - testing mode                              #
    ###############################################################
    if mode == 'test':
        test_loss, y_predicted, y_true = _evaluate(model, test_loader, criterion, device, num_classes)
        acc = Evaluation(y_predicted, y_true)
        _log_evaluation("STATISTICS: \n", test_loss, acc)
        return

    ###############################################################
    # Training mode                                               #
    ###############################################################
    optimizer = torch.optim.Adam(model.parameters(), lr=options['lr'], weight_decay=options['decay'])

    # Learning rate scheduler. The deprecated `verbose` flag is not passed;
    # learning-rate changes are written to the log below instead.
    use_plateau = options['reduce_lr_on_plateau'] == 1
    if use_plateau:
        # Cut the learning rate by 10x once the validation loss has not
        # improved for `patience` evaluations.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
    else:
        # Cut the learning rate by 10x at each milestone epoch.
        # NOTE(review): scheduler and optimizer state are not checkpointed, so
        # after a resume the milestones count from the resumed epoch.
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, _parse_lr_steps(options['lr_steps']), gamma=0.1)

    start = resume_from_epoch + 1
    epochs = options['epochs']
    eval_every = options['eval_every']
    if start > epochs:
        logging.warning("resume_from_epoch (%s) is not below epochs (%s): no epoch will be trained",
                        resume_from_epoch, epochs)

    # The SummaryWriter logs data for TensorBoard; it is closed in `finally`
    # so pending events are flushed even if training is interrupted.
    writer = SummaryWriter(os.path.join(root_path, 'logs_paper', dest_tensorboard))
    try:
        # Write the model graph to TensorBoard using one training batch.
        image_temp = next(iter(train_loader))['image']
        writer.add_graph(model, image_temp.to(device))

        model.train()
        steps_per_epoch = len(train_loader)
        last_lr = optimizer.param_groups[0]['lr']

        for epoch in range(start, epochs + 1):
            loss_sum = 0.0
            n_samples = 0

            for batch_idx, batch in enumerate(tqdm(train_loader, desc="training")):
                image = batch['image'].to(device)
                target = batch['mask'].to(device)

                optimizer.zero_grad()
                logits = model(image)
                loss = criterion(logits, target)
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                batch_size = target.shape[0]
                loss_sum += batch_loss * batch_size
                n_samples += batch_size

                # Running loss, one point per optimisation step.
                writer.add_scalar('training loss', batch_loss, (epoch - 1) * steps_per_epoch + batch_idx)

            train_loss = loss_sum / n_samples
            logging.info("Training loss was: " + str(train_loss))

            if epoch % eval_every == 0 or epoch == 1:
                val_loss, y_predicted, y_true = _evaluate(model, test_loader, criterion, device, num_classes)

                # Save the scores to the .log file and to TensorBoard.
                acc = Evaluation(y_predicted, y_true)
                _log_evaluation("STATISTICS AFTER EPOCH " + str(epoch) + ": \n", val_loss, acc)

                logging.info("Saving models")
                model_dir = os.path.join(checkpoint_path, str(epoch))
                os.makedirs(model_dir, exist_ok=True)
                torch.save(model.state_dict(), os.path.join(model_dir, 'model.pth'))

                writer.add_scalars('Loss per epoch', {'Test loss': val_loss,
                                                      'Train loss': train_loss},
                                   epoch)
                for tag, key in _TENSORBOARD_SCORES:
                    writer.add_scalar(tag, acc[key], epoch)

                # The plateau scheduler needs the validation loss, so it can
                # only advance on evaluated epochs.
                if use_plateau:
                    scheduler.step(val_loss)

                model.train()

            # Milestones are expressed in epochs, so MultiStepLR must advance
            # every epoch, not only on the evaluated ones.
            if not use_plateau:
                scheduler.step()

            current_lr = optimizer.param_groups[0]['lr']
            if current_lr != last_lr:
                logging.info("Learning rate changed from %s to %s after epoch %s", last_lr, current_lr, epoch)
                last_lr = current_lr
    finally:
        writer.close()



In [None]:
data_source = 'Image_allyear_VA_256'
lr = 2e-3
epo = 100

logname = 'unet_{}_{}.log'.format(lr, data_source)

# logging.basicConfig opens the log file immediately and does not create
# missing directories, so make sure the target folder exists first.
log_dir = os.path.join(root_path, 'logs_paper')
os.makedirs(log_dir, exist_ok=True)

logging.basicConfig(filename=os.path.join(log_dir, logname), filemode='a',level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')
logging.info('*'*10)


# NOTE(review): 'resume_from_epoch' (110) is larger than 'epochs' (100). main()
# trains over range(resume_from_epoch + 1, epochs + 1), which is empty for
# these values, and it expects a checkpoint folder named '110' to exist.
# Raise 'epochs' or lower 'resume_from_epoch' (0 = start from scratch) - confirm intent.
options = {'agg_to_water': False, 'mode': 'train', 'epochs':epo, 'batch': 32, 'resume_from_epoch': 110,
          'input_channels': 3, 'output_channels': 4, 'hidden_channels': 16, 'weight_param': 1.03,
          'lr': lr, 'decay': 0, 'reduce_lr_on_plateau': 1, 'lr_steps': [40],
          'checkpoint_root_path': path_cur, 'eval_every': 1,'data_source': data_source}
# decay: L2 penalty in optimizer
# data_source: used to format the output folder name
# eval_every: evaluation rate == 1, evaluate every epoch
# lr_steps: MultiStepLR milestones (epochs) as a list of ints; only used when
#           reduce_lr_on_plateau != 1


In [None]:
# Run training or evaluation with the options defined in the cell above.
main(options)

In [None]:
# nvidia-smi