# Import libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')

import os
import cv2
import csv
import pickle
import pydicom
import numpy as np
import pandas as pd 
from glob import glob


# import the necessary packages
import keras
import tensorflow as tf
from keras import backend as K

from dataset import prepare_data
from metric_loss import my_iou_metric, iou_metric_batch_val, bce_dice_loss
from predict import predict_result_val, prepare_test, get_test, get_prediction, get_rles
from generator import DataGenerator, label_generator

import seg_models
keras.backend.set_image_data_format('channels_last')

from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint

import sys
sys.path.insert(0, 'siim-acr-pneumothorax-segmentation')
from mask_functions import rle2mask, mask2rle

## Seeding
# Seed every random number generator reachable from this cell so that
# repeated runs start from the same state.
import random

seed = 1994
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
# here only influences processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
# Call the seeding function. Assigning to `np.random.seed` (as before) replaced
# the function with an int and seeded nothing.
np.random.seed(seed)
# TensorFlow 2.x exposes `tf.random.set_seed`; 1.x exposes `tf.set_random_seed`.
try:
    tf.random.set_seed(seed)
except AttributeError:
    tf.set_random_seed(seed)

import gc   # Garbage collector, used to free memory held by deleted objects

# Dataset

In [None]:
# Configuration parameters used by the cells below.
#   org_size   - original image size
#   img_size   - size the images are resized to
#   batch_size - batch size for training the U-Net
org_size, img_size = 1024, 512
batch_size = 4

## Load train and validation data from files

In [None]:
# Load the pickled object stored in process_data/X_train.pkl into X_train.
# The `with` block closes the file handle as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('process_data/X_train.pkl', 'rb') as pkl_file_train:
    X_train = pickle.load(pkl_file_train)

In [None]:
# Load the pickled object stored in process_data/X_val.pkl into X_val.
# The `with` block closes the file handle as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('process_data/X_val.pkl', 'rb') as pkl_file_val:
    X_val = pickle.load(pkl_file_val)

In [None]:
# Load the pickled object stored in process_data/masks.pkl into masks.
# The `with` block closes the file handle as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('process_data/masks.pkl', 'rb') as pkl_file_masks:
    masks = pickle.load(pkl_file_masks)

## Data generation & Augmentations

In [None]:
import albumentations as A

In [None]:
# Augmentation pipeline passed to the training DataGenerator.

# At most one intensity transform, chosen with probability 0.3.
_intensity_jitter = A.OneOf(
    [A.CLAHE(), A.RandomContrast(), A.RandomGamma(), A.RandomBrightness()],
    p=0.3,
)

# At most one non-rigid distortion, chosen with probability 0.3.
_geometric_warp = A.OneOf(
    [
        A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        A.GridDistortion(),
        A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
    ],
    p=0.3,
)

# Shift / scale / rotate with constant-value borders, applied with probability 0.5.
_shift_scale_rotate = A.ShiftScaleRotate(
    shift_limit=0.2,
    scale_limit=0.2,
    rotate_limit=20,
    border_mode=cv2.BORDER_CONSTANT,
    p=0.5,
)

# A RandomSizedCrop(min_max_height=(412, 512), height=img_size, width=img_size, p=0.25)
# step is left disabled.
training_augmentation = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        _intensity_jitter,
        _geometric_warp,
        _shift_scale_rotate,
    ],
    p=1,
)

In [None]:
# Keyword arguments for the two DataGenerator instances. Both use the same
# image size, batch size, channel count and shuffling; only the training
# generator receives augmentations.
# NOTE(review): the validation generator is built with shuffle=True here, while
# the later evaluation cell uses shuffle=False - confirm this is intended.
params_val = dict(
    img_size=img_size,
    batch_size=batch_size,
    n_channels=3,
    shuffle=True,
)
params_train = dict(params_val, augmentations=training_augmentation)

# Generators
training_generator = DataGenerator(X_train, masks, **params_train)
validation_generator = DataGenerator(X_val, masks, **params_val)

In [None]:
# Pull the first batch from the training generator and show the shapes of
# its two components as a quick sanity check.
x, y = training_generator[0]
print(x.shape, y.shape)

# Segmentation model

In [None]:
# Reset the Keras backend session before building the model
# (`K` is the same module object as `keras.backend`).
keras.backend.clear_session()

In [None]:
# Build the U-Net from `seg_models` with an EfficientNet-B4 backbone and
# 'imagenet' encoder weights, then print its layer summary.
BACKBONE = 'efficientnetb4'
unet_eff = seg_models.Unet(
    backbone_name=BACKBONE,
    encoder_weights='imagenet',
)
unet_eff.summary()

In [None]:
# Compile with the Adam optimizer at its default settings, the BCE+Dice loss
# from `seg_models`, and the IoU metric.
# `Adam` was never imported (only `SGD` is), so reach it through the
# already-imported `keras.optimizers` module instead of a bare name.
unet_eff.compile(
    optimizer=keras.optimizers.Adam(),
    loss=seg_models.losses.bce_dice_loss,
    metrics=[my_iou_metric],
)

In [None]:
# Training length and callbacks.
# NOTE(review): `SWA` and `CosineAnnealingScheduler` are not imported in any
# visible cell - confirm where they are defined before running.
epochs = 80
# Second positional argument is 76; presumably the epoch at which weight
# averaging starts - TODO confirm against the SWA implementation.
swa = SWA('model_output/512_efficientnetb4_swa.model', 76)

callbacks = [
    # Keep only the checkpoint with the lowest validation loss.
    ModelCheckpoint(
        "model_output/512_efficientnetb4.model",
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=1,
    ),
    swa,
    CosineAnnealingScheduler(
        T_max=epochs,
        eta_max=1e-4,
        eta_min=1e-6,
        verbose=1,
    ),
]

In [None]:
# Train on the training generator, validating against the validation
# generator after each epoch; the returned History object is kept for plotting.
history = unet_eff.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Show which series were recorded during training.
# NOTE(review): `plt` is not imported in any visible cell - confirm that
# matplotlib.pyplot is made available as `plt` before running.
print(history.history.keys())

# One figure, two side-by-side panels: IoU on the left, loss on the right.
fig, (iou_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 5))

# Left panel: training vs. validation IoU per epoch.
iou_ax.plot(history.history['my_iou_metric'])
iou_ax.plot(history.history['val_my_iou_metric'])
iou_ax.set_title('model IOU')
iou_ax.set_ylabel('iou')
iou_ax.set_xlabel('epoch')
iou_ax.legend(['train', 'Validation'], loc='upper left')

# Right panel: training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'Validation'], loc='upper left')

In [None]:
# Restore weights for evaluation. This cell loads the SWA weights file; point
# it at the ModelCheckpoint file instead to use the best-val-loss weights.
print('using swa weight model')
unet_eff.load_weights(
    'model_output/512_efficientnetb4_swa.model'
)

In [None]:
# Evaluate the loaded weights on the validation generator.
# scores[0] is the loss; scores[1] is the single metric passed to compile().
scores = unet_eff.evaluate_generator(validation_generator)

print("Loss: {loss:.5}".format(loss=scores[0]))
print("mean  {metric:.5}".format(metric=scores[1]))

# Evaluation on validation data

In [None]:
# Rebuild the validation generator for prediction: batch size 5 and no
# shuffling, so the sample order is the same on every pass.
params_val = dict(
    img_size=img_size,
    batch_size=5,
    n_channels=3,
    shuffle=False,
)

# Generators
validation_generator = DataGenerator(X_val, masks, **params_val)

In [None]:
# Second validation generator whose only augmentation is a horizontal flip
# applied with probability 1 (used for flip test-time augmentation).
AUGMENTATIONS_TEST_FLIPPED = A.Compose([A.HorizontalFlip(p=1)], p=1)

# Same settings as the unflipped prediction generator, plus the flip.
params_val_flip = dict(
    img_size=img_size,
    batch_size=5,
    n_channels=3,
    shuffle=False,
    augmentations=AUGMENTATIONS_TEST_FLIPPED,
)

validation_generator_flipped = DataGenerator(X_val, masks, **params_val_flip)

In [None]:
# Flip test-time augmentation on the validation set: predict on the original
# and on the horizontally flipped images, flip the second set of predictions
# back, and average the two.
# NOTE(review): this cell previously called `predict_result`, a name that is
# not imported anywhere in the visible cells. `predict_result_val` is the
# helper imported from `predict` and is otherwise unused; confirm that its
# signature is (model, generator, img_size).
preds_valid_orig = predict_result_val(unet_eff, validation_generator, img_size)
preds_valid_flipped = predict_result_val(unet_eff, validation_generator_flipped, img_size)
# Undo the horizontal flip on each prediction so it lines up with the original.
preds_valid_flipped = np.array([np.fliplr(x) for x in preds_valid_flipped])
preds_valid = 0.5*preds_valid_orig + 0.5*preds_valid_flipped

In [None]:
# Persist the validation predictions (TTA-averaged first, then un-augmented)
# as compressed .npz archives, each under the key 'array1'.
_val_prediction_dumps = (
    ('process_data/val_pre/efficientnetb4_preds_valid', preds_valid),
    ('process_data/val_pre/efficientnetb4_preds_valid_orig', preds_valid_orig),
)
for _dump_path, _dump_array in _val_prediction_dumps:
    np.savez_compressed(_dump_path, array1=_dump_array)

In [None]:
# Build the ground-truth array for as many validation samples as there are
# predictions, then store it as a compressed archive under the key 'array1'.
# NOTE(review): the trailing positional 3 presumably is the channel count used
# elsewhere in this notebook - confirm against generator.label_generator.
y_truth_val = label_generator(
    X_val,
    masks,
    len(preds_valid),
    img_size,
    3,
)

np.savez_compressed(
    'process_data/val_pre/y_truth_val',
    array1=y_truth_val,
)

In [None]:
# Reload the saved validation ground truth. `np.load` on an .npz returns an
# NpzFile that holds an open file handle; the `with` block closes it once the
# array has been read into memory.
with np.load("process_data/val_pre/y_truth_val.npz") as decompressed_array:
    y_truth_val = decompressed_array['array1']

In [None]:
## Scoring for last model
# Exhaustive search over (threshold, area) pairs: predictions are binarised at
# `threshold` and scored with `iou_metric_batch_val`; the best-scoring pair so
# far is remembered and printed whenever it improves.
# NOTE(review): `tqdm` is not imported in any visible cell - confirm it is
# available before running.
score, mask_area, best_th = 0.0, 0, 0

thresholds = np.arange(0.2, 0.9, 0.01)
areas = [1024, 2048, 3072, 4096]
for threshold in tqdm(thresholds):
    for area in tqdm(areas):
        iou = iou_metric_batch_val(
            y_truth_val,
            (preds_valid > threshold).astype(np.int32),
            area,
        )
        if iou > score:
            # New best: record the score and the settings that produced it.
            score, mask_area, best_th = iou, area, threshold
            print("Threshold {}\tMask area {}\tIoU {}".format(best_th, mask_area, score))
    # Blank line between thresholds keeps the log readable.
    print()

# Test Prediction

In [None]:
# Glob pattern for the .dcm files under stage2_siim_data/stage_2_images.
test_file = 'stage2_siim_data/stage_2_images/*.dcm'
# NOTE(review): `rle_file` is not assigned in any visible cell, so this call
# raises NameError unless it is defined elsewhere - confirm which file
# prepare_test expects as its second argument.
test_metadata_df = prepare_test(test_file, rle_file)

In [None]:
# Materialise test images at the working resolution with 3 channels and
# print the resulting array shape.
# NOTE(review): the meaning of the first argument (3205) is defined by
# predict.get_test, which is not visible here; the trailing note lists
# 0, 1068, 2136 and 3205 as values for it - confirm before changing.
test_data = get_test(3205, test_metadata_df, img_size=img_size, channels=3) #0, 1068, 2136, 3205
print(test_data.shape)

In [None]:
# Run the trained U-Net over the test data, using the training batch size.
efficientb4_512_preds_test = get_prediction(unet_eff, test_data, batch_size=batch_size)

In [None]:
# Load test predictions from disk, replacing whatever the previous cell
# computed. The `with` block closes the NpzFile's underlying file handle once
# the array has been read.
# NOTE(review): no visible cell writes efficientb4_512_preds_test.npz - confirm
# the file exists and is current before relying on it.
with np.load("process_data/test_pre/efficientb4_512_preds_test.npz") as decompressed_array:
    efficientb4_512_preds_test = decompressed_array['array1']

In [None]:
# Encode the test predictions as run-length strings.
# The previous argument `preds_test` is not defined anywhere in the visible
# cells; the test predictions live in `efficientb4_512_preds_test`.
# NOTE(review): b_th / r_th are presumably the binarisation threshold and the
# minimum mask area (cf. the threshold/area search on validation) - confirm
# against predict.get_rles.
rles = get_rles(efficientb4_512_preds_test, b_th=0.55, r_th=2048)

In [None]:
# Sorted list of test DICOM paths, and the matching image IDs.
test_fn = sorted(glob('stage2_siim_data/stage_2_images/*.dcm'))
# Derive each ID from the file name with os.path rather than splitting on '/'
# and slicing off four characters: this also works when glob returns
# OS-specific separators and strips exactly the final extension.
test_IDs = [os.path.splitext(os.path.basename(path))[0] for path in test_fn]

In [None]:
# Assemble the submission table: one row per test image ID with its RLE string.
sub_df = pd.DataFrame({'ImageId': test_IDs, 'EncodedPixels': rles})
# Rows whose encoding is the empty string get the placeholder '-1'.
sub_df['EncodedPixels'] = sub_df['EncodedPixels'].mask(
    sub_df['EncodedPixels'] == '', '-1'
)
sub_df.head()

In [None]:
# Write the submission table to CSV without the DataFrame index column.
sub_df.to_csv('model_submission/efficientb4_submission.csv', index=False)

In [None]:
# Percentage of rows taken by each distinct EncodedPixels value; the share of
# '-1' shows how many images were given the empty-encoding placeholder.
sub_df['EncodedPixels'].value_counts(normalize=True) * 100