In [None]:
import pandas as pd 
import numpy as np
import tensorflow as tf
# import tensorflow-io as tfio
import warnings
warnings.filterwarnings('ignore') 

import gctf
import os
import random 
import PIL
from PIL import Image

from sklearn.model_selection import StratifiedKFold

from tensorflow.keras.applications import EfficientNetB0,EfficientNetB3,EfficientNetB5
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, Flatten,GlobalAveragePooling2D,BatchNormalization, Activation

from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.losses import BinaryCrossentropy 
from tensorflow.keras.metrics import AUC

In [None]:
class config:
    """Hyperparameters and pipeline settings shared by the cells of this notebook."""

    # Reproducibility and cross-validation
    seed = 42
    N_SPLITS = 5

    # Input pipeline
    IMG_SIZE = 512
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)  # square RGB input built from IMG_SIZE
    batch_size = 4
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # Model and optimisation
    num_classes = 1
    dropout_rate = 0.2
    learning_rate = 1e-5
    epochs = 30
    

In [None]:
def seed_all(seed):
    """Seed the Python, NumPy and TensorFlow RNGs and request deterministic TF ops.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # The key was previously misspelled 'PYTHONASHSEED', so the variable was never set.
    # Python reads PYTHONHASHSEED at interpreter start-up, so setting it here only
    # affects processes spawned afterwards, not the hashing of the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Seed every RNG once, before the data pipelines and models below are created.
seed_all(config.seed)

In [None]:
# Competition tables; paths are relative to the notebook's working directory.
train_data = pd.read_csv(filepath_or_buffer='data/Train.csv')
test_data = pd.read_csv(filepath_or_buffer='data/Test.csv')
sub = pd.read_csv(filepath_or_buffer='data/SampleSubmission.csv')

In [None]:
# Directory that holds the competition JPEGs. The original absolute path stays as the
# default so the existing setup keeps working; set ROAD_IMAGE_DIR to run the notebook
# on another machine without editing this cell.
training_data = os.path.join(
    os.environ.get(
        'ROAD_IMAGE_DIR',
        '/media/revanth/01D7A0158DB621C0/competitions/zindi/weekend_hackathon_road_segmentation/',
    ),
    '',  # ensures a trailing separator before the file name is appended
)

# Full image path per row: <image dir>/<Image_ID>.jpeg
train_data['file_path'] = training_data + train_data['Image_ID'] + '.jpeg'
test_data['file_path'] = training_data + test_data['Image_ID'] + '.jpeg'

In [None]:
def _read_rgb_image(image_path):
    """Read the file at `image_path` and decode it as a 3-channel JPEG tensor."""
    raw_bytes = tf.io.read_file(image_path)
    return tf.io.decode_jpeg(raw_bytes, channels=3)


def _to_model_input(image):
    """Resize to (config.IMG_SIZE, config.IMG_SIZE) and cast to float32.

    Pixel values are deliberately left in the [0, 255] range: the
    tf.keras.applications EfficientNet models rescale their inputs internally,
    so dividing by 255 here would rescale the images twice.
    """
    image = tf.image.resize(image, size=[config.IMG_SIZE, config.IMG_SIZE])
    return tf.cast(image, dtype='float32')


def process_train_data(image_path,label):
    """Load one training image and apply random augmentation.

    Augmentations, each gated by its own uniform draw in [0, 1): random
    horizontal/vertical flips, transpose, rotation by a multiple of 90 degrees,
    saturation/contrast/brightness jitter and a central crop. The result is
    resized to the configured input size.

    Args:
        image_path: Path of the JPEG file to read.
        label: Target passed through unchanged.

    Returns:
        Tuple `(image, label)` where `image` is float32 with values in [0, 255].
        Any inference code must feed the model the same [0, 255] range.
    """
    image = _read_rgb_image(image_path)

    # One independent draw per augmentation family.
    p_spatial = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype = tf.float32)

    # Flips
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if p_spatial > 0.75:
        image = tf.image.transpose(image)

    # Rotates: no rotation when p_rotate <= 0.25
    if p_rotate > 0.75:
        image = tf.image.rot90(image, k = 3) # rotate 270 degrees
    elif p_rotate > 0.5:
        image = tf.image.rot90(image, k = 2) # rotate 180 degrees
    elif p_rotate > 0.25:
        image = tf.image.rot90(image, k = 1) # rotate 90 degrees

    # Pixel-level transforms, each applied when its draw is >= 0.4
    if p_pixel_1 >= 0.4:
        image = tf.image.random_saturation(image, lower = 0.7, upper = 1.3)
    if p_pixel_2 >= 0.4:
        image = tf.image.random_contrast(image, lower = 0.8, upper = 1.2)
    if p_pixel_3 >= 0.4:
        image = tf.image.random_brightness(image, max_delta = 0.1)

    # Central crops: a higher draw keeps a smaller central fraction; no crop at <= 0.7
    if p_crop > 0.9:
        image = tf.image.central_crop(image, central_fraction = 0.7)
    elif p_crop > 0.8:
        image = tf.image.central_crop(image, central_fraction = 0.8)
    elif p_crop > 0.7:
        image = tf.image.central_crop(image, central_fraction = 0.9)

    return _to_model_input(image), label

def process_valid_data(image_path,label):
    """Load one validation image without augmentation.

    Args:
        image_path: Path of the JPEG file to read.
        label: Target passed through unchanged.

    Returns:
        Tuple `(image, label)` where `image` is float32 with values in [0, 255],
        resized to the configured input size.
    """
    image = _read_rgb_image(image_path)
    return _to_model_input(image), label


In [None]:
def getDatasetFromDataframe(train_files, train_labels, val_files, val_labels):
    """Build the batched, prefetched training and validation `tf.data` pipelines.

    Args:
        train_files: Training image paths.
        train_labels: Labels aligned with `train_files`.
        val_files: Validation image paths.
        val_labels: Labels aligned with `val_files`.

    Returns:
        Tuple `(train_ds, val_ds)`. Only the training pipeline is shuffled
        (buffer covering every training file) and mapped through
        `process_train_data`; validation goes through `process_valid_data`.
    """
    def _batched(files, labels, transform, shuffle_size=None):
        # Shared recipe: slice -> optional shuffle -> map -> batch -> prefetch.
        pipeline = tf.data.Dataset.from_tensor_slices((files, labels))
        if shuffle_size is not None:
            pipeline = pipeline.shuffle(shuffle_size)
        pipeline = pipeline.map(transform, num_parallel_calls=16)
        return pipeline.batch(config.batch_size).prefetch(config.AUTOTUNE)

    training = _batched(
        train_files, train_labels, process_train_data, shuffle_size=len(train_files)
    )
    validation = _batched(val_files, val_labels, process_valid_data)
    return training, validation

In [None]:
# Seeded, shuffled stratified splitter used by the fold loop.
skf = StratifiedKFold(
    n_splits=config.N_SPLITS,
    random_state=config.seed,
    shuffle=True,
)

# Inputs (image paths) and targets that the splitter stratifies on.
y = train_data['Target']
x = train_data['file_path']

In [None]:
def get_model():
    """Build and compile the EfficientNetB3 binary classifier.

    The ImageNet-pretrained backbone (fully trainable) is followed by global
    average pooling, dropout and a sigmoid `Dense` head, compiled with Adam,
    binary cross-entropy and an AUC metric.

    Returns:
        The compiled Keras `Model`.
    """
    backbone = EfficientNetB3(include_top=False, weights='imagenet', input_shape=config.IMG_SHAPE)
    backbone.trainable = True

    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(config.dropout_rate)(features)
    prediction = Dense(config.num_classes, activation='sigmoid')(features)
    model = Model(backbone.input, prediction)

    opt = Adam(learning_rate=config.learning_rate)
    # NOTE(review): this replaces the optimizer's `get_gradients` hook; confirm the
    # installed Keras training loop actually calls it, otherwise gradient
    # centralization is silently inactive.
    opt.get_gradients = gctf.centralized_gradients_for_optimizer(opt)

    model.compile(
        optimizer=opt,
        loss=BinaryCrossentropy(),
        # Metrics are passed as a list, and the metric is named explicitly so the
        # logged key is always 'auc' / 'val_auc' regardless of how many AUC
        # instances were created earlier in the session.
        metrics=[AUC(name='auc')])

    return model


In [None]:
# Train one model per stratified fold, checkpointing the best epoch by validation AUC.
for fold ,(train_idx,test_idx) in enumerate(skf.split(x,y)):
    print('#'*50)
    print(f'FOLD NUMBER : {fold}')
    print('#'*50)

    # skf.split yields positional indices, so select with iloc; loc is label-based
    # and is only equivalent while the frame keeps a default RangeIndex.
    x_train,x_test=x.iloc[train_idx],x.iloc[test_idx]
    y_train,y_test=y.iloc[train_idx],y.iloc[test_idx]

    train_ds,val_ds=getDatasetFromDataframe(x_train.values,y_train.values,x_test.values,y_test.values)

    tf.keras.backend.clear_session()
    model=get_model()

    # NOTE(review): the file name says "b0_384" but get_model() builds EfficientNetB3
    # at config.IMG_SIZE; left unchanged in case other code loads this exact path.
    weight_path_save = f"data/models/bestb0_384_model_{str(fold)}_.hdf5"
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(os.path.dirname(weight_path_save), exist_ok=True)

    # 'val_auc' is only logged when the compiled AUC metric is named 'auc'.
    checkpoint = ModelCheckpoint(weight_path_save,
                                monitor= 'val_auc',
                                verbose=1,
                                save_best_only=True,
                                mode= 'max',
                                save_weights_only = False)

    early = EarlyStopping(monitor= 'val_auc',
                        mode= 'max',
                        patience=5)

    # The learning-rate schedule follows val_loss, while checkpointing and early
    # stopping follow val_auc.
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, mode='auto')
    callbacks_list = [checkpoint, early, reduceLROnPlat]
    model.fit(train_ds,validation_data=val_ds,callbacks=callbacks_list,epochs=config.epochs,verbose=1)

