# Train model

In [None]:
import numpy as np
from PIL import ImageFile
import pandas as pd
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.backend import *
from tensorflow.keras.preprocessing.image import *
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import tensorflow as tf
import tensorflow as tf 
ImageFile.LOAD_TRUNCATED_IMAGES = True
plt.style.use('fivethirtyeight')
%matplotlib inline
import cv2
from tensorflow.keras.optimizers import Adam
import albumentations as A

In [None]:
# Load the training annotations; later cells read the `filename` and `label` columns.
train_csv_path = "../input/aiijc-final-dcm/train.csv"
df = pd.read_csv(train_csv_path)
df.head()

In [None]:
# Translate the Russian direction names to English via substring (regex) replacement.
# The multi-word phrases go first: "Направо" is also the start of the longest phrase,
# so the single-word table must only run once those phrases are already translated.
multi_word_translations = {
    "Направо с последующим поворотом налево": "to the right followed by a left turn",
    "Въезд на кольцо": "entrance to the ring",
    "Слегка налево": "slightly to the left",
    "Слегка направо": "slightly to the right",
}
single_word_translations = {
    "Прямо": "straight",
    "Направо": "right",
    "Налево": "left",
}

for translation_table in (multi_word_translations, single_word_translations):
    df['label'] = df['label'].replace(translation_table, regex=True)

In [None]:
def convert_label(input_str):
    """Encode a lane-direction label string as a 5x7 multi-hot matrix.

    Lanes are separated by ``","``; within one lane, several allowed
    directions are joined by ``"+"`` (for example ``"left,straight+right"``).
    Row ``r`` of the result describes the r-th lane, and column ``c`` is 1
    when that lane allows ``label_[c]``.

    Args:
        input_str: Label string built from the seven known direction names.
            Whitespace around individual direction names is ignored.

    Returns:
        A list of 5 rows, each a list of 7 ints (0 or 1). Rows past the last
        lane stay all-zero; an empty/blank string yields an all-zero matrix.

    Raises:
        TypeError: if ``input_str`` is not a string (e.g. a NaN label).
        ValueError: if a direction name is not one of the known labels.
        IndexError: if the string describes more than 5 lanes.
    """
    label_ = ["straight", "left", "right", "entrance to the ring", "slightly to the left",
              "slightly to the right", "to the right followed by a left turn"]
    max_lanes = 5
    # Build independent rows (a plain `[[0] * 7] * 5` would alias one row five times).
    output = [[0] * len(label_) for _ in range(max_lanes)]

    if not isinstance(input_str, str):
        raise TypeError(f"label must be a string, got {type(input_str).__name__}")
    if not input_str.strip():
        return output

    for lane_idx, lane in enumerate(input_str.split(",")):
        for direction in lane.split("+"):
            name = direction.strip()
            try:
                column = label_.index(name)
            except ValueError:
                raise ValueError(
                    f"unknown direction {name!r} in label {input_str!r}"
                ) from None
            # Indexing past the 5th row raises IndexError for labels with too many lanes.
            output[lane_idx][column] = 1
    return output

# Encode every label and flatten its 5x7 lane matrix row-by-row into one 35-element vector.
labels = [
    [cell for lane_row in convert_label(df["label"][row]) for cell in lane_row]
    for row in range(len(df))
]

In [None]:
# Read every training image listed in `df`, resize it to 128x128 and stack the results.
root_dir = "../input/new-autoencoder-128-128/"
image_size = (128, 128)

train_images = []
for row in tqdm(range(len(df))):
    loaded_image = cv2.imread(root_dir + df["filename"][row])
    train_images.append(cv2.resize(loaded_image, dsize=image_size))

# Both the images and the multi-hot targets are kept as float32 arrays.
X = np.asarray(train_images, dtype="float32")
Y = np.asarray(labels, dtype="float32")

In [None]:
# Training-time augmentation pipeline; each transform is applied with its own probability `p`.
# Transforms are referenced through the public top-level `A.*` names instead of internal
# module paths (`A.augmentations.transforms...`), which have moved between albumentations
# releases; the classes themselves are the same.
train_transform = A.Compose([
        A.OpticalDistortion(p=0.3),
        A.ColorJitter(always_apply=False, p=0.3),
        A.Rotate(p=0.3, limit=7),
        A.RandomShadow(p=0.3),
        # NOTE(review): Cutout is deprecated in newer albumentations releases (CoarseDropout is
        # the suggested replacement) — confirm against the installed version before upgrading.
        A.Cutout(num_holes=6, max_h_size=5, max_w_size=5, fill_value=0, always_apply=False, p=0.4),
])

In [None]:
def albu_transform(image):
    """Augment one image with ``train_transform`` and scale it to [0, 1].

    Intended as ``ImageDataGenerator(preprocessing_function=...)``. Keras passes
    the image as a float array while the pixel values stay in the 0-255 range,
    whereas albumentations expects either uint8 (0-255) or float32 in [0, 1].
    Feeding 0-255 floats makes colour/shadow transforms clip or overflow, so the
    image is cast to uint8 before augmentation.

    Args:
        image: HxWxC array with pixel values in [0, 255]
            (assumes no earlier rescaling was applied — TODO confirm if the
            generator configuration changes).

    Returns:
        The augmented image as float32 with values in [0, 1].
    """
    image_u8 = np.clip(image, 0, 255).astype(np.uint8)
    augmented = train_transform(image=image_u8)['image']
    # Cast explicitly: dividing a uint8 array by a float would otherwise produce float64.
    return augmented.astype(np.float32) / 255.

In [None]:
# Training batches pass through `albu_transform` (augmentation + division by 255);
# validation batches are only rescaled by 1/255.
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=albu_transform,
)
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255.,
)

In [None]:
# Hold out 20% of the samples for validation; the fixed seed keeps the shuffle reproducible.
split_options = dict(test_size=0.2, shuffle=True, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, Y, **split_options)

In [None]:
def train_model(model, lr, min_lr, num_epoch, step_per_epoch, validation_step, model_name=None):
    """Compile ``model`` and fit it on the notebook's training generator.

    Training batches come from ``datagen_train.flow(X_train, y_train)`` and
    validation batches from ``datagen_val.flow(X_val, y_val)``; these are
    notebook-level globals and must be defined before calling.

    Args:
        model: Keras model; it is compiled and trained in place.
        lr: Initial learning rate for the Adam optimizer.
        min_lr: Lower bound used by ``ReduceLROnPlateau``.
        num_epoch: Maximum number of epochs (early stopping may end sooner).
        step_per_epoch: Number of training batches per epoch.
        validation_step: Number of validation batches per epoch.
        model_name: Prefix for the checkpoint file ``<prefix>_roc_.h5``.
            Defaults to ``model.name``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    print('--------------Deploying the Model...--------------')
    model.compile(loss='binary_crossentropy',
                  # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
                  optimizer=Adam(learning_rate=lr),
                  metrics=[tf.keras.metrics.AUC()])

    # Stop after 10 epochs without a val_loss improvement and roll back to the best weights.
    monitor = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                               min_delta=0.0001,
                                               patience=10,
                                               verbose=1,
                                               mode='min',
                                               restore_best_weights=True)

    # Halve the learning rate after 3 stagnant epochs, never going below min_lr.
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss",
                                                        factor=0.5,
                                                        patience=3,
                                                        verbose=1,
                                                        mode='min',
                                                        min_delta=0.0001,
                                                        cooldown=0,
                                                        min_lr=min_lr)

    # The checkpoint prefix is an explicit argument (falling back to the model's own name)
    # rather than a global that is not defined in this scope.
    checkpoint_prefix = model_name if model_name is not None else model.name
    filepath = checkpoint_prefix + "_roc_.h5"
    # save_best_only=False: the full model is written after every epoch, overwriting `filepath`.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath,
                                                    monitor='val_loss',
                                                    verbose=1,
                                                    save_best_only=False,
                                                    save_weights_only=False,
                                                    mode='min',
                                                    save_freq='epoch')
    print('--------------Deployed Successfully--------------')
    print('--------------Training Begins--------------')

    history = model.fit(datagen_train.flow(X_train, y_train),
                        epochs=num_epoch,
                        steps_per_epoch=step_per_epoch,
                        validation_data=datagen_val.flow(X_val, y_val),
                        validation_steps=validation_step,
                        # The checkpoint callback has to be registered here to have any effect.
                        callbacks=[monitor, lr_scheduler, checkpoint])
    return history

def BuildModel(ModelName):
    """Build a ResNet50-based multi-label classifier for 128x128x3 inputs.

    An ImageNet-pretrained ResNet50 backbone (fully trainable, no top) is followed by
    global average pooling, two ReLU dense layers (1024 and 512 units) and a 35-unit
    sigmoid output — one unit per cell of the flattened 5x7 label matrix.

    Args:
        ModelName: Accepted for the caller's convenience; not used inside this function.

    Returns:
        The assembled, uncompiled Keras model.
    """
    print('--------------Building The Model...--------------')
    backbone = tf.keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(128, 128, 3),
    )
    backbone.trainable = True
    print("\nNumber of layers in the base model: ", len(backbone.layers))

    # Classification head, applied in order on top of the backbone's feature map.
    head_layers = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(35, activation='sigmoid'),
    ]
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    model = tf.keras.models.Model(inputs=backbone.input, outputs=tensor)

    print('\n--------------Done!--------------')
    return model

In [None]:
# Reset Keras' global state so a re-run of this cell starts from a clean session.
clear_session()
BATCH_SIZE = 128
model_name = "resnet50"

model = BuildModel(ModelName=model_name)

# Step counts are derived from BATCH_SIZE (the only batch-size name defined in this notebook);
# the validation pass is sized from the validation split, and both are at least one step.
# NOTE(review): train_model calls `flow()` without a batch_size, so the generators use the
# Keras default batch size and BATCH_SIZE only influences these step counts — confirm intent.
step_per_epoch = max(1, len(y_train) // BATCH_SIZE)
validation_step = max(1, len(y_val) // BATCH_SIZE)

lr = 0.0002
num_epoch = 1000  # upper bound; early stopping inside train_model ends training sooner
min_lr = 1e-08

history = train_model(model, lr, min_lr, num_epoch, step_per_epoch, validation_step)

In [None]:
# Save the trained model to an HDF5 file in the working directory.
final_model_path = 'resnet50.h5'
model.save(final_model_path)

# Predict

In [None]:
# Re-bind `df` to the submission template; its `filename` column lists the test images.
sample_submission_path = '../input/aiijc-final-dcm/sample_submission.csv'
df = pd.read_csv(sample_submission_path)

In [None]:
# Read each test image listed in `df` and resize it to 128x128.
# X_test is left as a plain Python list of images (no dtype conversion or scaling here).
root_dir = "../input/aiijc-final-dcm/"
X_test = [
    cv2.resize(cv2.imread(root_dir + df["filename"][row]), dsize=(128,128))
    for row in tqdm(range(df.shape[0]))
]

In [None]:
# The model was trained on float inputs divided by 255 (both data generators do this), so the
# raw test images are stacked into a single float32 array and scaled the same way before
# prediction. Passing the bare Python list would also be treated as multiple model inputs.
X_test_scaled = np.asarray(X_test, dtype="float32") / 255.
result_np = model.predict(X_test_scaled, batch_size=512, verbose=1)
# Un-flatten the 35 scores per image into a 5x7 matrix (rows match the label encoding's lanes).
result_np = result_np.reshape(-1, 5, 7)

In [None]:
labels_to_use = ["straight","left","right","entrance to the ring", "slightly to the left", 
                 "slightly to the right", "to the right followed by a left turn"]
SCORE_THRESHOLD = 0.2  # a direction counts as predicted when its score is >= this value


def decode_prediction(score_matrix, label_names, threshold=SCORE_THRESHOLD):
    """Convert one score matrix (one row per lane) into a label string.

    Args:
        score_matrix: 2-D array of scores; row = lane, column = index into ``label_names``.
        label_names: Direction names, indexed by column.
        threshold: Minimum score for a direction to be included.

    Returns:
        Lanes joined by ``","``, with each lane's directions joined by ``"+"`` in column
        order. Lanes without any direction at or above the threshold are skipped entirely
        (so later lanes shift left); if no lane qualifies the result is ``""``.
    """
    lane_strings = []
    for lane_scores in score_matrix:
        selected_columns = np.where(lane_scores >= threshold)[0]
        if len(selected_columns) > 0:
            lane_strings.append('+'.join(label_names[col] for col in selected_columns))
    return ','.join(lane_strings)


mega_str = [decode_prediction(score_matrix, labels_to_use) for score_matrix in tqdm(result_np)]

In [None]:
# Pair each test filename with its decoded label string and write the submission file.
submission_columns = {'filename': df['filename'], 'label': mega_str}
df_pred = pd.DataFrame(submission_columns)
# NOTE(review): with default arguments to_csv also writes the dataframe index as an extra
# unnamed first column, and the labels are the English names used for training rather than
# the original Russian ones — confirm both match the expected submission format.
df_pred.to_csv('submission.csv')