In [None]:
"""
melanoma_balance_weights = np.array([1.4595, 3.1758]) # set of resampling weights that yields balanced classes
isic_balance_weights = np.array([1.2405, 5.1572])
melanoma_prot3_balance_weights = np.array([23.2142, 3.6792, 1.4595])
melanoma_std = np.array([ 49.33694864,  56.56751696,  59.42470697], dtype=np.float32) # channel standard deviations
melanoma_mean = np.array([ 198.19906616, 170.38525391, 155.49664307], dtype=np.float32) # channel means
melanoma_U = np.array([[-0.51556925,  0.77283555,  0.35053246],
 				 [-0.60600832, -0.04596143, -0.78214996],
 				 [-0.60304644, -0.6169369,   0.49839384]] ,dtype=np.float32)
melanoma_EV = np.array([ 102.92342015, 25.71846177, 11.24928707], dtype=np.float32)"""

In [None]:
import numpy as np
import sys
from glob import glob
import yaml
import scipy.io
import pandas as pd
from numpy import rollaxis, swapaxes
import os
import cv2

In [None]:
from keras.models import Model
from keras.layers import Input,Lambda,Dense
from keras.layers import Conv2D,Dropout,Flatten
from keras.layers import MaxPooling2D
from keras.models import model_from_yaml
from keras import Sequential
from tensorflow.keras.optimizers import SGD,RMSprop,Adam
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img,img_to_array
from keras.callbacks import EarlyStopping, ReduceLROnPlateau,ModelCheckpoint,CSVLogger
from keras.utils import to_categorical

In [None]:
# Input locations, all relative to the notebook's working directory.
# melanoma_dir is the image folder handed to main() as train_dir.
melanoma_dir="../input/melanoma/datasets/datasets/edra_cropped/ready"
# retina_dir is not referenced by any other cell visible in this notebook.
retina_dir="../input/melanoma/datasets/datasets/retina"
# git_repo_path is where load_data() looks for folds/<protocol>.yml and folds/<protocol>.csv.
git_repo_path="../input/melanoma/melanoma-transfer/melanoma-transfer"

In [None]:
def get_image_files(datadir, listImages=None, left_only=False):
    """Return the sorted paths of the entries directly inside ``datadir``.

    Parameters
    ----------
    datadir : str
        Directory to scan (non-recursive, pattern ``datadir/*``).
    listImages : sequence of str, optional
        File names to keep. Only the part before the first "." is compared,
        so a list of ``.tiff`` names selects the matching ``.jpg`` files
        (useful for the 5x2-fold cross-validation lists). ``None`` or an
        empty sequence disables the filter. Lists, tuples and numpy arrays
        are all accepted.
    left_only : bool
        When true, keep only paths that contain the substring ``'left'``.

    Returns
    -------
    numpy.ndarray
        Sorted array of path strings.
    """
    fs = glob('{}/*'.format(datadir))
    if left_only:
        fs = [f for f in fs if 'left' in f]
    # Use len() instead of `!= []` / truthiness: comparing a numpy array with a
    # list is element-wise and cannot be used as a plain condition.
    if listImages is not None and len(listImages) > 0:
        # A set gives O(1) membership tests instead of scanning a list per file.
        wanted = {img.split(".")[0] for img in listImages}
        fs = [f for f in fs if os.path.basename(f).split(".")[0] in wanted]
        print("images listed:",len(listImages)," images loaded:",len(fs))
    return np.array(sorted(fs))

def get_labels(names, labels=None, label_file=None,
               per_patient=False):
    """Look up the labels for ``names``, optionally paired per patient.

    Parameters
    ----------
    names : sequence of str
        Image names; used as index keys into the CSV and, for
        ``per_patient``, to tell left images (name contains ``'left'``)
        from the others.
    labels : array, optional
        Pre-loaded labels aligned with ``names``. When given, the CSV is
        not read.
    label_file : str, optional
        CSV whose first column is the index of image names; read only when
        ``labels`` is ``None``.
    per_patient : bool
        When true, return a two-column array: labels of the 'left' names in
        the first column, labels of the remaining names in the second.

    Returns
    -------
    array
        The flat label array, or the two-column array for ``per_patient``.
    """
    if labels is None:
        table = pd.read_csv(label_file, index_col=0)
        labels = table.loc[names].values.flatten()

    if not per_patient:
        return labels

    is_left = np.array(['left' in name for name in names])
    left_column = labels[is_left]
    other_column = labels[~is_left]
    return np.vstack([left_column, other_column]).T

In [None]:
def load_data(protocol,fold,train_dir,train_retina,target_size=(224,224)):
    """Load the images and one-hot labels of one cross-validation fold.

    Parameters
    ----------
    protocol : str
        Name of the fold definition; ``<git_repo_path>/folds/<protocol>.yml``
        lists the image names per fold and ``<protocol>.csv`` holds the labels.
        ``"protocol3"`` is encoded with 3 classes, anything else with 2.
    fold : str
        Fold identifier of the form ``"<a>x<b>"`` (e.g. ``"3x2"``): selects
        entry ``b`` (1-based) of the list stored under key ``Fold_<a>``.
    train_dir : str
        Directory containing the image files.
    train_retina : bool
        Must be false. See Raises.
    target_size : tuple of int
        ``(height, width)`` every image is resized to while loading.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``images`` stacked along a new first axis and the matching one-hot
        ``labels``.

    Raises
    ------
    NotImplementedError
        If ``train_retina`` is true. That branch previously only listed the
        files and then fell through, returning ``None``, which made callers
        fail on tuple unpacking; no label source for it exists in this
        notebook, so it now fails early with an explicit message.
    """
    if train_retina:
        raise NotImplementedError(
            "load_data: train_retina=True is not supported; "
            "no image/label loading is implemented for the retina data")

    folds_prefix = git_repo_path+'/folds/'+protocol
    # NOTE(security): yaml.full_load is more permissive than yaml.safe_load.
    # Kept as-is because the fold file ships with the repository checkout;
    # switch to safe_load if the file can come from an untrusted source.
    with open(folds_prefix+'.yml') as fold_file:      # close the handle deterministically
        folds = yaml.full_load(fold_file)
    print("The images are loaded from:",git_repo_path+'/folds/'+protocol+'.yml')

    f0, f1 = fold.split('x')
    train_list = folds['Fold_' + f0][int(f1)-1]
    files = get_image_files(train_dir, train_list)

    # Labels are looked up by file name without directory or extension.
    names = [os.path.basename(x).split('.')[0] for x in files]
    labels = get_labels(names, label_file=folds_prefix+'.csv').astype(np.int32)
    num_classes = 3 if protocol == "protocol3" else 2
    labels = to_categorical(labels, num_classes)

    images = np.array([img_to_array(load_img(path, target_size=target_size))
                       for path in files])
    print("Input shape:   Images:",images.shape," labels:",labels.shape)

    return images,labels


In [None]:
def checkpoints(directory_path):
    """Build the list of Keras callbacks used for one training run.

    All three callbacks watch the training ``loss`` in ``min`` mode:

    * a ModelCheckpoint that writes weights-only files named
      ``weights-epoch-<epoch>-loss-<loss>.hdf5`` into ``directory_path``,
      and only when the loss improves;
    * an EarlyStopping with a patience of 20 epochs;
    * a ReduceLROnPlateau that halves the learning rate after 2 epochs
      without improvement, never going below 1e-5.

    Parameters
    ----------
    directory_path : str
        Folder the checkpoint files are written to.

    Returns
    -------
    list
        ``[checkpoint, early_stopping, lr_reduction]`` in that order.
    """
    weights_pattern = directory_path+"/"+"weights-epoch-{epoch:02d}-loss-{loss:.2f}.hdf5"

    best_weights_saver = ModelCheckpoint(
        filepath=weights_pattern,
        monitor="loss",
        mode="min",
        save_best_only=True,
        save_weights_only=True,
        verbose=1,
    )

    stop_when_stalled = EarlyStopping(
        monitor="loss",
        patience=20,
        mode="min",
    )

    halve_lr_on_plateau = ReduceLROnPlateau(
        monitor="loss",
        patience=2,
        factor=0.5,
        mode="min",
        min_lr=0.00001,
    )

    return [best_weights_saver, stop_when_stalled, halve_lr_on_plateau]


In [None]:
def vgg_m(classes):
    """Assemble the (uncompiled) Sequential network.

    The stack is: two strided convolutions, each followed by local response
    normalisation and max pooling; three 3x3 convolutions with 512 filters
    and a final max pooling; then two 4096-unit dense layers with 50 %
    dropout and a softmax output.

    Parameters
    ----------
    classes : int
        Number of units in the final softmax layer.

    Returns
    -------
    Sequential
        Model taking inputs of shape ``(224, 224, 3)``.
    """
    def _conv3x3_512():
        # The three identical mid-network convolutions.
        return Conv2D(filters=512,kernel_size=(3,3),strides=(1,1), padding='same', activation='relu')

    stack = [
        # block 1
        Conv2D(filters=96,kernel_size=(7,7),strides=(2,2), padding='valid', activation='relu',input_shape=(224,224,3)),
        Lambda(tf.nn.local_response_normalization),
        MaxPooling2D((3,3), strides=(2,2)),
        # block 2
        Conv2D(filters=256,kernel_size=(5,5),strides=(2,2), padding='same', activation='relu'),
        Lambda(tf.nn.local_response_normalization),
        MaxPooling2D((2,2), strides=(2,2)),
        # blocks 3-5
        _conv3x3_512(),
        _conv3x3_512(),
        _conv3x3_512(),
        MaxPooling2D((3,3), strides=(2,2)),
        # classifier head
        Flatten(),
        Dense(4096, activation="relu"),
        Dropout(0.5),
        Dense(4096, activation="relu"),
        Dropout(0.5),
        Dense(classes, activation="softmax"),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model

In [None]:
def rolling(roll_me):
    """Reorder the axes of ``roll_me`` from ``(0, 1, 2, 3)`` to ``(3, 2, 0, 1)``.

    For example an array of shape ``(7, 7, 3, 96)`` comes back with shape
    ``(96, 3, 7, 7)``. The result is a view; no data is copied.
    """
    # Move axis 3 to the front and axis 2 right behind it; the remaining
    # axes keep their relative order.
    return np.moveaxis(roll_me, (3, 2), (0, 1))

In [None]:
def model_load_weight(model, weights_path='../input/melanoma/datasets/datasets/imagenet/imagenet-vgg-m.mat'):
    """Copy pre-trained parameters from a ``.mat`` file into ``model``.

    Entries 0, 4, 8, 10 and 12 of the file's ``layers`` array feed the five
    convolutions, entries 15 and 17 the two hidden dense layers. The final
    classification layer is left untouched: the stored one has 1000 outputs
    (per the original comment) and does not fit the model's output size, so
    it is no longer even read from the file.

    Parameters
    ----------
    model : keras model
        Network whose ``layers`` follow the order produced by ``vgg_m``.
    weights_path : str
        Location of the ``.mat`` weight file.

    Returns
    -------
    The same ``model`` object, with the weights set in place.
    """
    mat = scipy.io.loadmat(weights_path)
    stored_layers = mat['layers'][0]

    def _params(index):
        # (kernel, bias) pair stored for the layer at ``index``.
        pair = stored_layers[index][0][0][2][0]
        return pair[0], pair[1]

    def _bias(raw):
        return np.array(np.squeeze(raw), dtype='float32')

    def _conv(index):
        kernel, bias = _params(index)
        # rolling() permutes axes to (3, 2, 0, 1) and .transpose() then reverses
        # them, so the stored kernel ends up with its first two axes swapped.
        # NOTE(review): confirm this spatial swap is intended.
        return [np.array(rolling(kernel), dtype='float32').transpose(), _bias(bias)]

    def _dense(index):
        kernel, bias = _params(index)
        return [np.array(np.squeeze(kernel), dtype='float32'), _bias(bias)]

    # First dense layer: bring axis 2 to the front, then flatten to 2-D.
    # NOTE(review): presumably the stored kernel is (6, 6, 512, 4096); the row
    # order after rollaxis is then channel-major — verify it matches the order
    # produced by the model's Flatten layer.
    fc_kernel, fc_bias = _params(15)
    first_dense = [np.array(np.reshape(rollaxis(fc_kernel, 2), (18432, 4096)), dtype='float32'),
                   _bias(fc_bias)]

    no_weights = []   # layers without parameters (Lambda, pooling, Flatten, Dropout)
    # One entry per model layer, in order, for every layer except the last.
    pretrained = [
        _conv(0), no_weights, no_weights,
        _conv(4), no_weights, no_weights,
        _conv(8), _conv(10), _conv(12), no_weights,
        no_weights,
        first_dense, no_weights,
        _dense(17), no_weights,
    ]
    # zip stops at the shorter sequence, so the final layer is never touched.
    for layer, layer_weights in zip(model.layers, pretrained):
        layer.set_weights(layer_weights)

    return model

In [None]:
def _keep_newest_checkpoint(directory):
    """Delete every entry in ``directory`` except the most recently written one.

    Returns the surviving path, or ``None`` when the directory is empty.
    """
    candidates = glob(directory+"/*")
    if not candidates:
        return None
    # Modification time, not file name: names sort lexicographically, so
    # "epoch-100" would sort before "epoch-11" and the wrong file would be
    # kept. The name is only used as a deterministic tie-break.
    newest = max(candidates, key=lambda path: (os.path.getmtime(path), path))
    for path in candidates:
        if path != newest:
            os.remove(path)
    return newest


def main(train_dir,model_name,protocol,train_retina=False):
    """Train one network per fold of the 5x2 cross-validation and keep its best weights.

    For each fold ``"<i>x<j>"`` (i in 1..5, j in 1..2) the function loads the
    fold's data, builds a fresh model, freezes its first 10 layers, loads the
    pre-trained weights, trains with augmentation and an 80/20
    train/validation split, and finally prunes the fold's checkpoint folder
    down to the most recent checkpoint (checkpoints are only written when the
    monitored loss improves).

    Parameters
    ----------
    train_dir : str
        Directory holding the training images.
    model_name : str
        Prefix of the output folder ``./<model_name>_<protocol>_weights``.
    protocol : str
        ``"protocol1"``, ``"protocol2"`` or ``"protocol3"``; protocol3 uses
        3 output classes, the others 2.
    train_retina : bool
        Forwarded unchanged to ``load_data``.
    """
    #protocol1,protocol2,protocol3
    num_output = 3 if protocol == "protocol3" else 2

    model_save_path="./"+model_name+"_"+protocol+"_weights"
    # makedirs(exist_ok=True) so that re-running the notebook does not fail
    # on folders left behind by a previous run.
    os.makedirs(model_save_path, exist_ok=True)
    for i in range(1,6):
        for j in range(1,3):
            fold=str(i)+"x"+str(j)
            dest_path=model_save_path+"/"+fold
            os.makedirs(dest_path, exist_ok=True)
            print("-------------------training:",fold,"--------------------------------")

            #Load the data
            print("Loading Data")
            images,label=load_data(protocol,fold,train_dir,train_retina)
            datagen = ImageDataGenerator(rotation_range=20,width_shift_range=0.2,height_shift_range=0.2,horizontal_flip=True,validation_split=0.2)

            print("VGGM model")
            vggm=vgg_m(num_output)

            #Training only ending dense layers
            for layer in vggm.layers[:10]:
                layer.trainable=False

            print("Loading pre-trained weights")
            vggm=model_load_weight(vggm)

            my_callbacks = checkpoints(dest_path)

            # The head is a softmax over one-hot labels, so the matching loss is
            # categorical cross-entropy. For 2 classes this equals the previous
            # binary cross-entropy value; for the 3-class protocol the binary
            # loss was not the right objective.
            # `learning_rate` replaces the deprecated `lr` keyword.
            vggm.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.005), metrics=['accuracy',tf.keras.metrics.AUC()])

            vggm.fit(datagen.flow(images, label, batch_size=12,subset='training'),
                     validation_data=datagen.flow(images,label, batch_size=8, subset='validation'),
                     epochs=100,
                     verbose=1,
                     callbacks=my_callbacks)

            _keep_newest_checkpoint(dest_path)
            

In [None]:
# Prefix of the output folder main() creates: "./<model_name>_<protocol>_weights".
model_name="E"

In [None]:
# Train on the melanoma images with the 3-class protocol; main() iterates over folds 1x1 .. 5x2.
main(melanoma_dir,model_name,"protocol3")

In [None]:
#import shutil
#shutil.make_archive('sequence', 'zip', './')

In [None]:
"""for file in files:
    data=file.split("-")
    epoch=data[2]
    file_name=data[-1]
    loss= float(file_name.split(".")[0]+"."+file_name.split(".")[1])
    if loss>min_loss:
        os.remove(file)
    else:
        min_loss=loss 
        file_path=file
    print(epoch," ",file_name," ",float(loss)," ",min_loss)"""

In [None]:
#import os
#os.chdir('./')

In [None]:
#from IPython.display import FileLink
#FileLink('sequence.zip')
