In [1]:
import numpy as np
import sys
from glob import glob
import yaml
import scipy.io
import pandas as pd
from numpy import rollaxis, swapaxes
import os
import cv2

In [2]:
from keras.models import Model
from keras.layers import Input,Lambda,Dense
from keras.layers import Conv2D,Dropout,Flatten
from keras.layers import MaxPooling2D
from keras.models import model_from_yaml
from keras import Sequential
from tensorflow.keras.optimizers import SGD,RMSprop,Adam
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img,img_to_array
from keras.callbacks import EarlyStopping, ReduceLROnPlateau,ModelCheckpoint,CSVLogger
from keras.utils import to_categorical

In [3]:
# Input locations (relative to the notebook's working directory).
# Melanoma images; passed to main() as train_dir in the driver cell below.
melanoma_dir="../input/melanoma/datasets/datasets/edra_cropped/ready"
# Retina images; not referenced by any other cell visible in this notebook.
retina_dir="../input/melanoma/datasets/datasets/retina"
# Repository checkout; load_data() reads <git_repo_path>/folds/<protocol>.yml and .csv from it.
git_repo_path="../input/melanoma/melanoma-transfer/melanoma-transfer"

In [4]:
def get_image_files(datadir, listImages=None, left_only=False):
    """Return the sorted paths of the entries found directly inside ``datadir``.

    Parameters
    ----------
    datadir : str
        Directory to list (non-recursive, via ``glob(datadir + '/*')``).
    listImages : sequence of str, optional
        File names to keep. Only the part before the first "." is compared,
        so entries may carry a different extension than the files on disk
        (e.g. the list holds ``.tiff`` names while ``datadir`` holds ``.jpg``).
        ``None`` or an empty sequence disables the filter. Used to select the
        images of one fold of the 5x2-fold cross-validation.
    left_only : bool, optional
        If true, keep only the paths that contain the substring ``'left'``.

    Returns
    -------
    numpy.ndarray
        Sorted array of path strings.
    """
    fs = glob('{}/*'.format(datadir))
    if left_only:
        fs = [f for f in fs if 'left' in f]
    # `len(...)` instead of `!= []` so tuples and numpy arrays behave like lists.
    if listImages is not None and len(listImages) > 0:
        # A set makes each membership test O(1) instead of scanning the whole list.
        wanted = {img.split(".")[0] for img in listImages}
        fs = [f for f in fs if os.path.basename(f).split(".")[0] in wanted]
        print("images listed:",len(listImages)," images loaded:",len(fs))
    return np.array(sorted(fs))

def get_labels(names, labels=None, label_file=None,
               per_patient=False):
    """Look up (or pass through) the labels belonging to ``names``.

    When ``labels`` is not supplied, ``label_file`` is read as a CSV whose
    first column is the index; the rows for ``names`` are selected in that
    order and flattened into a 1-D array.

    With ``per_patient`` set, the labels are split by whether the matching
    name contains ``'left'`` and returned as a two-column array
    (``'left'`` names first, all other names second). Otherwise the labels
    are returned as they are.
    """
    if labels is None:
        table = pd.read_csv(label_file, index_col=0)
        labels = table.loc[names].values.flatten()

    if not per_patient:
        return labels

    # Boolean mask over `names`; requires `labels` to support mask indexing.
    is_left = np.array(['left' in name for name in names])
    left_labels = labels[is_left]
    other_labels = labels[~is_left]
    return np.vstack([left_labels, other_labels]).T

In [5]:
def _load_image_array(files, target_size=(224,224)):
    """Load every file resized to ``target_size`` and stack the results into one array."""
    return np.array([img_to_array(load_img(f, target_size=target_size)) for f in files])


def _load_split(directory, image_list, label_file, num_classes):
    """Return (images, one-hot labels) for the images of ``image_list`` found in ``directory``."""
    files = get_image_files(directory, image_list)
    names = [os.path.basename(x).split('.')[0] for x in files]
    labels = get_labels(names, label_file=label_file).astype(np.int32)
    return _load_image_array(files), to_categorical(labels, num_classes)


def load_data(protocol,fold,directory,train_retina):
    """Load the train/test images and one-hot labels of one cross-validation fold.

    Parameters
    ----------
    protocol : str
        Protocol name; selects ``<git_repo_path>/folds/<protocol>.yml`` (fold
        membership) and ``<protocol>.csv`` (labels). ``"protocol3"`` is
        encoded with 3 classes, every other protocol with 2.
    fold : str
        Fold identifier of the form ``"<i>x<j>"`` with ``j`` in {1, 2}; half
        ``j`` of ``Fold_<i>`` is used for training and the other half for testing.
    directory : str
        Directory that holds the image files.
    train_retina : bool
        Must be false; the retina path is not implemented (see Raises).

    Returns
    -------
    tuple
        ``(train_images, train_labels, test_images, test_labels)``.

    Raises
    ------
    NotImplementedError
        If ``train_retina`` is true. That branch previously only listed the
        files and fell through returning ``None``, which made the caller's
        4-way unpacking fail with an unrelated ``TypeError``.
    """
    if train_retina:
        raise NotImplementedError(
            "load_data(train_retina=True) is not implemented: "
            "no images or labels are built for the retina dataset")

    # Context manager so the fold definition file is closed after parsing.
    with open(git_repo_path+'/folds/'+protocol+'.yml') as fold_file:
        folds = yaml.full_load(fold_file)
    f0, f1 = fold.split('x')
    train_list = folds['Fold_' + f0][int(f1)-1]
    test_list  = folds['Fold_' + f0][0 if f1=='2' else 1]

    label_file = git_repo_path+'/folds/'+protocol+'.csv'
    num_classes = 3 if protocol=="protocol3" else 2

    train_images,train_labels = _load_split(directory, train_list, label_file, num_classes)
    test_images,test_labels = _load_split(directory, test_list, label_file, num_classes)

    print("Train images:   Images:",train_images.shape," labels:",train_labels.shape)
    print("Test images:   Images:",test_images.shape," labels:",test_labels.shape)

    return train_images,train_labels,test_images,test_labels


In [6]:
def vgg_m(classes):
    """Assemble the VGG-M style network as an uncompiled ``Sequential`` model.

    The input is fixed to 224x224x3. The stack is five ReLU convolutions
    (local response normalisation after the first two, max-pooling after the
    first, second and fifth), then two 4096-unit ReLU dense layers, each
    followed by 50% dropout, and a softmax layer with ``classes`` outputs.
    """
    def relu_conv(filters, kernel_size, strides, padding, **extra):
        # Every convolution in this network uses a ReLU activation.
        return Conv2D(filters=filters,kernel_size=kernel_size,strides=strides,
                      padding=padding, activation='relu', **extra)

    def lrn():
        return Lambda(tf.nn.local_response_normalization)

    def fc_with_dropout():
        return [Dense(4096, activation="relu"), Dropout(0.5)]

    model = Sequential()
    stack = [
        relu_conv(96, (7,7), (2,2), 'valid', input_shape=(224,224,3)),
        lrn(),
        MaxPooling2D((3,3), strides=(2,2)),
        relu_conv(256, (5,5), (2,2), 'same'),
        lrn(),
        MaxPooling2D((2,2), strides=(2,2)),
        relu_conv(512, (3,3), (1,1), 'same'),
        relu_conv(512, (3,3), (1,1), 'same'),
        relu_conv(512, (3,3), (1,1), 'same'),
        MaxPooling2D((3,3), strides=(2,2)),
        Flatten(),
        *fc_with_dropout(),
        *fc_with_dropout(),
        Dense(classes, activation="softmax"),
    ]
    # Layers are added in exactly the order they are listed above.
    for layer in stack:
        model.add(layer)
    return model

In [7]:
def _save_split_features(extractor, images, labels, dest_path, split):
    """Run ``extractor`` on each image individually and save features and labels for ``split``.

    Writes ``<split>_features.npy`` (one batch-of-one feature array per image)
    and ``<split>_featureLabels.npy`` into ``dest_path``.
    """
    features = [np.asarray(extractor(np.expand_dims(img, 0))) for img in images]
    np.save(os.path.join(dest_path,split+"_features.npy"),features)
    np.save(os.path.join(dest_path,split+"_featureLabels.npy"),np.asarray(labels))


def main(train_dir,protocol,model_name,weight_dir,train_retina=False):
    """Extract and save penultimate-layer features for every fold of the 5x2 cross-validation.

    For each fold ``"<i>x<j>"`` (i in 1..5, j in 1..2) this loads the fold's
    data, builds a fresh VGG-M model, loads the first ``*.hdf5`` checkpoint
    found in ``<weight_dir>/<fold>/`` and stores the output of the model's
    second-to-last layer for every train and test image.

    Parameters
    ----------
    train_dir : str
        Directory with the image files (forwarded to ``load_data``).
    protocol : str
        Protocol name; ``"protocol3"`` uses 3 output classes, any other 2.
    model_name : str
        Only used to name the output folder.
    weight_dir : str
        Directory holding one sub-folder per fold with a ``*.hdf5`` weight file.
    train_retina : bool, optional
        Forwarded to ``load_data``.

    Side effects
    ------------
    Creates ``./<model_name>_<protocol>_features/<fold>/`` and writes
    ``train_features.npy``, ``train_featureLabels.npy``, ``test_features.npy``
    and ``test_featureLabels.npy`` into it.

    Raises
    ------
    FileNotFoundError
        If a fold folder contains no ``*.hdf5`` file.
    """
    num_output=2
    if protocol == "protocol3":
        num_output=3
    features_save_path="./"+model_name+"_"+protocol+"_features"
    # exist_ok so that re-running the cell does not fail on the existing folder.
    os.makedirs(features_save_path, exist_ok=True)
    for i in range(1,6):
        for j in range(1,3):
            fold=str(i)+"x"+str(j)
            dest_path=features_save_path+"/"+fold
            os.makedirs(dest_path, exist_ok=True)

            # Sorted so the chosen checkpoint is deterministic when several exist.
            weight_pattern=weight_dir+"/"+fold+"/*.hdf5"
            weight_files=sorted(glob(weight_pattern))
            print(weight_dir+"/"+fold," ",weight_files)
            if not weight_files:
                raise FileNotFoundError("no weight file matching "+weight_pattern)
            weight_path=weight_files[0]

            #Load the data
            train_images,train_labels,test_images,test_labels=load_data(protocol,fold,train_dir,train_retina)

            print("VGGM model")
            vggm=vgg_m(num_output)

            print("Loading pre-trained weights")
            vggm.load_weights(weight_path)
            # `learning_rate` replaces the deprecated `lr` keyword.
            # NOTE(review): compiling should not be needed for the forward passes below — confirm before removing.
            vggm.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.005), metrics=['accuracy',tf.keras.metrics.AUC()])

            print("-------------------extracting features for fold:",fold,"--------------------------------")

            # Only the second-to-last layer is needed (the layer right before the
            # softmax), so the extractor exposes just that output instead of every layer's.
            extractor = Model(inputs=vggm.inputs,outputs=vggm.layers[-2].output)

            _save_split_features(extractor,train_images,train_labels,dest_path,"train")
            _save_split_features(extractor,test_images,test_labels,dest_path,"test")

            

In [8]:
# Driver cell: extract the features of one (model, protocol) pair.
# - Change model_name ("A" here) per model; it only names the output folder
#   ./<model_name>_<protocol>_features.
# - Change the weight path accordingly; it must contain one sub-folder per fold
#   (1x1 ... 5x2), each holding a *.hdf5 checkpoint.
# - To get the weights of a different protocol, go to the outputs of
#   melanoma_skin_cancer versions 6, 7, 8 and load them into the E_protocol1_weights dataset.
main(melanoma_dir,"protocol1","A","../input/model-a-weights/E_protocol1_scratch_weights")


../input/model-a-weights/E_protocol1_scratch_weights/1x1   ['../input/model-a-weights/E_protocol1_scratch_weights/1x1/weights-epoch-30-loss-0.65.hdf5']
images listed: 487  images loaded: 475
images listed: 487  images loaded: 473
Train images:   Images: (475, 224, 224, 3)  labels: (475, 2)
Test images:   Images: (473, 224, 224, 3)  labels: (473, 2)
VGGM model
Loading pre-trained weights
-------------------extracting features for fold: 1x1 --------------------------------
../input/model-a-weights/E_protocol1_scratch_weights/1x2   ['../input/model-a-weights/E_protocol1_scratch_weights/1x2/weights-epoch-18-loss-0.64.hdf5']
images listed: 487  images loaded: 473
images listed: 487  images loaded: 475
Train images:   Images: (473, 224, 224, 3)  labels: (473, 2)
Test images:   Images: (475, 224, 224, 3)  labels: (475, 2)
VGGM model
Loading pre-trained weights
-------------------extracting features for fold: 1x2 --------------------------------
../input/model-a-weights/E_protocol1_scratch_wei

In [9]:
import shutil
# Zip the extracted-feature folder into ./sequence.zip; the call's return value
# (the path of the created archive) is the cell output.
shutil.make_archive(base_name='sequence', format='zip', root_dir='./A_protocol1_features')

'/kaggle/working/sequence.zip'

In [None]:
import os
# NOTE(review): './E_protocol2_features' is not created by the main() call in this
# notebook (that call writes './A_protocol1_features'), so this looks like a leftover
# from another configuration — confirm the intended folder before running.
# Changing the working directory also changes how later relative paths resolve.
os.chdir('./E_protocol2_features')

In [None]:
from IPython.display import FileLink
# Render a link to the archive. The path is relative to the current working
# directory. NOTE(review): the archive cell reported '/kaggle/working/sequence.zip',
# so presumably this link only resolves if the working directory is still
# /kaggle/working — verify.
FileLink('sequence.zip')