In [124]:
from sklearn.preprocessing import LabelEncoder
from keras.applications import Xception
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from keras.layers import GlobalAveragePooling2D
from keras.models import Model
import numpy as np
import os
import pandas as pd
from pathlib import Path
import pickle
import random

In [128]:
# --- Input parameters ---
# Root directory of the 10x tiles; each class lives in its own sub-directory.
TILE_PATH = Path('/gpfs1/scratch/90days/s4436005/img_reg/train_dataset_10x_02_12_19/tiles_10x')
CANCER_PATH = TILE_PATH / 'cancer'
NON_CANCER_PATH = TILE_PATH / 'non-cancer'
# Number of tiles pushed through the network per predict() call.
BATCH_SIZE = 256

# --- Output parameters ---
# Directory that receives the extracted feature table.
OUTPUT_PATH = Path('/gpfs1/scratch/90days/s4436005/img_reg/train_dataset_10x_02_12_19/features')

In [129]:
# Collect every JPEG tile from both class directories. Path.glob() yields
# entries in an arbitrary, filesystem-dependent order, so sort them first to
# get a stable starting order.
cancer_tile_paths = sorted(CANCER_PATH.glob('*.jpeg'))
non_cancer_tile_paths = sorted(NON_CANCER_PATH.glob('*.jpeg'))
all_tile_paths = cancer_tile_paths + non_cancer_tile_paths

# Mix the two classes with a fixed seed so the tile order (and therefore the
# row order of the saved feature table) is the same on every run. A private
# Random instance is used so the global `random` state is left untouched.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(all_tile_paths)

In [125]:
# Feature extractor: Xception with ImageNet weights and without its
# classification head, followed by global average pooling so that each image
# is reduced to a single feature vector.
xception_base = Xception(include_top=False, weights='imagenet')
pooling_layer = GlobalAveragePooling2D()
x = xception_base.output
features = pooling_layer(x)
model = Model(inputs=xception_base.input, outputs=features)

In [132]:
# Number of batches needed to cover every tile: ceiling division, so a final
# partial batch is counted as well.
num_tiles = len(all_tile_paths)
total_batches = (num_tiles + BATCH_SIZE - 1) // BATCH_SIZE
print(total_batches)
# Start from an empty frame for the extracted features.
features_df = pd.DataFrame()

65


In [None]:
# Extract one pooled Xception feature vector per tile, batch by batch, and
# gather the results into `features_df` (2048 feature columns plus 'label').
#
# Per-batch frames are collected in a list and concatenated once after the
# loop: growing a DataFrame with pd.concat inside the loop copies all previous
# rows on every iteration, and re-running the cell would append duplicates.
batch_frames = []
for (batch, i) in enumerate(range(0, len(all_tile_paths), BATCH_SIZE)):
    print('processing batch {0}/{1}'.format(batch + 1, total_batches))
    batch_paths = all_tile_paths[i:i + BATCH_SIZE]
    # The class label is the name of the directory that contains the tile.
    batch_labels = [path.parent.name for path in batch_paths]

    # Load each tile at the 299x299 size used here and stack the arrays into
    # one (n_tiles, 299, 299, 3) batch.
    batch_imgs = np.stack([
        img_to_array(load_img(path, target_size = (299, 299)))
        for path in batch_paths
    ])
    # Xception's ImageNet weights expect pixels scaled to [-1, 1], which is
    # mode='tf'. The default mode of imagenet_utils.preprocess_input is
    # 'caffe' (BGR reordering + mean subtraction), which does not match this
    # network and would distort the extracted features.
    batch_imgs = imagenet_utils.preprocess_input(batch_imgs, mode = 'tf')

    features = model.predict(batch_imgs, batch_size = BATCH_SIZE)
    features = features.reshape((features.shape[0]), 2048)
    batch_df = pd.DataFrame(features)
    batch_df['label'] = batch_labels
    batch_frames.append(batch_df)

# Single concatenation; ignore_index gives a clean 0..n-1 row index instead of
# repeating 0..BATCH_SIZE-1 for every batch. With no tiles at all, fall back to
# an empty frame rather than letting pd.concat raise on an empty list.
features_df = (
    pd.concat(batch_frames, axis = 'index', ignore_index = True)
    if batch_frames
    else pd.DataFrame()
)

processing batch 1/65
processing batch 2/65
processing batch 3/65
processing batch 4/65
processing batch 5/65
processing batch 6/65
processing batch 7/65
processing batch 8/65
processing batch 9/65
processing batch 10/65
processing batch 11/65
processing batch 12/65
processing batch 13/65
processing batch 14/65
processing batch 15/65
processing batch 16/65
processing batch 17/65
processing batch 18/65
processing batch 19/65


In [113]:
# Replace the row index with a fresh 0..n-1 range, discarding the old index.
features_df = features_df.reset_index(drop=True)

In [None]:
# Save the feature table as CSV inside OUTPUT_PATH, the output directory
# defined in the parameters cell (the name `OUTPUT` is not defined anywhere
# and raised a NameError). Create the directory first so to_csv does not fail
# when it does not exist yet.
OUTPUT_PATH.mkdir(parents = True, exist_ok = True)
features_df.to_csv(OUTPUT_PATH.joinpath('Xception_features.csv'))