In [4]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly_express as px
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
from tqdm import tqdm, tqdm_notebook
import gc
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve
from keras.applications.densenet import preprocess_input, DenseNet121
from keras.models import Model
from keras.layers import GlobalAveragePooling2D, Input, Lambda, AveragePooling1D
import keras.backend as K
from lightgbm import LGBMClassifier

In [22]:
def resize_image(img, size=None):
    """Scale an image to fit a square canvas and pad the remainder with black.

    The aspect ratio is preserved: the longer side is scaled to ``size``
    pixels and the shorter side is centred using constant black borders.

    Parameters
    ----------
    img : array
        Image whose first two axes are (height, width); it is handed to
        ``cv2.resize`` and ``cv2.copyMakeBorder``.
    size : int, optional
        Side length of the square output. When omitted, the notebook-level
        ``img_size`` is used, so existing one-argument calls are unchanged.

    Returns
    -------
    array
        The ``size`` x ``size`` padded image returned by
        ``cv2.copyMakeBorder``.
    """
    if size is None:
        # Fall back to the notebook-wide setting (looked up at call time).
        size = img_size
    old_size = img.shape[:2]
    ratio = float(size)/max(old_size)
    # Clamp each side to at least 1 px: for very elongated images the short
    # side would otherwise truncate to 0, which cv2.resize rejects.
    new_size = tuple(max(1, int(x*ratio)) for x in old_size)
    # cv2.resize expects (width, height), i.e. the reverse of shape[:2].
    img = cv2.resize(img, (new_size[1], new_size[0]))
    delta_w = size - new_size[1]
    delta_h = size - new_size[0]
    # Split the padding evenly; any odd pixel goes to the bottom/right.
    top, bottom = delta_h//2, delta_h-(delta_h//2)
    left, right = delta_w//2, delta_w-(delta_w//2)
    color = [0,0,0]
    new_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return new_img

def load_image(path, img_id):
    """Read ``<path>/<img_id>.jpg`` and prepare it as DenseNet input.

    The image is read with OpenCV, converted from BGR to RGB, resized and
    padded by ``resize_image`` and finally passed through the DenseNet
    ``preprocess_input`` function.

    Parameters
    ----------
    path : str
        Directory that contains the image file.
    img_id : str
        File name without the ``.jpg`` extension.

    Returns
    -------
    array
        The preprocessed image returned by ``preprocess_input``.

    Raises
    ------
    FileNotFoundError
        If the image file does not exist.
    OSError
        If the file exists but OpenCV cannot decode it.
    """
    path = os.path.join(path,img_id+'.jpg')
    if not os.path.isfile(path):
        raise FileNotFoundError("Image file not found: " + path)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise OSError("Image file could not be decoded: " + path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    new_img = resize_image(img)
    new_img = preprocess_input(new_img)
    return new_img

In [23]:
# Feature-extraction settings.
img_size = 256   # side length (pixels) of the square network input
batch_size = 16  # 16 images per batch

# Image ids whose features will be extracted.
# NOTE(review): `df` is not defined in the cells shown here; it must already
# exist in the kernel (with an `image_id` column) before this cell runs.
train_img_ids = df.image_id.values
# Ceiling division: `len//batch_size + 1` would add a trailing empty batch
# whenever the number of ids is an exact multiple of batch_size.
n_batches = (len(train_img_ids) + batch_size - 1)//batch_size

# Model to extract image features
# The input shape is derived from img_size so the two cannot drift apart.
inp = Input((img_size,img_size,3))
backbone = DenseNet121(input_tensor=inp, include_top=False)
x = backbone.output
# Collapse the spatial axes: one value per backbone channel.
x = GlobalAveragePooling2D()(x)
# Add a trailing axis so AveragePooling1D can pool along the channel axis.
x = Lambda(lambda t: K.expand_dims(t,axis=-1))(x)
# Average every 4 neighbouring channels, shrinking the feature vector 4x.
x = AveragePooling1D(4)(x)
# Drop the helper axis again to get a flat feature vector per image.
out = Lambda(lambda t: t[:,:,0])(x)

m = Model(inp,out)

In [29]:
# Extract one feature vector per image id, batch by batch.
features = {}
# Ids for which no image could be loaded from either directory.
failed_img_ids = []
path_1 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1/'
path_2 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2/'
for b in tqdm_notebook(range(n_batches)):
    start = b*batch_size
    end = (b+1)*batch_size
    batch_ids = train_img_ids[start:end]
    if len(batch_ids) == 0:
        # Nothing to predict for an empty slice (can occur when n_batches
        # overshoots the number of ids).
        continue

    batch_images = np.zeros((len(batch_ids),img_size,img_size,3))
    for i,img_id in enumerate(batch_ids):
        # The dataset is split over two directories; try each in turn.
        for img_dir in (path_1, path_2):
            try:
                batch_images[i] = load_image(img_dir,img_id)
            except Exception:
                # Not loadable from this directory; try the next one.
                # `Exception` (not a bare except) keeps Ctrl-C working.
                continue
            break
        else:
            # Neither directory worked: the slot stays all-zero. Record the
            # id so the problem is visible instead of silently ignored.
            failed_img_ids.append(img_id)
    batch_preds = m.predict(batch_images)
    for i,img_id in enumerate(batch_ids):
        features[img_id] = batch_preds[i]

if failed_img_ids:
    print(f"WARNING: {len(failed_img_ids)} image(s) could not be loaded; "
          f"their features were computed from an all-zero image. "
          f"First ids: {failed_img_ids[:10]}")

  0%|          | 0/626 [00:00<?, ?it/s]



In [30]:
# Build a table with one row per image id and one column per feature.
train_feats = pd.DataFrame.from_dict(data=features, orient='index')
# Persist the table so the extraction does not have to be repeated.
train_feats.to_csv(path_or_buf='train_img_features.csv')
# Preview the first rows.
train_feats.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
ISIC_0027419,0.002376,0.588274,0.017702,0.020545,0.379583,0.002473,0.005535,0.00472,0.153959,0.046983,...,0.328814,0.215515,2.86615,0.126249,1.370708,0.800405,0.906448,2.212421,0.764941,0.867133
ISIC_0025030,0.002567,0.532284,0.022582,0.022108,0.335121,0.002741,0.006038,0.004778,0.118742,0.047763,...,0.650124,0.258772,3.450847,0.307138,1.412516,0.522081,1.321518,2.250861,0.565062,1.104533
ISIC_0026769,0.002732,0.708935,0.017082,0.017649,0.412199,0.002324,0.009454,0.003849,0.165069,0.049396,...,0.550999,0.155106,1.670032,0.316097,0.905594,1.13542,1.74347,1.306672,0.512979,1.256323
ISIC_0025661,0.002619,0.656944,0.007353,0.018472,0.372978,0.002243,0.00831,0.004544,0.101944,0.053569,...,0.535878,0.407855,1.735275,0.673193,1.175149,0.895392,0.957998,1.025301,0.579848,0.989027
ISIC_0031633,0.002983,0.728604,0.016006,0.016868,0.309217,0.002436,0.009378,0.00431,0.138445,0.04434,...,0.571626,0.336474,1.633206,0.595136,1.738639,1.149956,0.894459,1.739522,0.273617,1.268434


In [31]:
# Show the feature table as the cell output (the rendered view is abbreviated
# with "..." rows/columns, so this gives a look at both ends of the table).
train_feats

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
ISIC_0027419,0.002376,0.588274,0.017702,0.020545,0.379583,0.002473,0.005535,0.004720,0.153959,0.046983,...,0.328814,0.215515,2.866150,0.126249,1.370708,0.800405,0.906448,2.212421,0.764941,0.867133
ISIC_0025030,0.002567,0.532284,0.022582,0.022108,0.335121,0.002741,0.006038,0.004778,0.118742,0.047763,...,0.650124,0.258772,3.450847,0.307138,1.412516,0.522081,1.321518,2.250861,0.565062,1.104533
ISIC_0026769,0.002732,0.708935,0.017082,0.017649,0.412199,0.002324,0.009454,0.003849,0.165069,0.049396,...,0.550999,0.155106,1.670032,0.316097,0.905594,1.135420,1.743470,1.306672,0.512979,1.256323
ISIC_0025661,0.002619,0.656944,0.007353,0.018472,0.372978,0.002243,0.008310,0.004544,0.101944,0.053569,...,0.535878,0.407855,1.735275,0.673193,1.175149,0.895392,0.957998,1.025301,0.579848,0.989027
ISIC_0031633,0.002983,0.728604,0.016006,0.016868,0.309217,0.002436,0.009378,0.004310,0.138445,0.044340,...,0.571626,0.336474,1.633206,0.595136,1.738639,1.149956,0.894459,1.739522,0.273617,1.268434
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ISIC_0033084,0.002635,0.458511,0.026881,0.017053,0.299433,0.002278,0.004763,0.007214,0.098963,0.051982,...,1.481547,0.411743,0.705094,0.166923,0.652449,0.379149,0.789254,0.792373,0.156602,0.981770
ISIC_0033550,0.002487,0.370352,0.013437,0.022227,0.243084,0.002777,0.004855,0.004880,0.074706,0.044280,...,1.977797,0.452903,0.852068,0.435664,0.872942,0.307644,1.321402,1.381713,0.191204,1.213372
ISIC_0033536,0.002490,0.406810,0.017767,0.019279,0.271738,0.002571,0.005426,0.006172,0.084054,0.046578,...,1.893659,0.426220,1.076961,0.371434,0.878311,0.296566,1.388659,1.117431,0.428357,0.943772
ISIC_0032854,0.003169,0.776487,0.016431,0.020603,0.426338,0.002318,0.007867,0.003339,0.145127,0.049512,...,0.405568,0.106385,2.368244,0.146011,0.752110,1.154994,1.337301,2.238781,0.709772,0.690446
