# Feature Extraction

## I. Imports

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import ndimage
from skimage.io import imread
from time import time
from skimage.measure import shannon_entropy
from skimage.measure import regionprops
from scipy.stats import gaussian_kde
from os import mkdir, path
import cv2
import warnings
warnings.filterwarnings("ignore")


from src.features import *
from util.preprocessing import *

In [3]:
# read train and test data
df = pd.read_csv('data/train.csv')
df_test = pd.read_csv("./data/test.csv")

# X_train and y_train are used for the learning parts
X_train = df['ImageId'].values
y_train = df['Malignant'].values
# Number of training images as a plain int. `.shape` on its own is a tuple,
# which np.zeros() accepts but range() rejects with a TypeError.
n = X_train.shape[0]

# X_test is used to produce the final estimation
X_test = df_test['ImageId'].values
# Number of test images as a plain int (same reason as `n`).
# NOTE(review): the name `p` is rebound to a module by the later cell
# `from src.Palash_Sarkar_work import Prep as p`; do not re-run this cell
# after that import without re-running the import as well.
p = X_test.shape[0]

# number of images in each set
# (the counts below assume 'Malignant' is a 0/1 label — confirm against the csv)
print('number of melanoma images : ', np.sum(y_train))
print('number of benign images : ', (int)(n - np.sum(y_train)))

number of melanoma images :  282
number of benign images :  418


# ABCD features

The following features are based on the article **Performance of a dermoscopy-based computer vision system for the diagnosis of pigmented skin lesions compared with visual evaluation by experienced dermatologists**, by Maciel Zortea, Thomas R. Schopf, Kevin Thon, et al.

### Asymmetry

In [None]:
# Asymmetry of shape: compute features 1 and 2 for the training and test images
# and report how long the two passes took.
# NOTE(review): X_df_train / X_df_test are not assigned in any cell shown in
# this notebook; presumably they are pandas Series of image ids — confirm.
t_start = time()
features1_2_train = X_df_train.map(assimitry_Shape)
features1_2_test = X_df_test.map(assimitry_Shape)
elapsed = time() - t_start
print('Assimetry of shape features :', elapsed)

In [20]:
# adding features 1 and 2 of the training and test set
# Each entry of features1_2_* is indexed as a pair: [0] -> f1, [1] -> f2.
# The number of entries is taken from the feature containers themselves rather
# than from the globals `n` / `p`: those were assigned from `.shape` (a tuple,
# which range() rejects) and `p` is later rebound to a module by
# `from src.Palash_Sarkar_work import Prep as p`.
train_positions = range(len(features1_2_train))
f1 = np.array([features1_2_train[i][0] for i in train_positions], dtype=float)
f2 = np.array([features1_2_train[i][1] for i in train_positions], dtype=float)
df['f1'] = f1
df['f2'] = f2

test_positions = range(len(features1_2_test))
f1_test = np.array([features1_2_test[i][0] for i in test_positions], dtype=float)
f2_test = np.array([features1_2_test[i][1] for i in test_positions], dtype=float)
df_test['f1'] = f1_test
df_test['f2'] = f2_test

In [None]:
# Asymmetry of color: compute features 3 and 4 for the training and test images
# and report how long the two passes took.
# NOTE(review): X_df_train / X_df_test are not assigned in any cell shown in
# this notebook; presumably they are pandas Series of image ids — confirm.
t_start = time()
features3_4_train = X_df_train.map(assimetry_color)
features3_4_test = X_df_test.map(assimetry_color)
elapsed = time() - t_start
print('Assimetry of color features :', elapsed)

In [31]:
### adding features 3 and 4 of the training and test set to the df data structure to keep them as csv files
# Each entry of features3_4_* is indexed as a pair: [0] -> f3, [1] -> f4.
# The number of entries is taken from the feature containers themselves rather
# than from the globals `n` / `p`: those were assigned from `.shape` (a tuple,
# which range() rejects) and `p` is later rebound to a module by
# `from src.Palash_Sarkar_work import Prep as p`.
train_positions = range(len(features3_4_train))
f3 = np.array([features3_4_train[i][0] for i in train_positions], dtype=float)
f4 = np.array([features3_4_train[i][1] for i in train_positions], dtype=float)
df['f3'] = f3
df['f4'] = f4

test_positions = range(len(features3_4_test))
f3_test = np.array([features3_4_test[i][0] for i in test_positions], dtype=float)
f4_test = np.array([features3_4_test[i][1] for i in test_positions], dtype=float)
df_test['f3'] = f3_test
df_test['f4'] = f4_test

### Color features

In [None]:
# Color features: compute features 13, 14 and 15 for the training and test
# images and report how long the two passes took.
# NOTE(review): X_df_train / X_df_test are not assigned in any cell shown in
# this notebook; presumably they are pandas Series of image ids — confirm.
t_start = time()
features13_15_train = X_df_train.map(feature_13_f14_15)
features13_15_test = X_df_test.map(feature_13_f14_15)
elapsed = time() - t_start
print('Feature 13,14 and 15 :', elapsed)

In [34]:
### adding features 13, 14 and 15 of the training and test set to the df data structure to keep them as csv files
# Each entry of features13_15_* is indexed as a triple: [0] -> f13, [1] -> f14,
# [2] -> f15. The number of entries is taken from the feature containers
# themselves rather than from the globals `n` / `p`: those were assigned from
# `.shape` (a tuple, which range() rejects) and `p` is later rebound to a
# module by `from src.Palash_Sarkar_work import Prep as p`.
train_positions = range(len(features13_15_train))
f13 = np.array([features13_15_train[i][0] for i in train_positions], dtype=float)
f14 = np.array([features13_15_train[i][1] for i in train_positions], dtype=float)
f15 = np.array([features13_15_train[i][2] for i in train_positions], dtype=float)
df['f13'] = f13
df['f14'] = f14
df['f15'] = f15

test_positions = range(len(features13_15_test))
f13_test = np.array([features13_15_test[i][0] for i in test_positions], dtype=float)
f14_test = np.array([features13_15_test[i][1] for i in test_positions], dtype=float)
f15_test = np.array([features13_15_test[i][2] for i in test_positions], dtype=float)
df_test['f13'] = f13_test
df_test['f14'] = f14_test
df_test['f15'] = f15_test

In [None]:
### Compute features 16 to 18 for the training and test images and report how
### long the two passes took (original author's note on the training pass: 3h40min).
# NOTE(review): X_df_train / X_df_test are not assigned in any cell shown in
# this notebook; presumably they are pandas Series of image ids — confirm.
t_start = time()
features16_18_train = X_df_train.map(feature_16_17_18)
features16_18_test = X_df_test.map(feature_16_17_18)
elapsed = time() - t_start
print('Feature 16,17 and 18 :', elapsed)

In [37]:
### adding features 16 to 18 of the training and test set
# Each entry of features16_18_* is indexed as a triple: [0] -> f16, [1] -> f17,
# [2] -> f18. The number of entries is taken from the feature containers
# themselves rather than from the globals `n` / `p`: those were assigned from
# `.shape` (a tuple, which range() rejects) and `p` is later rebound to a
# module by `from src.Palash_Sarkar_work import Prep as p`.
train_positions = range(len(features16_18_train))
f16 = np.array([features16_18_train[i][0] for i in train_positions], dtype=float)
f17 = np.array([features16_18_train[i][1] for i in train_positions], dtype=float)
f18 = np.array([features16_18_train[i][2] for i in train_positions], dtype=float)
df['f16'] = f16
df['f17'] = f17
df['f18'] = f18

test_positions = range(len(features16_18_test))
f16_test = np.array([features16_18_test[i][0] for i in test_positions], dtype=float)
f17_test = np.array([features16_18_test[i][1] for i in test_positions], dtype=float)
f18_test = np.array([features16_18_test[i][2] for i in test_positions], dtype=float)
df_test['f16'] = f16_test
df_test['f17'] = f17_test
df_test['f18'] = f18_test

# Border, Dimensions, Entropy, ...

The following features are inspired by the work of **Palash Sarkar**.

**Link** : https://github.com/Tejas07PSK/Melanoma-Detection

In [1]:
from src.Palash_Sarkar_work import Prep as p
from src.Palash_Sarkar_work.texture import Haralick as har
from src.Palash_Sarkar_work.texture import King as k
from src.Palash_Sarkar_work.physical import Gabor as g

In [15]:
#create dataset 
def _createDataSet(X):
    
    #initialize dataset
    dset = np.zeros((1,25))

    for i in range(0, X.shape[0]):
        print('processing image {}'.format(X[i]))
        
        image = X[i]
        im = imread( 'images/im/'  + str(image) + '.jpg')
        filemask = 'images/im/{}_segmentation.jpg'.format(image)
        cvu8_mask = imread(filemask).astype(np.uint8)
        thresh, masku8 = cv2.threshold(cvu8_mask, 127, 255, cv2.THRESH_BINARY)
        
        obj = p.Prep(im)
        feobj = har.HarFeat(obj.getSegGrayImg())
        feobj3 = g.Gabor(obj.getSegGrayImg(),masku8, obj.getSegColImg())
        feobj4 = k.KingFeat(obj.getSegGrayImg()) 
        
        featarr = []
        
        #Features list
        featarr.append(feobj.getAngularSecondMomentASM())
        featarr.append(feobj.getEnergy())
        featarr.append(feobj.getEntropy())
        featarr.append(feobj.getContrast())
        featarr.append(feobj.getHomogeneity())
        featarr.append(feobj.getDm())
        featarr.append(feobj.getCorrelation())
        featarr.append(feobj.getHarCorrelation())
        featarr.append(feobj.getClusterShade())
        featarr.append(feobj.getClusterProminence())
        featarr.append(feobj.getMoment1())
        featarr.append(feobj.getMoment2())
        featarr.append(feobj.getMoment3())
        featarr.append(feobj.getMoment4())
        featarr.append(feobj.getDasm())
        featarr.append(feobj.getDmean())
        featarr.append(feobj.getDentropy())
        featarr.append(feobj3.getAsymmetryIndex())
        featarr.append(feobj3.getCompactIndex())
        featarr.append(feobj3.getDiameter())
        featarr.append(feobj3.getColorVariance())
        featarr.append(feobj4.getKingsCoarseness())
        featarr.append(feobj4.getKingsContrast())
        featarr.append(feobj4.getKingsComplexity())
        featarr.append(feobj4.getKingsStrength())
        
        
        featarr = np.asarray(featarr)
        featarr  = np.reshape(featarr , (1,25))
        dset = np.r_[dset,featarr]
        

    return dset[1:]

In [None]:
# Build the raw feature matrices for the training and test images
data_train = _createDataSet(X_train)
data_test = _createDataSet(X_test)

# Column labels, listed in the same order as the getter calls in _createDataSet
featnames = np.array(
    [
        'ASM', 'ENERGY', 'ENTROPY', 'CONTRAST', 'HOMOGENEITY', 'DM',
        'CORRELATION', 'HAR-CORRELATION', 'CLUSTER-SHADE', 'CLUSTER-PROMINENCE',
        'MOMENT-1', 'MOMENT-2', 'MOMENT-3', 'MOMENT-4',
        'DASM', 'DMEAN', 'DENTROPY',
        'ASYMMETRY-INDEX', 'COMPACT-INDEX', 'DIAMETER', 'COLOR-VARIANCE',
        'KINGS-COARSENESS', 'KINGS-CONTRAST', 'KINGS-COMPLEXITY', 'KINGS-STRENGTH',
    ],
    dtype=object,
    order='C',
)

# Wrap each matrix in a dataframe labelled with the feature names
train = pd.DataFrame(data_train, columns=featnames)
test = pd.DataFrame(data_test, columns=featnames)

In [None]:
# Put the new feature columns next to the previously extracted ones
# (column-wise concatenation, rows matched on the dataframe index)
train_data = pd.concat([df, train], axis='columns')
test_data = pd.concat([df_test, test], axis='columns')

# Write both feature tables to disk in csv format, without the index column
for frame, destination in ((train_data, './data/train_data'),
                           (test_data, './data/test_data')):
    frame.to_csv(destination, index=False)