## Classification of Histology images

** Feature extraction through pretrained model + hand picked features **

In [1]:
#importing libraries
import os
import re
import cv2 as cv
import pickle
import numpy as np
import pandas as pd
from sklearn.svm import SVC
import keras.backend as K
from keras.models import Model
from keras.engine.topology import Input
from keras.preprocessing import image
K.set_image_data_format('channels_last')
from sklearn.decomposition import PCA
from keras.utils.vis_utils import plot_model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#path to folder containing four subfolders

root_dir = 'C:\\Users\\ANIKET RAJ\\Desktop\\keras'

In [3]:
#splitting into training and test set

def preprocess_for_training(X,y):
    le = LabelEncoder()
    y = le.fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= 0.3, shuffle=True,random_state=42)
    return X_train, y_train, X_test, y_test

In [4]:
def preprocess_for_reduction(data):
    return data.T

In [5]:
def reduce_feature_dimension(data, n_components,mode=None):
    data = preprocess_for_reduction(data)
    if mode in ['svd_rbf','kmeans',]:
        reducer = PCA(n_components=n_components, whiten=True)
        reduced = reducer.fit_transform()
        return reduced
    else:
        reducer = PCA(n_components=n_components)
        reducer.fit(data)
        reduced = reducer.transform(data)
        return reduced

In [6]:
#augmentation of feature vectors from different methods

def add_features(sample1):
    X_complete = np.hstack(sample1)
    return X_complete

###   Feature extraction by VGG16

In [8]:
height_vgg16 = 224
width_vgg16 = int(height_vgg16*0.75)
target_size_vgg16 = (width_vgg16,height_vgg16)

In [9]:
layer_name_vgg16 = 'block3_pool'
model_input_vgg16 = Input((height_vgg16, width_vgg16, 3))
#base_model = VGG19(include_top = False,weights='imagenet',input_tensor = model_input, input_shape=target_size)
base_model_vgg16 = VGG16(weights='imagenet', include_top=False,input_tensor = model_input_vgg16, input_shape=(height_vgg16, width_vgg16, 3))
model_vgg16 = Model(inputs=model_input_vgg16, outputs=base_model_vgg16.get_layer(layer_name_vgg16).output)
model_vgg16.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 168, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 168, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 168, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 84, 64)       0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 84, 128)      73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 84, 128)      147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 42, 128)       0         
__________

In [None]:
def feature_extract_from_VGG16(image_path ,target_size=target_size_vgg16):
                    Model = model_vgg16
                    
                    img = cv.imread(image_path)
                    img = cv.resize(img, target_size)
                    img = img/255.0
                    #img = image.img_to_array(img)
                    #img = img/255.0
                    x = np.expand_dims(img, axis=0)
                    
                    #x = preprocess_input(x)

                    features = Model.predict(x)
                    
                    return features
    

In [None]:
def create_feature_mat_VGG16(root_dir, shape):
    img_re = re.compile(r'.+\.(tif)$', re.IGNORECASE)
    feature_mat = np.zeros((shape[0],shape[1]))
    labels = []
    for root, dirs, files in os.walk(root_dir):
        for k,name in enumerate(sorted(dirs)):
            sub_path = os.path.join(root,name)
            for i,files in enumerate(os.listdir(sub_path)):
                if img_re.match(files):
                    image_path = os.path.join(sub_path,files)
                    features = feature_extract_from_VGG16(image_path=image_path)
                    features = features.reshape(-1)
                    feature_mat[:,i] = features
                    labels.append(name)
    return feature_mat, labels

In [None]:
nb_train_vgg16 = 400
n_features_vgg16 = 150528
nb_test_class_vgg16 = 4
shape_vgg16 = (n_features_vgg16, nb_train_vgg16)

features_vgg16, labels_vgg16 = create_feature_mat_VGG16(root_dir=root_dir,shape=shape_vgg16)

In [None]:
#writing pickle file with features
fileObject = open('VGG16_conv4_layer_features','wb')
pickle.dump(features, fileObject)
fileObject.close()

In [13]:
labels_vgg16

['Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',
 'Benign',

In [71]:
X_vgg16 = reduce_feature_dimension(features_vgg16, n_components=100)
X_vgg16.shape
X_vgg16

array([[2095.10988883],
       [2274.67814144],
       [1918.25053715],
       [2390.05570884],
       [2531.76129728],
       [2592.78863574],
       [2466.18334495],
       [2226.2214887 ],
       [2279.59289852],
       [2257.84306193],
       [2208.5786437 ],
       [2227.39480639],
       [2183.71101853],
       [2204.58317812],
       [2402.52628059],
       [2617.51557174],
       [2575.28459845],
       [1917.74013927],
       [2074.752119  ],
       [2374.34361675],
       [2304.10709644],
       [1998.41722567],
       [2287.77430002],
       [2336.65559174],
       [2349.03635256],
       [1920.37888864],
       [2221.50870709],
       [2255.70026358],
       [2484.37676192],
       [2217.74757377],
       [2330.40963595],
       [2148.2020235 ],
       [2007.50253068],
       [2126.93348637],
       [1983.71892425],
       [2184.78070638],
       [2144.64312796],
       [2038.42038566],
       [2351.88265732],
       [2214.1021062 ],
       [2220.62769397],
       [2363.572

In [43]:
X_train,y_train , X_test , y_test = preprocess_for_training(X_vgg16,labels_vgg16)

In [44]:
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
print('test set accuracy = ',svm_model.score(X_test, y_test))
print('training set accuracy = ',svm_model.score(X_train, y_train))

test set accuracy =  0.5166666666666667
training set accuracy =  0.4928571428571429


####  Reading hand picked features

In [None]:
x = pd.read_csv('C:\\Users\\ANIKET RAJ\\Desktop\\keras\\extraf.csv')

x.sort_values(by='name',inplace=True)

x['label'] = x['label'].map({2:'Benign',1:'Normal',3:'InSitu',4:'Invasive'})
col = x.columns
xtra_feature = x.iloc[:,1:4].values

#### combining features and calculating accuracy

In [None]:
print('-------  combining vgg16 and our own feature  --------')

X_complete = add_features([X_vgg16,xtra_feature])
print('feature vector shape --> ',X_complete.shape)
X_train,y_train , X_test , y_test = preprocess_for_training(X_complete,labels_vgg16)
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
print('test set accuracy = ',svm_model.score(X_test, y_test))
print('train set accuracy = ',svm_model.score(X_train, y_train))

print('______________________________________________________')