In [10]:
# Globally silence Python warnings for the rest of the kernel session.
# With no category/module arguments the 'ignore' filter matches every warning,
# so deprecation notices from the libraries used below are hidden as well.
import warnings
warnings.filterwarnings('ignore')

In [2]:
from PIL import Image
import pandas as pd
import numpy as np
import os
import pickle
import re
import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile
from skimage.io import imread
import matplotlib.pyplot as plt

# Extract features by inception-v3

In [3]:
def create_graph():
    """Import the frozen Inception-v3 network into the default TensorFlow graph.

    The serialized GraphDef is read from
    ``inception-2015-12-05/classify_image_graph_def.pb`` (relative to the
    working directory) and imported with an empty name prefix, so its
    tensors keep their original names (for example ``pool_3:0``).
    """
    model_path = 'inception-2015-12-05/classify_image_graph_def.pb'
    with gfile.FastGFile(model_path, 'rb') as model_file:
        serialized_graph = model_file.read()

    inception_graph_def = tf.GraphDef()
    inception_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(inception_graph_def, name='')

def extract_features(image):
    """Return the Inception-v3 ``pool_3`` activations for a single JPEG image.

    Args:
        image: Path of the JPEG file. Its raw bytes are fed to the graph's
            ``DecodeJpeg/contents:0`` input.

    Returns:
        The value of the ``pool_3:0`` tensor with singleton dimensions
        removed by ``np.squeeze``.
    """
    # Import the network only if it is not in the default graph yet. Calling
    # create_graph() for every image would add one more copy of the whole
    # network to the graph each time, so memory use and per-image latency
    # would keep growing over a directory of images.
    try:
        tf.get_default_graph().get_tensor_by_name('pool_3:0')
    except KeyError:
        create_graph()

    with tf.Session() as sess:
        next_to_last_tensor = sess.graph.get_tensor_by_name('pool_3:0') # inception-v3

        # Read the raw bytes through a context manager so the handle is closed.
        with gfile.FastGFile(image, 'rb') as image_file:
            image_data = image_file.read()

        predictions = sess.run(next_to_last_tensor, {'DecodeJpeg/contents:0': image_data})
        return np.squeeze(predictions)

In [None]:
# extract features of all the images
patient_ids = []
dates = []  # not filled by this cell; kept in case other cells rely on the name
feature_vects = []
for directory, subdirectories, files in os.walk("../legs_folder_jpg_cnn_Qingjie"):# put here the path of train set
    # os.walk yields entries in file-system order. Sort both lists so the rows
    # of the saved table come out in the same order on every machine; the
    # later cells split rows into train/test purely by position.
    subdirectories.sort()
    for file in sorted(files):
        path = os.path.join(directory, file)
        if path.endswith(".jpg"):
            # Row label: second "/"-separated component of the walked directory,
            # then "/" and the file name up to its first dot.
            patient_ids.append(directory.split("/")[1] + "/" + file.split(".")[0])
            feature_vects.append(extract_features(path))

df = pd.DataFrame(feature_vects, index=patient_ids)
# Save the extracted features; the context manager guarantees the file is
# flushed and closed even if pickling fails.
with open("inception_cnn_features2.pkl", "wb") as features_file:
    pickle.dump(df, features_file)

In [5]:
# Load the table of previously extracted features from its pickle file.
# NOTE(review): unpickling executes code stored in the file; only load pickles from a trusted source.
extracted_features = pd.read_pickle(
    "crmo-diagnosis-using-mri/inception/inception_cnn_features_Qingjie_Johnathan.pkl"  # path of the saved features
)

In [6]:
extracted_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
legs_folder_jpg_cnn_Qingjie/11HS_OCMR-gen,0.164151,0.018411,0.860192,0.235207,0.093055,0.857333,0.057959,0.224224,0.18133,0.14506,...,0.407469,0.217232,0.208409,1.007517,0.549417,1.123846,0.627464,0.068723,0.0,0.130673
legs_folder_jpg_cnn_Qingjie/13BL_OCMR-gen,0.264231,0.010349,1.183429,0.015844,0.172795,0.295182,0.25669,0.372902,0.122112,0.35364,...,0.345351,0.483148,0.338158,1.18927,0.051534,1.149649,0.345775,0.287226,0.046311,0.163727
legs_folder_jpg_cnn_Qingjie/14MM_OCMR-,0.246674,0.25738,0.614632,0.015388,0.08356,0.308084,0.022644,0.271424,0.161799,0.17975,...,0.251756,0.590695,0.391394,0.860033,0.325213,0.924889,0.21452,0.233471,0.063212,0.64281
legs_folder_jpg_cnn_Qingjie/15BO_OCMR-,0.169998,0.029488,0.876539,0.047435,0.473674,0.258197,0.310078,0.268706,0.347238,0.137824,...,0.119462,0.488553,0.019853,0.378139,0.120675,1.262191,0.427092,0.697115,0.065668,0.483208
legs_folder_jpg_cnn_Qingjie/16FN_OCMR+,0.334635,0.126451,0.638219,0.01854,0.211393,0.043663,0.255389,0.339354,0.231006,0.283722,...,0.035079,0.101709,0.002696,0.401884,0.035186,0.611416,0.580034,0.6764,0.011819,0.446534
legs_folder_jpg_cnn_Qingjie/17BL_OCMR-gen,0.387841,0.082769,0.891788,0.1751,0.116912,1.100558,0.079015,0.410514,0.104119,0.347073,...,0.394082,0.205385,0.572124,0.329216,0.149632,0.356867,1.017276,0.154578,0.003078,0.301006
legs_folder_jpg_cnn_Qingjie/18MB_OCMR-gen,0.239088,0.167128,0.687956,0.200467,0.064719,0.436957,0.123841,0.25184,0.238598,0.138118,...,0.356292,0.109051,0.358572,0.802959,0.251931,0.926233,0.48932,0.419271,0.000995,0.743945
legs_folder_jpg_cnn_Qingjie/19KW_OCMR-,0.300608,0.127357,0.449541,0.07623,0.100339,0.48132,0.252689,0.245453,0.227858,0.217089,...,0.38057,0.475128,0.457111,0.578536,0.138764,0.706862,0.376726,0.754043,0.03454,0.635976
legs_folder_jpg_cnn_Qingjie/20AJ_OCMR+,0.40567,0.27747,0.346672,0.076771,0.133383,0.514172,0.05759,0.247426,0.269021,0.289391,...,0.120643,0.510688,0.019432,0.704541,0.113509,0.711275,0.107651,0.236153,0.209553,0.566149
legs_folder_jpg_cnn_Qingjie/21LLLY_OCMR-,0.344691,0.148803,0.588681,0.1376,0.117326,0.09935,0.26859,0.150073,0.356997,0.234103,...,0.126279,0.528925,0.062527,0.55194,0.120912,0.735661,0.384907,0.784353,0.018699,0.810304


In [7]:
# prepare the features and the labels
X = extracted_features.values

# One label per row of the table, derived from the row name: 1 when the name
# ends with '+', otherwise 0. Iterating over the index itself (instead of a
# hard-coded range(48)) keeps `labels` the same length as `X` for any number
# of rows, and `endswith` also copes with an empty name.
labels = np.array([
    1 if str(row_name).endswith('+') else 0
    for row_name in extracted_features.index
])

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Positional split: the first N_TRAIN rows are used for training, the rest for testing.
N_TRAIN = 40
X_train = X[:N_TRAIN, :]
X_test = X[N_TRAIN:, :]
y_train = labels[:N_TRAIN]
y_test = labels[N_TRAIN:]

# Scale data.
# The scaler is fitted on the training rows ONLY and the same fitted transform
# is applied to the test rows. Refitting on the test set would standardize it
# with different means/variances from those the classifier is trained on,
# which leaks test statistics and makes the two feature spaces inconsistent.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a classifier (LDA) for classification

In [11]:
# Fit a Linear Discriminant Analysis classifier (default settings) on the
# scaled training features and their 0/1 labels.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
# Left as the cell's last expression so the notebook displays the fitted estimator.
lda.fit(X_train,y_train)

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [None]:
#save the lda
# The file is called 'sample_svm.pickle', but the object written to it is the
# fitted LDA model. The context manager closes the file even if pickling fails.
with open('sample_svm.pickle', 'wb') as f:
    pickle.dump(lda, f)

# Train SVM for framing

## 1. Using HOG to extract features

In [12]:
import cv2, os
import pickle
from sklearn import svm
from skimage import io
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import PIL.Image as Image
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

In [13]:
'''Default parameters for cv2.HOGDescriptor(); we can also set them ourselves.'''

# HOG geometry written out for reference. These names are NOT passed to the
# descriptor created at the end of this cell: the only call that would use
# them is commented out.
winSize = (64,128) 
blockSize = (16,16) # 105 blocks
blockStride = (8,8)
cellSize = (8,8)    # Each block contains 4 cells
Bin = 9
#hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,Bin)
# The descriptor actually used is built with OpenCV's own default arguments.
# NOTE(review): the explicit-argument constructor may differ from the default
# one in settings not listed here (e.g. gamma correction) - confirm against
# the OpenCV documentation before switching between the two calls.
hog = cv2.HOGDescriptor()

In [None]:
'''Using HOG to extract features: build (photoID, HOG vector, label) tuples for training'''


def add_label(id, feature, label):
    """Store one labelled sample in the module-level ``samples`` mapping.

    Args:
        id: Key under which the sample is stored.
        feature: Feature vector of the sample.
        label: Class label; it is inserted in front of the features.

    The stored value is ``np.insert(feature, 0, label)``, i.e. the label at
    position 0 followed by the feature values.
    """
    # Only an item of `samples` is assigned (the name is never rebound), so
    # the module-level mapping is reached without a `global` declaration.
    labelled_vector = np.insert(feature, 0, label)
    samples[id] = labelled_vector

    
    
path = "data" # file path for data
samples = {} # one entry per image: array [label, HOG feature values...]
count = 0   # next key to use in samples
i = 0       # number of directories os.walk has visited so far
label = 1  # 1 means having lesion
for p,dirs,fnames in os.walk(path):
    # Labels are assigned by traversal order (see the `i == 2` switch below),
    # so visit sub-directories and files in sorted order instead of the
    # file-system-dependent order os.walk would otherwise use.
    dirs.sort()
    if i == 2 :
        # From the third visited directory onwards every image is labelled 0.
        label=0  # 0 means no lesion
    for fname in sorted(fnames):
        pth = os.path.join(p, fname)
        img = cv2.imread(pth)
        if img is None:
            # cv2.imread returns None for files it cannot decode (non-image or
            # corrupt files); passing None to cv2.resize would raise, so skip.
            continue
        img = cv2.resize(img,(64,128))
        feature = hog.compute(img)   # Having totally 3780 hog features, blocks * cells * bins = 105*4*9= 3780

        if feature is None:
            continue
        feature = feature.ravel()
        samples[count] = np.insert(feature, 0, label)
        count+=1
    i+=1

## 2. Training the classifier (LDA; the SVM variant is commented out)

In [None]:
''' Training the classification model (LDA; the linear SVM variant is commented out). '''

def svm_(train_x,train_y):
    """Fit a classifier on the given samples and return it.

    Despite the function name, the model fitted here is a
    LinearDiscriminantAnalysis estimator with default settings.

    Args:
        train_x: Feature matrix, one row per sample.
        train_y: Label of each row of ``train_x``.

    Returns:
        The fitted estimator.
    """
    # Earlier alternative, kept for reference:
    #model = svm.SVC(kernel="linear", cache_size=50)
    model = LinearDiscriminantAnalysis()
    model.fit(train_x, train_y)
    return model

# Fail early with a clear message: with no samples the column slicing below
# would otherwise raise an opaque IndexError on an empty 1-D array.
if not samples:
    raise ValueError("no training samples were collected; check the data directory")

# Stack every stored vector ([label, feature values...]) into one 2-D array.
# Going through .tolist() keeps the original behaviour of building the array
# from plain Python numbers.
train = np.array([v.tolist() for v in samples.values()])

train_x = train[:,1:]  # features without label
train_y = train[:,0]  # labels

print("training...")
clf = svm_(train_x, train_y)
# Save the fitted classifier returned by svm_ (an LDA model) in
# 'training_result.pickle'; the context manager closes the file even if
# pickling fails.
with open('training_result.pickle', 'wb') as f:
    pickle.dump(clf, f)
print("finished")