In [36]:
%matplotlib inline 
from skimage import data, exposure, measure
from skimage import filters, morphology
from skimage.feature import blob_dog, blob_log, blob_doh
from math import sqrt
from skimage.color import rgb2gray
from scipy import ndimage
import numpy as np

import matplotlib.pyplot as plt

from skimage import io

import os, sys

from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier

In [37]:
def showimg(img):
    io.imshow(img)
    io.show()

In [38]:
# getFiles gets all the unique files from the given directory path, without the annotations
def getFiles(path):
    files = os.listdir(path)
    all_files = np.empty(0)
    for f in files:
        [ff, _ ]= f.split('_',1)
        all_files = np.append(all_files,os.path.join(path,ff))
    return np.unique(all_files)

files = getFiles('G:\Projects\CADIMI\data')

In [48]:
# createMask creates a mask used for taking only pixels from the brain, and not from the backgraound
def createMask(filename):
    filename = filename + '_fl.png'
    img = io.imread(filename)
    mask = img>0
    return mask

# reads the images with flair and the answer
def readImageFL(filename):
    img = io.imread(filename + '_fl.png')
    ans = io.imread(filename + '_an.png')
    return (img,ans)

def readImageT1(filename):
    img = io.imread(filename + '_t2.png')
    ans = io.imread(filename + '_an.png')
    return (img,ans)

In [52]:
# features receives the image, the mask and the answer and compute the features for that singular image applying the mask
def features(img, mask,ans):
    f1 = img.copy()
    #intensity feature
    intensity = f1[mask]
    binary_img = img.copy()
    binary_img[img>0] = 1
    #distance feature
    distance = ndimage.distance_transform_edt(morphology.binary_dilation(binary_img,morphology.disk(2)))[mask]
    #blobness feature
    blobness = ndimage.filters.gaussian_laplace(binary_img, 4)[mask]
    ans = ans[mask]
    return (np.vstack((intensity, distance, blobness)).T, ans)

# runs the feature method for the images, and returns all the features and the answers, this is the dataset
def allFeatures(fileNames):
    allF = []
    allAns = []
    for f in fileNames:
        mask = createMask(f)
        img, ans = readImageFL(f)
        f3, ansMasked = features(img,mask,ans)
        allF.append(f3)
        allAns.append(ansMasked)
    return (np.vstack(allF),np.hstack(allAns))

f3, ans = allFeatures(files)
print len(f3), len(ans)

3796529 3796529


In [53]:
#Balanced the dataset by keeping some random exemples to have closer number between positive and negative samples

trueInd = ans > 0
noTrue = trueInd.sum()
noFalse = len(ans) - noTrue
newFalse = int(noTrue * 1.04)
print noTrue, noFalse, newFalse

newindices = np.random.choice(noFalse, newFalse, replace = False)
allFalse = f3[~trueInd]
print allFalse.shape

newFalseFeatures = allFalse[newindices]
print newFalseFeatures.shape

true_f = f3[trueInd]
print true_f.shape

balancedFeatures = np.vstack((newFalseFeatures, true_f))
af = np.zeros(newFalse, dtype = int)
at = np.ones(noTrue, dtype = int)
print af, at
balancedAns = np.hstack( [af, at] )
print balancedFeatures.shape
print noTrue + newFalse

67811 3728718 70523
(3728718L, 3L)
(70523L, 3L)
(67811L, 3L)
[0 0 0 ..., 0 0 0] [1 1 1 ..., 1 1 1]
(138334L, 3L)
138334


RandomForestClassifier:

In [58]:
clf = make_pipeline(preprocessing.StandardScaler(), RandomForestClassifier(n_estimators = 100))
cross_val_score(clf, balancedFeatures, balancedAns, cv=5)

array([ 0.81151511,  0.82329129,  0.74073806,  0.81967035,  0.83767079])

SVC:

In [8]:
clf = make_pipeline(preprocessing.StandardScaler(), SVC(kernel='linear', probability = True))
cross_val_score(clf, balancedFeatures, balancedAns, cv=5)

array([ 0.81179158,  0.82195009])

DecisionTreeClassifier:

In [10]:
clf = make_pipeline(preprocessing.StandardScaler(), tree.DecisionTreeClassifier())
cross_val_score(clf, balancedFeatures, balancedAns, cv=5)

array([ 0.78847155,  0.79900529])

AdaBoostClassifier:

In [56]:
clf = make_pipeline(preprocessing.StandardScaler(), AdaBoostClassifier(n_estimators=100))
cross_val_score(clf, balancedFeatures, balancedAns, cv=5)

array([ 0.86182594,  0.87201359,  0.76101493,  0.85418926,  0.88108147])