# In this notebook, we explore various models and their performance on the dataset

Jack Duryea (jd50), Shashank Mahesh (sm103)


In [1]:
import pathlib
import imageio
import numpy as np
import matplotlib.pyplot as plt
import os
from skimage.color import rgb2gray
from scipy import stats
import glob
import data_utils as du
import cv2
from collections import defaultdict
from skimage.color import rgb2gray
import models
import sklearn
import scipy.stats as stats
from sklearn.metrics import confusion_matrix
import warnings
warnings.simplefilter("ignore", DeprecationWarning)
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    import md5, sha
%matplotlib inline

%load_ext autoreload
%autoreload 2

# Model Selection
We are currently thinking about using the following models:  
Otsu Thresholding   
Spectral clustering 

### Load data

In [None]:
# Load the training images, the training masks, and the test images through
# the project's data_utils helpers (imported as `du`).
train_image_data = du.get_train_images() 
train_mask_data  = du.get_train_masks()
test_image_data = du.get_test_images()

In [None]:
# Pick one image and two individual masks to experiment with.
# NOTE(review): later cells rebind `im1` (as a loop variable and for the
# worked example), so its value depends on which cells have already run.
im1 = train_image_data[0]
m1 = train_mask_data[10]
m2 = train_mask_data[11]

### Combine the masks for each image; these are the ground-truth ("Truths") masks

In [None]:
# One combined mask per training image, in the same order as train_image_data.
total_masks = [
    du.get_total_mask(train_image, train_mask_data)
    for train_image in train_image_data
]

# Model 1: Naive Thresholding

In [None]:
# Worked example: run the naive-threshold model on one training image and show
# the input image, the combined truth mask, and the prediction in turn.
index = 101
im1 = train_image_data[index]
truth = total_masks[index]
predicted = models.naive_threshold(im1)

# print(...) with a single string argument produces the same output under the
# Python 2 print statement and the Python 3 print function, so this cell no
# longer fails to parse on a Python 3 kernel.
for label, picture in (("Actual Image", im1.im),
                       ("Actual Nuclei", truth),
                       ("Predicted Nuclei", predicted)):
    plt.imshow(picture)
    print(label)
    plt.show()

# Last expression of the cell, so the notebook displays what models.score returns.
models.score(truth, predicted)

In [None]:
# Score the naive-threshold model on the first `num_examples` training images
# and keep the per-image accuracy.
num_examples = 100
accs = []
for i in range(num_examples):
    im1, truth = train_image_data[i], total_masks[i]
    predicted = models.naive_threshold(im1)
    # models.score yields six values; only the leading accuracy is needed here
    acc = models.score(truth, predicted)[0]
    accs.append(acc)

In [None]:
# Distribution of the per-image accuracies collected in `accs`.
plt.xlabel("Accuracy")
plt.ylabel("Freq")
plt.hist(accs)

### Hyperparameter tuning: vary the threshold

In [None]:
# Sweep the naive-threshold cutoff and record the mean accuracy for each value
# over a fixed slice of training images (indices 10 .. 10 + num_examples - 1).
thresholds = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
num_examples = 30  # loop-invariant, so set once instead of on every threshold
avg_accs = []
for thr in thresholds:
    # %-formatting keeps the exact "thr: <value>" output of the old Python 2
    # print statement while also parsing and behaving the same on Python 3.
    print("thr: %s" % thr)
    accs = []
    for i in range(10, 10 + num_examples):
        im1 = train_image_data[i]
        truth = total_masks[i]
        predicted = models.naive_threshold(im1, thr)
        acc, tn, fp, fn, tp, conf_mat = models.score(truth, predicted)
        accs.append(acc)
    avg_accs.append(np.mean(accs))

In [None]:
# Mean accuracy (avg_accs) plotted against each threshold that was tried.
plt.xlabel("Threshold")
plt.ylabel("Average Accuracy")
plt.plot(thresholds, avg_accs)

### From a small sample, it seems that a threshold of 0.1 works best. Let's scale it up a bit.

In [None]:
len(train_image_data)

In [None]:
# Evaluate the naive threshold (thresh=0.1) on the first `num_examples`
# training images, pooling the confusion counts across images.
num_examples = 10
accs = []
tps = fps = fns = tns = 0
for i in range(num_examples):
    im, truth = train_image_data[i], total_masks[i]
    predicted = models.naive_threshold(im, thresh=0.1)
    acc, tn, fp, fn, tp, conf_mat = models.score(truth, predicted)
    accs.append(acc)
    # Running totals over every image scored so far
    tns, fps, fns, tps = tns + tn, fps + fp, fns + fn, tps + tp


In [None]:
# Summary metrics from the pooled confusion counts. The unpacked names suggest
# accuracy, precision, recall and F1 — confirm against models.get_metrics.
# Note the argument order used here: (tps, fps, tns, fns).
acc, pre, rec, f1_score = models.get_metrics(tps, fps, tns, fns)


In [None]:
# Matrix rows are [tps, fns] and [fps, tns], built from the pooled counts.
du.plot_confusion_matrix([[tps,fns],[fps,tns]])

# Model 2: Otsu Thresholding

In [None]:
# Evaluate Otsu thresholding on the first `num_examples` training images,
# pooling the confusion counts across images.
num_examples = 10
accs = []
tps = fps = fns = tns = 0
for i in range(num_examples):
    im, truth = train_image_data[i], total_masks[i]
    predicted = models.otsu_threshold(im)
    acc, tn, fp, fn, tp, conf_mat = models.score(truth, predicted)
    accs.append(acc)
    # Running totals over every image scored so far
    tns, fps, fns, tps = tns + tn, fps + fp, fns + fn, tps + tp

In [None]:
# Summary metrics for the Otsu run from its pooled confusion counts. The
# unpacked names suggest accuracy, precision, recall and F1 — confirm against
# models.get_metrics. Argument order used here: (tps, fps, tns, fns).
acc, pre, rec, f1_score = models.get_metrics(tps, fps, tns, fns)

In [None]:
# Matrix rows are [tps, fns] and [fps, tns], built from the pooled counts.
du.plot_confusion_matrix([[tps,fns],[fps,tns]])

# Model 3: Random Walk Thresholding (with SLIC exploration)

In [None]:
# Evaluate models.random_walk_threshold on the first `num_examples` training
# images, pooling the confusion counts across images.
num_examples = 10
accs = []
tps = fps = fns = tns = 0
for i in range(num_examples):
    im, truth = train_image_data[i], total_masks[i]
    predicted = models.random_walk_threshold(im)
    acc, tn, fp, fn, tp, conf_mat = models.score(truth, predicted)
    accs.append(acc)
    # Running totals over every image scored so far
    tns, fps, fns, tps = tns + tn, fps + fp, fns + fn, tps + tp

In [None]:
from skimage.segmentation import felzenszwalb, slic, quickshift, watershed

In [None]:
# Summary metrics for this run from its pooled confusion counts. The unpacked
# names suggest accuracy, precision, recall and F1 — confirm against
# models.get_metrics. Argument order used here: (tps, fps, tns, fns).
acc, pre, rec, f1_score = models.get_metrics(tps, fps, tns, fns)

In [None]:
# Try SLIC segmentation on one training image.
# `slic` comes from skimage.segmentation, imported in an earlier cell.
img = train_image_data[100].im
test = slic(img, n_segments=20, compactness=10, sigma=1)

In [None]:
plt.imshow(test)

In [None]:
plt.imshow(train_image_data[100].im)

### scratch work

In [None]:
sub_images, sub_masks = du.convolve(train_image_data[0],total_masks[0])

In [None]:
plt.imshow(sub_images[0])
plt.show()
plt.imshow(sub_masks[0])

In [None]:
len(sub_images)

In [None]:
# The scratch cells from here on inspect `im1` as the first training image
# (its dir_id is checked further down; m1/m2 are presumably masks of that same
# image — TODO confirm). Earlier cells reuse `im1` as a loop variable, so on a
# top-to-bottom run it would otherwise be whichever image the last loop ended
# on. Pin it explicitly so this section does not depend on execution history.
im1 = train_image_data[0]
Big_Mask = du.get_total_mask(im1, train_mask_data)

In [None]:
plt.imshow(Big_Mask,cmap='Greys')

In [None]:
plt.imshow(im1.im,cmap='Greys')

In [None]:
plt.imshow(Big_Mask,cmap='Greys')

In [None]:
plt.imshow(m2.im,cmap='Greys')

In [None]:
plt.imshow(m2.im + m1.im,cmap='Greys')

In [None]:
m1.im.shape

In [None]:
im1.im.shape

In [None]:
plt.imshow(rgb2gray(im1.im),cmap='Greys')

In [None]:
# The bare string below is a no-op expression kept as a note — presumably the
# dir_id expected for this image (TODO confirm). Only the last line's value is
# displayed by the notebook.
"00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552"
im1.dir_id

In [None]:
mask = models.naive_threshold(im1)

In [None]:
bw = rgb2gray(im1.im)

In [None]:
bw2 = np.where(bw>0.1,1,0)

In [None]:
plt.imshow(mask,cmap='Greys')

In [None]:
du.encode(bw2)

In [None]:
test_rle_string = du.rle_to_string(du.encode(bw2))
test_rle_string

In [None]:
du.rle_decode(test_rle_string,mask.shape, mask.dtype )

In [None]:
mask

In [None]:
# `m` is another name for m1.im, not a copy: the assignment below overwrites
# every value greater than 1 with 1 in m1.im itself, so later cells that read
# m1.im see the clamped values.
m = m1.im
m[m > 1] = 1

In [None]:
stats.describe(mask.flatten())

In [None]:
m

In [None]:
mask

In [None]:
# NOTE(review): ytrue and ypred are not used by any cell visible here; the
# score below compares the single mask m1.im against the thresholded `mask`.
ytrue = np.array([1,0,0])
ypred = np.array([1,1,1])
models.score(m1.im, mask)

In [None]:
m3 = m2.im + m1.im

In [None]:
plt.imshow(m3)

In [None]:
np.count_nonzero(m1.im.flatten())

In [None]:
np.count_nonzero(m2.im.flatten())

In [None]:
np.count_nonzero(m3.flatten())

In [None]:
m3[m3 > 0]=1

In [None]:
m3

In [None]:
plt.imshow(m3)

In [None]:
MASK = du.combine_masks([m1,m2])

In [None]:
plt.imshow(MASK)