# Automatically label lab data

This notebook uses a combination of computer vision techniques, namely adaptive thresholding, morphology, and a conditional random field (CRF), to automatically segment the mussels on the black board with white lines in the lab.

To do:
- estimate number of pixels per square to correct for camera distance
- Lab_3800-3_2018-08-13, crop too much from top
- Lab_3784-2_2018-07-05, junk on top

In [None]:
import os
import os.path as osp
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm

import pydensecrf.densecrf as dcrf
import pydensecrf.utils as utils

import sys
sys.path.append('../../predict')
sys.path.append('../')

from power_law import *
from data_utils import lblsave

%matplotlib inline

In [None]:
DATA_PATH = osp.join(os.environ['DATA_PATH'], 'cciw/Data')
SAVE_PATH = osp.join(os.environ['DATA_PATH'], 'cciw/dataset_raw/Test/Lab-tmp')

In [None]:
# Locate the three CSV tables that live under DATA_PATH/Tables.
imagetable_path = os.path.join(DATA_PATH, 'Tables', 'ImageTable.csv')
analysis_path = os.path.join(DATA_PATH, 'Tables', 'Analysis.csv')
dive_path = os.path.join(DATA_PATH, 'Tables', 'Dives.csv')

# Read them (first column is the index); 'Count' is forced to float and the
# dive 'Date' column is parsed as a datetime.
image_df = pd.read_csv(imagetable_path, index_col=0)
analysis_df = pd.read_csv(analysis_path, index_col=0, dtype={'Count': float})
dive_df = pd.read_csv(dive_path, index_col=0, parse_dates=['Date'])

# Outer-join the analysis rows with their dive rows on the shared 'Dive Index'.
data_df = analysis_df.merge(dive_df, on='Dive Index', how='outer')

In [None]:
# Search for all image files in testing set...
# glob() returns paths in arbitrary order, so the list is sorted right away:
# other cells index per-image data (e.g. `scale[i]`) by position in this list.
all_images = sorted(glob(os.path.join(
    DATA_PATH, 'Videos_and_stills/TestingSet/Lab/*/*/*/Images/Quad*/*.jpg')))

# Kept as the last expression of the cell so the number of images found is
# actually displayed (a bare expression in the middle of a cell shows nothing).
len(all_images)

In [None]:
analysis_df[['16mm', '14mm', '12.5mm', '10mm', '8mm', '6.3mm', '4mm', '2mm']].hist(figsize=(15,15))

# meta-parameters

In [None]:
'''
@param blockSize Size of a pixel neighborhood 
       that is used to calculate a threshold value for the pixel.
@param C Constant subtracted from the mean or weighted mean 
       (see the details below). Normally, it is positive but may be zero 
       or negative as well.
@param k_size morphology structuring element size
'''
# blockSize and C_constant are passed to cv2.adaptiveThreshold; k_size is the
# side of the elliptical structuring element used for the erode/dilate steps.
blockSize  = 301
C_constant = 2
k_size     = 11

# Contours whose area exceeds min_area have their bounding box blanked out of
# the mask. max_area is currently unused: its check in the main loop is
# commented out.
min_area = 40000
max_area = 300000

# Border clean-up, all in pixels.
corn = 450        # side of the square zeroed in each of the four mask corners
buf = 100         # margin cleared around each detected horizontal line
bottom_cut = 150  # rows zeroed at BOTH the top and the bottom of the mask
horiz_cut = 200   # columns zeroed along the left edge
right_cut = 100   # columns zeroed along the right edge

# HoughLinesP
# rho / theta are the accumulator resolutions (theta = pi / 45 rad = 4 degrees),
# threshold is the minimum number of votes; mLL and mLG are passed as
# minLineLength and maxLineGap respectively.
rho = 10
theta = np.pi / 45
threshold = 500
mLL = 500
mLG = 20

# Show results along the way
DO_PLOT = False

# Run conditional random field post processing to retrieve missing shell pieces
DO_CRF = False # can increase processing time by 20 seconds per image
# Number of iterations passed to the dense CRF inference call
MAX_ITER = 10

# When True, the label masks are written to SAVE_PATH
SAVE = False

In [None]:
#all_images

In [None]:
# Segment every lab image with classic computer vision and collect, per image:
#   pix_ct    - fraction of pixels labelled as mussel
#   mussel_ct - highest watershed marker label returned by count_mussels()
#   size_dist - normalized 8-bin histogram of the scaled watershed region sizes
#
# NOTE: this cell calls `count_mussels` and indexes `scale`, which are both
# defined in cells further down this notebook; those cells must be run first.
k_25 = np.ones((25, 25), np.uint8)
k_120 = np.ones((120, 120), np.uint8)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k_size, k_size))

pix_ct = []
mussel_ct = []
size_dist = []
for i in tqdm(range(len(all_images))):
#for i in tqdm(range(5)):
    im = cv2.imread(all_images[i])
    if im is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError('Could not read image: %s' % all_images[i])
    rgb  = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    th1  = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, blockSize, C_constant)
    erosion = cv2.erode(th1, kernel, iterations=2)
    close = cv2.dilate(erosion, kernel, iterations=1)

    # cv2.RETR_EXTERNAL retrieves only the extreme outer contours.
    # cv2.CHAIN_APPROX_SIMPLE compresses horizontal, vertical and diagonal
    # segments and leaves only their end points (an up-right rectangular
    # contour is encoded with 4 points).
    cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Blank out the bounding box of every very large blob.
    # NOTE(review): the rectangles are drawn into `rgb`, which is later used
    # as the colour input of the CRF and of the watershed -- confirm that
    # this is intended and not only meant for the debug plot.
    for c in cnts:
        area = cv2.contourArea(c)
        if area > min_area: #and area < max_area:
            x, y, w, h = cv2.boundingRect(c)
            cv2.rectangle(rgb, (x, y), (x + w, y + h), (36, 255, 12), 5)
            close[y:y+h, x:x+w] = 0

    # Clear the image borders, then grow the remaining blobs back.
    close[:, :horiz_cut] = 0
    close[:, close.shape[1] - right_cut:] = 0
    close[close.shape[0] - bottom_cut:, :] = 0
    close[:bottom_cut, :] = 0
    close = cv2.dilate(close, kernel, iterations=1)

    # to remove leftover dots
    t = cv2.erode(close, k_25, iterations=1)
    mask = cv2.dilate(t, k_120, iterations=1)
    seg_mask = close & mask

    # Number of foreground pixels. Counting non-zero pixels (instead of
    # indexing the second entry of np.unique counts) also works when the mask
    # is completely empty or completely full.
    fg_px = np.count_nonzero(seg_mask)

    # may find spurious lines if more than 2M pixels
    if fg_px < 2000000:
        linesP = cv2.HoughLinesP(seg_mask, rho, theta, threshold=threshold, minLineLength=mLL, maxLineGap=mLG)
        if linesP is not None:
            for j in range(len(linesP)):
                x0, y0, x1, y1 = (int(v) for v in linesP[j][0])
                if abs(y0 - y1) < 50:
                    #print(i, 'found horiz line: ', j)
                    x_start = min(x0, x1)
                    x_end = max(x0, x1)
                    # Clamp the start of both slices at 0: a negative start
                    # would be interpreted as "from the end" and the line
                    # would silently not be erased.
                    seg_mask[max(y1 - buf, 0):y0 + buf,
                             max(x_start - buf * 10, 0):min(
                                 x_end + buf * 10, seg_mask.shape[1])] = 0

    # upper left corner
    seg_mask[:corn, :corn] = 0
    # upper right corner
    seg_mask[:corn, seg_mask.shape[1]-corn:] = 0
    # bottom left corner
    seg_mask[seg_mask.shape[0]-corn:, :corn] = 0
    # bottom right corner
    seg_mask[seg_mask.shape[0]-corn:, seg_mask.shape[1]-corn:] = 0

    # CRF Post-processing
    if DO_CRF:
        img = np.ascontiguousarray(rgb)
        # Two-class "softmax": channel 0 = mussel, channel 1 = background.
        # The mask is 0/255 uint8 here, so `1 - seg_mask` would wrap around
        # instead of producing the complementary probability; build the
        # probabilities in float explicitly. unary_from_softmax clips the
        # probabilities away from zero (pydensecrf default) before the log.
        fg_prob = (seg_mask > 0).astype('float')
        labels = np.stack([fg_prob, 1.0 - fg_prob])
        c, h, w = labels.shape[0], labels.shape[1], labels.shape[2]

        U = utils.unary_from_softmax(labels)
        U = np.ascontiguousarray(U)
        d = dcrf.DenseCRF2D(w, h, c)
        d.setUnaryEnergy(U)
        # compat=3, Potts model - it introduces a penalty for nearby similar
        # pixels that are assigned different labels.
        # This adds the color-independent term, features are the locations only.
        d.addPairwiseGaussian(sxy=3, compat=3)
        # This adds the color-dependent term, i.e. features are (x,y,r,g,b).
        # im is an image-array, e.g. im.dtype == np.uint8
        d.addPairwiseBilateral(sxy=80, srgb=13, rgbim=img, compat=10)
        Q = d.inference(MAX_ITER)
        Q = np.array(Q).reshape((c, h, w))
        # binarize output
        Q[0][Q[0] >= 0.5] = 1
        Q[0][Q[0] < 0.5] = 0
        crf_mask = (Q[0] * 255).astype('uint8')
        fg_px = np.count_nonzero(crf_mask)

    # NOTE(review): without the CRF, fg_px was measured BEFORE the horizontal
    # lines and the corners were cleared (this matches the previous behaviour
    # of this cell) -- confirm whether the fraction should be taken from the
    # final mask instead.
    pix_ct.append(fg_px / float(seg_mask.size))

    if DO_PLOT:
        fig, axes = plt.subplots(1, 2, figsize=(16, 12))
        axes[0].set_title(str(i))
        axes[0].imshow(rgb)
        axes[1].imshow(seg_mask)
        for k in range(len(axes.flat)):
            axes.flat[k].axis('off')
        plt.show()

    #cv2.imwrite(mask_file, seg_mask)
    #cv2.imwrite(jpeg_file, im)
    #lbl = np.zeros((np_img.shape[0], np_img.shape[1]))
    #lbl[(np_img[:, :, 2] ==  60)] = 1
    # Convert the 0/255 mask to 0/1 class labels.
    seg_mask[seg_mask == 255] = 1

    if SAVE:
        mask_file = os.path.join(SAVE_PATH, all_images[i].split('/')[-1].split('.')[0] + '_mask.png')
        jpeg_file = os.path.join(SAVE_PATH, all_images[i].split('/')[-1])
        lblsave(mask_file, seg_mask) # save as indexed color RGB image

        if DO_CRF:
            crf_mask_file = os.path.join(SAVE_PATH, all_images[i].split('/')[-1].split('.')[0] + '_mask_crf.png')
            crf_mask[crf_mask == 255] = 1
            lblsave(crf_mask_file, crf_mask) # save as indexed color RGB image
            #cv2.imwrite(crf_mask_file, crf_mask)

    vals, cts = count_mussels(rgb, seg_mask)

    mussel_ct.append(vals[-1])

    # Drop the first two labels (presumably the watershed boundary label -1
    # and the background label 1 -- verify against count_mussels).
    cts = cts[2:]
    # NOTE(review): the camera-calibration cell normalizes the same product by
    # (16 * 25) rather than (15 * 26) -- confirm which constant is intended.
    cts = cts * np.prod(scale[i]) / (15 * 26)
    #freq, bin_edges = np.histogram(cts[cts > 300], bins=bins)
    freq, bin_edges = np.histogram(cts, bins=8)
    # Avoid 0 / 0 when no region was found in this image.
    total = freq.sum()
    size_dist.append(freq / total if total > 0 else np.zeros(len(freq)))

size_dist = np.asarray(size_dist)

# Compare biomass and fraction of mussel pixels

In [None]:
# Collect the ground-truth targets of every image from the lab tables.
pix_ct = np.asarray(pix_ct)

# columns: 0 = biomass, 1 = count,
#          2 = sum over sieves of biomass * size fraction * (2 / sieve size)
lab_targets = np.zeros((len(all_images), 3))
true_size_dist = np.zeros((len(all_images), 8))

names = ['16mm', '14mm', '12.5mm', '10mm', '8mm', '6.3mm', '4mm', '2mm']

# Reverse to ascending sieve size so the columns line up with `sieves` below.
names.reverse()
print(names)
sieves = np.sort(np.array([16, 14, 12.5, 10, 8, 6.3, 4, 2]))

for i in range(len(all_images)):
#for i in range(5):
    # File stem with its first 4 and last 8 characters removed.
    root_fname = all_images[i].split('/')[-1].split('.')[0][4:-8]
    # Literal (non-regex) substring match, so characters such as '.', '(' or
    # '+' in a file name cannot change which rows are selected; rows with a
    # missing name never match.
    name_match = image_df['Name'].str.contains(root_fname, regex=False, na=False)
    guid = image_df[name_match]['Analysis Index'].astype('int64')
    analysis_ids = np.unique(guid.values)
    if len(analysis_ids) != 1:
        raise ValueError('Expected exactly one Analysis Index for %s, found %d'
                         % (all_images[i], len(analysis_ids)))
    row = data_df[data_df['Analysis Index'].values == analysis_ids[0]]
    if len(row) != 1:
        raise ValueError('Expected exactly one table row for %s, found %d'
                         % (all_images[i], len(row)))
    # Work on the single matching row so scalars (not 1-element arrays) are
    # written into the target arrays.
    record = row.iloc[0]
    lab_targets[i, 0] = record['Biomass']
    lab_targets[i, 1] = record['Count']

    for j in range(len(names)):
        true_size_dist[i, j] = record[names[j]]

    lab_targets[i, 2] = (lab_targets[i, 0] * true_size_dist[i, :] * (2 / sieves)).sum()

x = pix_ct / pix_ct.max()
biomass = lab_targets[:, 0]
count = lab_targets[:, 1]
count_fr_bio_sz = lab_targets[:, 2]

In [None]:
# manual camera calibration
# NOTE(review): `scale` is indexed by image position (scale[i]); presumably it
# holds hand-measured board dimensions for each image, in the same order as
# the sorted `all_images` list -- TODO confirm.
scale = np.load('../../predict/npy/lab_board_dims_n40.npy')

# Rescale each image's mussel-pixel fraction by the product of its `scale`
# entries, normalized by 16 * 25.
# NOTE(review): the size-distribution code in this notebook normalizes the
# same product by (15 * 26) instead -- confirm which constant is intended.
pix_ct_s = pix_ct.copy()
for i in range(len(all_images)):
    pix_ct_s[i] = pix_ct[i] * (np.prod(scale[i]) / (16 * 25))

In [None]:
fontsize = 16
def plot_powerlaw_1x1(x_data, y_data, x_label='', y_label=''):
    """Create a single 4x4 inch figure for one x/y data pair and return it.

    The axes content is produced by ``power_law_prediction_ax`` (brought in by
    a star import at the top of the notebook); this wrapper adds the axis
    labels and tick sizes using the notebook-level ``fontsize``.

    @param x_data: values forwarded as the x data
    @param y_data: values forwarded as the y data
    @param x_label: text of the x axis label
    @param y_label: text of the y axis label
    @return: the matplotlib figure that was created
    """
    fig, ax = plt.subplots(figsize=(4, 4))
    power_law_prediction_ax(ax, x_data, y_data, -2, 0, fontsize)

    ax.set_xlabel(x_label, fontsize=fontsize)
    ax.set_ylabel(y_label, fontsize=fontsize)
    ax.tick_params(labelsize=fontsize)

    plt.tight_layout()
    return fig

In [None]:
'''
fig = plot_powerlaw_1x1(pix_ct_s, biomass, x_label='Predicted Biomass \n Classic Vision', y_label='Biomass')
#fig.savefig('lab_auto_biomass_basic.eps')
#fig.savefig('lab_auto_biomass_basic_camera.eps')

# Predict count from pixels (basic algorithm)
fig = plot_powerlaw_1x1(pix_ct, count, x_label='Predicted Count \n Classic Vision', y_label='Count')
fig.savefig('lab_auto_count_basic.eps')

# Predict count from camera adjusted basic algorithm
fig = plot_powerlaw_1x1(pix_ct_s, count, x_label='Predicted Count \n Classic Vision', y_label='Count')
fig.savefig('lab_auto_count_basic_camera.eps')

# Predict count from watershed + basic algorithm
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
power_law_prediction_ax(ax, pcount, count, 1, 3, fontsize)
ax.set_ylabel('Count', fontsize=fontsize)
ax.set_xlabel('Predicted Count \n Classic Vision', fontsize=fontsize)
ax.tick_params(labelsize=fontsize)
plt.tight_layout()
fig.savefig('lab_auto_count_basic_watershed.eps')

# Plot count from biomass, or biomass + size distribution
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
#power_law_prediction_ax(ax, biomass, count, 1, 3, fontsize)
power_law_prediction_ax(ax, count_fr_bio_sz, count, 0.5, 2.5, fontsize)
ax.set_ylabel('Count', fontsize=fontsize)
ax.set_xlabel('Biomass + \n Size Distribution', fontsize=fontsize)
#ax.set_xlabel('Biomass', fontsize=fontsize)
ax.tick_params(labelsize=fontsize)
plt.tight_layout()
#fig.savefig('lab_auto_count_basic_watershed.eps')
fig.savefig('lab_count_from_biomass_and_distribution.eps')
'''

In [None]:
pcount = np.asarray(mussel_ct)

# Count Mussels Watershed

In [None]:
def count_mussels(image, predictions):
    """Split a binary mussel mask into regions with the marker-based watershed.

    @param image: 8-bit, 3-channel image the mask belongs to; it is passed to
           cv2.watershed as the colour input
    @param predictions: single-channel mask with the same height and width as
           `image`; every non-zero pixel is treated as mussel, so both 0/1
           label masks and 0/255 masks are accepted
    @return: (vals, cts) as returned by np.unique over the watershed markers.
             Label -1 marks watershed boundaries, label 1 is the background
             and every label >= 2 is one seeded region; cts holds the pixel
             count of each label. vals[-1] is therefore one more than the
             number of seeded regions.
    """
    # Bring the mask to the 0/255 uint8 convention that the steps below rely
    # on. Previously the morphology ran on the raw 0/1 mask, so `unknown`
    # never reached 255, no pixel was left for the watershed to assign, and
    # the reported region sizes were only the sizes of the seed cores.
    thresh = np.where(np.asarray(predictions) > 0, 255, 0).astype('uint8')

    # noise removal
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    # sure background area: everything outside the dilated mask
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # sure foreground area: pixels far enough from the mask boundary
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    ret, sure_fg = cv2.threshold(dist_transform, 0.4 * dist_transform.max(), 255, 0)

    # unknown region: inside the dilated mask but not in a sure-foreground core
    sure_fg = np.uint8(sure_fg)
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Marker labelling: one label per sure-foreground core (0 = everything else)
    ret, markers = cv2.connectedComponents(sure_fg)

    # Add one to all labels so that sure background is not 0, but 1
    markers = markers + 1

    # Now, mark the region of unknown with zero so the watershed assigns it
    markers[unknown == 255] = 0

    markers = cv2.watershed(image, markers)
    #image[markers == -1] = [255, 0, 0]

    vals, cts = np.unique(markers, return_counts=True)

    return vals, cts

In [None]:
#seg_mask[1500:1570, 200:270] = 1

In [None]:
#plt.figure(figsize=(16, 10))
#plt.imshow(seg_mask)

In [None]:
vals, cts = count_mussels(rgb, seg_mask)
cts = cts[2:]
cts = cts * np.prod(scale[-1]) / (15 * 26)
len(cts)

In [None]:
cts

In [None]:
cts.max()

In [None]:
names

In [None]:
'''
If *bins* is a sequence, it defines the bin edges, including the
left edge of the first bin and the right edge of the last bin;
in this case, bins may be unequally spaced.  All but the last
(righthand-most) bin is half-open.  In other words, if *bins* is::

    [1, 2, 3, 4]

then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
the second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which
*includes* 4.
'''
bins = 100 * sieves
bins = np.concatenate((bins, np.array([15000.])))
bins

In [None]:
freq, bin_edges = np.histogram(cts[cts > 300], bins=bins)
freq / freq.sum()

In [None]:
r = plt.hist(cts, bins=bins, align='mid', rwidth=0.8)
plt.xlabel('Number of pixels')
plt.ylabel('Frequency')
print(r[0] / r[0].sum())

In [None]:
y = true_size_dist.copy()
x = true_size_dist.copy()

In [None]:
x = size_dist.copy()

In [None]:
MI = -np.dot(p_xy, np.log2(p_xy / p_x_y))
print(MI)

In [None]:
MI = -np.dot(p_x_y, np.log2(p_xy / p_xy))
print(MI)

In [None]:
np.dot(x, y.T).shape

In [None]:
p_xy

In [None]:
# joint distribution
# NOTE(review): assuming x and y are the (n_images, 8) size-distribution arrays
# assigned in the cells above, x[i] * y[i] is an element-wise product, so p_xy
# ends up as a length-8 vector rather than an 8x8 joint table -- confirm that
# this is the quantity intended.
p_xy = 0
for i in range(len(x)):
    p_xy += (x[i] * y[i])
# The bare expression below has no effect in the middle of a cell (the
# normalization is commented out).
p_xy #/= len(x)

# product of marginals
# NOTE(review): np.dot of two 1-D vectors is a single scalar, not a per-bin
# product of marginals -- confirm.
p_x_y = np.dot(np.sum(x, axis=0), np.sum(y, axis=0))

# NOTE(review): mutual information is normally sum(p_xy * log2(p_xy / (p_x * p_y)))
# without a leading minus sign -- verify the sign and the normalization of the
# inputs before relying on this number.
MI = -np.dot(p_xy, np.log2(p_xy / p_x_y))
print(MI)

In [None]:
p

In [None]:
#np.dot(x, y.T)

In [None]:
p_x_y = np.dot(np.mean(x, axis=0), np.mean(y, axis=0))

In [None]:
plt.bar(sieves, np.mean(np.round(size_dist, 3)[:, :8], axis=0))
plt.xlabel('Sieve diameter (mm)', fontsize=fontsize)
plt.ylabel('Normalized Frequency', fontsize=fontsize)
plt.tick_params(labelsize=fontsize-4)
plt.tight_layout()
plt.savefig('pred_size_dist_eq_bins.eps')

In [None]:
plt.bar(sieves, np.mean(np.round(true_size_dist, 3), axis=0))
plt.xlabel('Sieve diameter (mm)', fontsize=fontsize)
plt.ylabel('Normalized Frequency', fontsize=fontsize)
plt.tick_params(labelsize=fontsize-4)
plt.tight_layout()
plt.savefig('true_size_dist.eps')
#plt.grid()

In [None]:
#size_dist

In [None]:
#mi(size_dist[:, :8], true_size_dist)

In [None]:
-np.dot(np.mean(y, axis=0), np.log2(np.mean(y, axis=0)))

In [None]:
S = 10
px = np.random.beta(0.6, 0.5, S)
px /= px.sum()
pz = np.random.beta(0.6, 0.5, S)
pz /= pz.sum()


In [None]:
px.shape

In [None]:
def mi(T, Y, num_classes=8):
    """
    Mutual information I(T; Y) between predicted labels T and true labels Y,
    computed as I(T; Y) = H(Y) - H(Y|T).

    Class labels are expected to be the integers 0 .. num_classes - 1. A small
    epsilon is added inside every logarithm so that empty classes do not
    produce log2(0).

    @param T: array of predicted labels, one per instance
    @param Y: array of true labels, one per instance (same shape as T)
    @param num_classes: number of classes, default=8
    @return: H(Y) - H(Y|T) in bits
    """
    epsilon = 1e-4 # keeps log2 finite for zero probabilities

    def entropy(p):
        # Shannon entropy in bits of a probability vector
        return -np.dot(p, np.log2(p + epsilon))

    classes = range(num_classes)
    num_instances = float(Y.shape[0])

    # Marginal distributions p(y) and p(t)
    p_y = np.array([np.sum(Y == k) for k in classes], dtype=float) / num_instances
    p_t = np.array([np.sum(T == k) for k in classes], dtype=float) / num_instances

    H_Y = entropy(p_y)

    # H(Y | T = t) for every predicted class t; stays 0 for classes that
    # were never predicted
    H_Y_given_t = np.zeros(num_classes)
    for t in classes:
        y_when_t = Y[T == t]
        counts = np.array([np.sum(y_when_t == k) for k in classes], dtype=float)
        total = counts.sum()
        if total > 0:
            # convert counts to probabilities p(y | t)
            H_Y_given_t[t] = entropy(counts / total)

    # H(Y|T) is the p(t)-weighted average of the per-class entropies
    return H_Y - np.dot(p_t, H_Y_given_t)

# CRF Scratch Space

In [None]:
'''
# Deprecated figure style

fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax.scatter(pix_ct, y, marker='o', s=40, facecolors='none', edgecolors='b')
ax.set_ylabel('Mussel Biomass (g)')
#ax.set_xlim(0, 1.05)
ax.set_xlabel('Fraction of Pixels Labelled Mussel')

x = np.linspace(0, 0.3)
ax.plot(x, m*x + c, 'b', linestyle='-')
ax.annotate(r'r = %.4f' % r_val, xy=(.06, .805), fontsize=16, xycoords='axes fraction')

ax.grid()
fname = 'TestingSet_Lab_biomass_v_fract_mussel_pixels_v2'

plt.tight_layout()
fig.savefig(fname + '.png')
fig.savefig(fname + '.eps', format='eps')
'''

In [None]:
%matplotlib notebook

In [None]:
seg_mask = seg_mask[:-273, 1250:3250]
mask = mask[:-273, 1250:3250]
rgb = rgb[:-273, 1250:3250, :]
w = 640
seg = cv2.resize(seg_mask, (w, w))
rgb = cv2.resize(rgb, (w, w))
msk = cv2.resize(mask, (w, w))

In [None]:
seg = seg_mask.copy()

In [None]:
rgb.shape

In [None]:
seg.shape

In [None]:
#imask = np.invert(msk).astype('bool')
#imask.shape

In [None]:
#imask.astype('bool').shape

In [None]:
#rgb[imask] = 0

In [None]:
#plt.imshow(rgb)

In [None]:
#w = 640
#seg = cv2.resize(seg_mask, (w, w))
#rgb = cv2.resize(rgb, (w, w))
img = np.ascontiguousarray(rgb)

In [None]:
img.shape

In [None]:
# Scratch: run the dense CRF once on the current `rgb` / `seg` pair and save
# the (soft, non-binarized) foreground map as an 8-bit image.
img = np.ascontiguousarray(rgb)
# Two-class "softmax": channel 0 = mussel, channel 1 = background. Built in
# float so that a 0/255 uint8 mask cannot wrap around in `1 - seg`; for a 0/1
# mask the values are the same as before.
fg_prob = (seg > 0).astype('float')
labels = np.stack([fg_prob, 1.0 - fg_prob])
c = labels.shape[0]
h = labels.shape[1]
w = labels.shape[2]

U = utils.unary_from_softmax(labels)
U = np.ascontiguousarray(U)
d = dcrf.DenseCRF2D(w, h, c)
d.setUnaryEnergy(U)
MAX_ITER = 10
# NOTE(review): the five constants below are not used by the calls that
# follow, which pass literal values instead -- confirm which set is intended.
POS_W = 3
POS_XY_STD = 10
Bi_W = 40
Bi_XY_STD = 67
Bi_RGB_STD = 30

# This adds the color-independent term, features are the locations only.
# compat=3, Potts model - it introduces a penalty for nearby similar
# pixels that are assigned different labels.
d.addPairwiseGaussian(sxy=3, compat=3)
# This adds the color-dependent term, i.e. features are (x,y,r,g,b).
# im is an image-array, e.g. im.dtype == np.uint8 and im.shape == (640,480,3)
d.addPairwiseBilateral(sxy=80, srgb=13, rgbim=img, compat=10)
Q = d.inference(MAX_ITER)
Q = np.array(Q).reshape((c, h, w))

crf_mask = (Q[0] * 255).astype('uint8')
# NOTE(review): `i` is whatever index an earlier cell left behind -- make sure
# it still refers to the image held in `rgb` / `seg`.
crf_mask_file = os.path.join(SAVE_PATH, all_images[i].split('/')[-1].split('.')[0] + '_mask_crf.png')
# Write the mask computed above (this cell used to write `fmask`, which is
# only defined in a later cell).
cv2.imwrite(crf_mask_file, crf_mask)

addPairwiseGaussian
- `sxy` = $\theta_{\gamma}$, smoothness kernel

addPairwiseBilateral
- `sxy` = $\theta_{\alpha}$, appearance kernel
- `srgb` = $\theta_{\beta}$, appearance kernel 

In [None]:
# Side-by-side view of the RGB input, the rough mask and the CRF output.
font=28
fig, axes = plt.subplots(1, 3, figsize=(20, 8))
axes[0].imshow(rgb)
axes[0].set_title('RGB input', fontsize=font)
axes[1].imshow(seg)
axes[1].set_title('Rough mask', fontsize=font)
#Q[0][Q[0] >= 0.5] = 1
#Q[0][Q[0] < 0.5] = 0
axes[2].imshow(Q[0])
axes[2].set_title('CRF output', fontsize=font)

# Iterate over the axes directly: looping with `i` would overwrite the global
# image index `i` that other cells use to build the output file name.
for panel in axes:
    panel.axis('off')

#plt.tight_layout()
#fig.savefig(all_images[i].split('/')[-1].split('.')[0] + '_CRF_1x3.png')

In [None]:
%matplotlib notebook
#all_images[i].split('/')[-1].split('.')[0]

In [None]:
#fmask = Q[0].astype()
#Q[0][Q[0] >= 0.5] = 1
#Q[0][Q[0] < 0.5] = 0
fmask = (Q[0] * 255).astype('uint8')

In [None]:
crf_mask_file = os.path.join(SAVE_PATH, all_images[i].split('/')[-1].split('.')[0] + '_mask_crf.png')
cv2.imwrite(crf_mask_file, fmask)

In [None]:
seg_mask_bak = seg_mask.copy()

In [None]:
seg_mask = seg_mask_bak.copy()

In [None]:
rho = 10  
theta = np.pi / 45
threshold = 500
mLL = 500
mLG = 20
linesP = cv2.HoughLinesP(seg_mask, rho, theta, threshold=threshold, minLineLength=mLL, maxLineGap=mLG)
print(len(linesP))

In [None]:
plt.imshow(seg_mask)

In [None]:
#len(linesP)
buf = 1000

In [None]:
# Scratch: draw every near-horizontal Hough line on `rgb` and erase a band
# around it from `seg_mask`.
line_width = 10
#N = 5
if linesP is not None:
    # Loop over the lines directly instead of using `i`, so the global image
    # index `i` used by other cells is not overwritten.
    for line in linesP:
        x0, y0, x1, y1 = (int(v) for v in line[0])
        pt1 = (x0, y0)
        pt2 = (x1, y1)
        if abs(y0 - y1) < 50:
            print('Found horiz line', pt1, pt2)
            cv2.line(rgb, pt1, pt2, (255, 0, 255), line_width, cv2.LINE_AA)
            # Clamp the slice starts at 0: with a large `buf` they become
            # negative, which numpy interprets as "from the end", so the
            # intended band was not (or only partly) cleared.
            seg_mask[max(y1 - buf, 0):y0 + buf,
                     max(min(x0, x1) - buf * 4, 0):max(x0, x1) + buf * 4] = 0
plt.figure(figsize=(14, 10))
plt.imshow(rgb)

In [None]:

#pix_ct = pix_ct / pix_ct.max()

In [None]:
import scipy.stats as ss
from scipy.stats.distributions import t

In [None]:
all_images[0].split('/')[-1].split('.')[0][4:-8]

In [None]:
# Scratch check of the table lookup for one image. The same index is used for
# the lookup and for the write: previously image 0 was looked up but the
# result was written to lab_targets[i], where `i` was a leftover from an
# earlier cell.
idx = 0
root_fname = all_images[idx].split('/')[-1].split('.')[0][4:-8]
guid = image_df[image_df['Name'].str.contains(root_fname)]['Analysis Index'].astype('int64')
row = data_df[data_df['Analysis Index'].values == np.unique(guid.values)]
lab_targets[idx, 0] = row['Biomass'].values
lab_targets[idx, 1] = row['Count'].values

In [None]:
#lab_targets

In [None]:
#v, cts = np.unique(mask, return_counts=True)
#print(cts)

In [None]:
cts[1]

In [None]:
good = close.copy()

In [None]:
# Rebuild the dot-removal mask from `close`: erode with a 25x25 square, then
# dilate with a 120x120 square. The structuring elements are built inline so
# that the notebook-level `k_size` and `kernel` (used by the main segmentation
# loop) are not overwritten by this scratch cell.
t = cv2.erode(close, np.ones((25, 25), np.uint8), iterations=1)

mask = cv2.dilate(t, np.ones((120, 120), np.uint8), iterations=1)

In [None]:
plt.imshow()

In [None]:
#plt.imshow(erosion)

In [None]:
close.shape

In [None]:
mask

In [None]:
# Show the cleaned threshold image next to the dot-removal mask.
fig, axes = plt.subplots(1, 2, figsize=(16, 12))
axes[0].imshow(close)
#clean_mask = close[mask == 1] = 1
axes[1].imshow(mask)
# Iterate over the axes directly so the global image index `i` is left alone.
for panel in axes.flat:
    panel.axis('off')
plt.show() #pause(0.1)

In [None]:
import numpy as np
try:
    import cPickle as pickle
except ImportError:
    import pickle

from pystruct import learners
import pystruct.models as crfs
from pystruct.utils import SaveLogger

In [None]:
!pip install cvxopt

In [None]:
data_train['X'][:10][0][0].shape

In [None]:
#data_train['Y'][:10][0][0]

In [None]:
#data_train = pickle.load()
# https://rebeccabilbro.github.io/convert-py2-pickles-to-py3/
with open('/scratch/ssd/data/CRF_Tut/data_train.pickle', 'rb') as f:
    data_train = pickle.load(f, encoding='latin1')

In [None]:
#data_train = pickle.load(open("/scratch/ssd/data/CRF_Tut/data_train.pickle"))
# Train a structured SVM on edge-feature graph CRFs and score it on the
# validation set.
C = 0.01

n_states = 21
print("number of samples: %s" % len(data_train['X']))
# Inverse-frequency class weights, rescaled so that they sum to n_states.
class_weights = 1. / np.bincount(np.hstack(data_train['Y']))
class_weights *= float(n_states) / np.sum(class_weights)
print(class_weights)

model = crfs.EdgeFeatureGraphCRF(inference_method='qpbo',
                                 class_weight=class_weights,
                                 symmetric_edge_features=[0, 1],
                                 antisymmetric_edge_features=[2])

experiment_name = "edge_features_one_slack_trainval_%f" % C

ssvm = learners.NSlackSSVM(
    model, verbose=2, C=C, max_iter=100000, n_jobs=-1,
    tol=0.0001, show_loss_every=5,
    logger=SaveLogger(experiment_name + ".pickle", save_every=100),
    inactive_threshold=1e-3, inactive_window=10, batch_size=100)

ssvm.fit(data_train['X'], data_train['Y'])

# Pickles must be opened in binary mode on Python 3, and the handle is closed
# by the `with` block; encoding='latin1' mirrors how the training pickle is
# loaded in this notebook.
# NOTE: pickle can execute arbitrary code -- only load files you trust.
with open("data_val_dict.pickle", 'rb') as f:
    data_val = pickle.load(f, encoding='latin1')
y_pred = ssvm.predict(data_val['X'])

# we throw away void superpixels and flatten everything
y_pred, y_true = np.hstack(y_pred), np.hstack(data_val['Y'])
y_pred = y_pred[y_true != 255]
y_true = y_true[y_true != 255]

print("Score on validation set: %f" % np.mean(y_true == y_pred))

In [None]:
learners.NSlackSSVM?

In [None]:
#!pip install pyqpbo

In [None]:
from skimage.segmentation import slic
from skimage.segmentation import mark_boundaries

In [None]:
segments = slic(rgb, 1000)
#slic?

In [None]:
rgbb = mark_boundaries(rgb, segments, color=(0,0,0))

In [None]:
plt.figure()
plt.imshow(rgbb)
plt.show()

In [None]:
%matplotlib

In [None]:
plt.imshow(np.invert(mask))

In [None]:
rgb[np.invert(mask), :].shape # = 0

In [None]:
plt.imshow(rgb)

In [None]:
#mask.shape

In [None]:
np.unique(mask)

In [None]:
rgb.shape