# Preprocess Boom Data and its annotations


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas
import os
import pickle
from scipy.misc import imread
from os.path import join
import numpy as np
import itertools
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
%matplotlib inline
import glob
import math
import cv2

from utils.preprocess_data import *
from utils.process_dirs import *

## Uninoculated data

In [None]:
# Index file of the uninoculated canopy set; read_paths() turns it into
# samples that unpack as (annotation path, image path).
txt_unin = '/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_1/ann_img.txt'

In [None]:
samples_unin = read_paths(txt_unin)
print samples_unin[0].img_path

## Image example

In [None]:
# Open the first uninoculated image and display it.
example_img_path = samples_unin[0].img_path
img = Image.open(example_img_path)
plt.imshow(img, cmap='Greys_r')

In [None]:
for i, [ann_path, img_path] in enumerate(samples_unin):

    img_segments = extract_segmnets(ann_path)
    nmb_seg = count_lesions_nmb(img_segments)

    print "{}: {}, nmb_segments = {} \n"\
            .format(i, os.path.basename(img_path), len(img_segments))

### Drawn Overlay example

In [None]:
f = 200
ann_path, img_path = samples_unin[f]

img_segments = extract_segmnets(ann_path)
nmb_lesions = count_lesions_nmb(img_segments)
if nmb_lesions == 0:
    print 'Number of lesions = 0'
else:
    for i, seg in enumerate(img_segments):
        print 'segment coordinates: {}, segment {} length: '.format(seg.coord(), i), seg.length()

    nmb_intersections = compute_intersections_nmb(img_segments)
    nmb_lesions = len(img_segments) - nmb_intersections
    print '\nnumber of intersections', nmb_intersections
    print "number of lesions {} \n".format(nmb_lesions)

fig, ax = plt.subplots(figsize=(60,20))
print img_path
img = Image.open(img_path)
draw = ImageDraw.Draw(img) 
if nmb_lesions:
    for seg in img_segments:
        draw.line(seg.coord(), fill=128, width = 10)
ax.imshow(img, cmap='Greys_r')

## Save images with overlaid segments and ellipses 

In [None]:
import math

path_to_save = '/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_2/Imgs_masks/'

SAVE = True

for ann_path, img_path in samples_unin:

    basename = os.path.basename(img_path.split('.')[0])
    print basename
    img_segments = extract_segmnets(ann_path)
    nmb_lesions = count_lesions_nmb(img_segments)

    img = cv2.imread(img_path)
    
    if nmb_lesions:
        for seg in img_segments:
            x1,y1,x2,y2 = seg.coord()

            cv2.line(img, (x1,y1), (x2,y2), (0,0,255),5)
        if SAVE:
            cv2.imwrite(os.path.join(path_to_save,basename+'_line.JPG'),img)

    if nmb_lesions:
        for seg in img_segments:
            x1,y1,x2,y2 = seg.coord()

            center = (int((x1 + x2)*0.5), int((y1 + y2)*0.5))
            seglen = int(seg.length()/2)
            angle = int(math.atan2((y1-y2),(x1-x2))*180/math.pi)

            cv2.ellipse(img=img, center=center, axes=(seglen, seglen/4), angle=angle, startAngle=360, endAngle=0,  
                color=(0,0,255),thickness=-1)
        if SAVE:
            cv2.imwrite(os.path.join(path_to_save,basename+'_mask.JPG'),img)


## Create per pixel annotation with ellipse gt

In [None]:
COEFF = 6 # ratio of ellipse axis
# Hlim = 180 # hue limit

path_to_save = '/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_1/Imgs_gtEllip/'
for i, (ann_path, img_path) in enumerate(samples_unin):
    basename = os.path.basename(img_path.split('.')[0])
    print basename
    img_segments = extract_segmnets(ann_path)

    img = cv2.imread(img_path)

    mask = np.ones(img.shape[:2], np.uint8)
    for seg in img_segments:

        x1,y1,x2,y2 = seg.coord()

        center = (int((x1 + x2)*0.5), int((y1 + y2)*0.5))
        seglen = int(seg.length()/2)
        angle = int(math.atan2((y1-y2),(x1-x2))*180/math.pi)

        cv2.ellipse(img=mask, center=center, axes=(seglen, seglen/COEFF), angle=angle, startAngle=360, endAngle=0,  
            color=255,thickness=-1)

    cv2.imwrite(os.path.join(path_to_save,basename+'_gtEllip.JPG'),mask)






## Split uninocul in quarters

In [None]:
from utils.split_image_4_quarters import split_img_quarters

dir_to_save = '/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_1/Quarters/Imgs_gtEllip/'
imgs_dir = '/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_1/Imgs_gtEllip'

for i,img_path in enumerate(os.listdir(imgs_dir)):
    print i
    img_path = os.path.join(imgs_dir, img_path)
    split_img_quarters(img_path, dir_to_save)

In [None]:
# Display the first file found in the quarters output directory.
# (The original listed a misspelled, undefined directory variable.)
quarter_names = os.listdir(dir_to_save)
img = Image.open(os.path.join(dir_to_save, quarter_names[0]))
plt.imshow(img, cmap='Greys_r')

## Create txt file with per pixel ann + quarter img paths

In [None]:
# List every quarter ground-truth file, sorted by name, one per line.
# Full paths are written (directory + file name), matching the inoculated
# index file; bare file names cannot be resolved by later os.path.isfile
# checks unless the working directory happens to be ann_dir.
ann_dir = '/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_2/Quarters/Imgs_gtEllip/'
ann_paths = np.sort(os.listdir(ann_dir))
# NOTE(review): img_paths is collected but not written here - the image
# paths are presumably appended to each line in a separate step; confirm.
img_paths = os.listdir('/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_2/Quarters/Imgs/')

with open ('/home/anastasia/cropDL/data/boom/canopy_uninoculated_tyrwh_2/Quarters/ann_img_unin.txt', 'w') as txt:
    for ann_path in ann_paths:
        txt.write(os.path.join(ann_dir, ann_path) + '\n')
    

### Show histogram

In [None]:

# Calculate histogram with mask and without mask
# Check third argument for mask
img = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)
hist_full = cv2.calcHist([img],[0],None,[Hlim],[0,Hlim])
color = ('b','g','r')

fig, ax = plt.subplots(figsize=(20,5))
for i,col in enumerate(color):
    print col, i
    hist_mask = cv2.calcHist([img],[i],mask,[Hlim],[0,Hlim])
    ax.plot(hist_mask, color = col)
    ax.set_xlim([0,250])
    ax.grid()

plt.show()

### Extract lesion

In [None]:
# Hue interval used to threshold the image; presumably the hue range of
# lesion pixels (judging by the variable names) - TODO confirm the values.
lower_lesion = np.array([140])
upper_lesion = np.array([179])


fig, ax = plt.subplots(figsize=(20,20))
# Convert the BGR image to HLS. `masked_img` is not defined in this cell and
# must be provided by an earlier cell.
masked_img_hls = cv2.cvtColor(masked_img, cv2.COLOR_BGR2HLS)
ax.imshow(masked_img_hls)
# crop lesion
# cropped = masked_img_hls[:2000:, 4000:]
# fig, ax = plt.subplots(figsize = (20,20))
# plt.imshow(cropped)
# cv2.imwrite('lesion.JPG', cropped)

# Keep only channel 0 of the HLS image (hue) and show it on the same axes,
# replacing the HLS view drawn above.
masked_img_h = masked_img_hls[:,:,0]
ax.imshow(masked_img_h)
print masked_img_h.shape

# Binary mask: 255 where the hue lies inside [lower_lesion, upper_lesion].
mask_h = cv2.inRange(masked_img_h, lower_lesion, upper_lesion)

# Replicate the single-channel mask along a new third axis (3 copies).
mask_hls = np.repeat(mask_h[:,:, np.newaxis], 3, axis = 2)

print 'mask_hls.dtype',mask_hls.dtype
print 'masked_img_hls.dtype', masked_img_hls.dtype
print mask_hls.shape, masked_img_hls.shape

fig, ax = plt.subplots(figsize=(20,20))
ax.imshow(mask_h)

# Bitwise-AND mask and original image
# res = cv2.bitwise_and(masked_img_hls,masked_img_hls, mask= mask_hls)


# print masked_img.shape
# fig, ax = plt.subplots(figsize = (20,20))
# plt.imshow(res)
# plt.imshow(masked_img_hls)

In [None]:
nmb_lesions_uninoc = []
            
for ann_path,_ in samples_unin: 

    segments = extract_segmnets(ann_path)
    nmb_lesions_uninoc.append(count_lesions_nmb(segments))
    
print 'Mean number of lesions per uninoculated image is', np.mean(nmb_lesions_uninoc)

In [None]:
# Histogram of the per-image lesion counts (left panel of a 1x2 figure;
# the right panel is left empty).
f,a = plt.subplots(1,2, figsize=(15,5))
a = a.ravel()

# hist() needs at least one bin: the largest count is 0 when no image has
# any lesion, which would otherwise make the call fail.
nmb_bins = max(1, int(np.max(nmb_lesions_uninoc)))

a[0].hist(nmb_lesions_uninoc, facecolor='blue', bins = nmb_bins)
a[0].set_title('Histogram over nmb of lesions per image in TRAIN set')
a[0].set_xlabel('Nmb of lesions per image')
a[0].set_ylabel('Nmb of images')

# Inoculated data (more lesioned)

## Create file with segment annotation + image path

In [None]:
# Directories of the inoculated set: the annotation directory is scanned for
# .csv files when the index file is built; the second one presumably holds
# the matching images.
anns_inoc_dir_path = "/home/makarova/columbia/data/inoculated_ethan/Anns/"
imgs_inoc_dir_path = "/home/makarova/columbia/data/inoculated_ethan/Imgs/"

In [None]:
# Index file of the inoculated set: written with the annotation paths by the
# following cell and later read back with read_paths().
txt_inoc = "/home/makarova/columbia/data/inoculated_ethan/ann_img.txt"

In [None]:
import os
# Gather the full path of every .csv annotation file and write them,
# sorted, one per line, to the index file.
ann_paths = []
for f in os.listdir(anns_inoc_dir_path):
    if f.endswith('.csv'):
        ann_paths.append(os.path.join(anns_inoc_dir_path, f))

with open (txt_inoc, 'w') as txt:
    for ann_path in np.sort(ann_paths):
        txt.write('{}\n'.format(ann_path))

# Manual follow-up: open the file with vim and paste this command:
# :%s/\(.*\)Count\(.*\)_results.csv/\1Count\2_results.csv\ \1Done\2.JPG/g
# It appends the matching image path to each line, after the annotation
# path and separated by a space.

In [None]:
# check whether all image paths exist
with open (txt_inoc, 'r') as txt:
    lines = txt.readlines()
with open (txt_inoc, 'w') as txt:
    for line in lines:
        img_path = line.split(' ')[0]
        if os.path.isfile(img_path):
            txt.write(line)
        else: print line

## Create per pixel annotation

In [None]:
samples_inoc = read_paths(txt_inoc)
print samples_inoc[0].img_path

## Image example

In [None]:
# Open the first inoculated image and display it.
example_img_path = samples_inoc[0].img_path
img = Image.open(example_img_path)
plt.imshow(img, cmap='Greys_r')

In [None]:
nmb_seg_all = []
for i, [ann_path, img_path] in enumerate(samples_inoc):

    img_segments = extract_segmnets(ann_path)
    nmb_seg = count_lesions_nmb(img_segments)
    nmb_seg_all.append(nmb_seg)
    print "{}: {}, nmb_segments = {} \n"\
            .format(i, os.path.basename(img_path), len(img_segments))

In [None]:
print 'Mean number of segments per image = {}'.format(np.mean(nmb_seg_all))

### Drawn Overlay example

In [None]:
f = 28
ann_path, img_path = samples_inoc[f]

img_segments = extract_segmnets(ann_path)
nmb_lesions = count_lesions_nmb(img_segments)
if nmb_lesions == 0:
    print 'Number of lesions = 0'
else:
    for i, seg in enumerate(img_segments):
        print 'segment coordinates: {}, segment {} length: '.format(seg.coord(), i), seg.length()

    nmb_intersections = compute_intersections_nmb(img_segments)
    nmb_lesions = len(img_segments) - nmb_intersections
    print '\nnumber of intersections', nmb_intersections
    print "number of lesions {} \n".format(nmb_lesions)

fig, ax = plt.subplots(figsize=(60,20))
print img_path
img = Image.open(img_path)
draw = ImageDraw.Draw(img) 
if nmb_lesions:
    for seg in img_segments:
        draw.line(seg.coord(), fill=128, width = 10)
ax.imshow(img, cmap='Greys_r')

## Save images with overlaid segments and ellipses 

In [None]:
import math

path_to_save = '/home/makarova/columbia/data/inoculated_ethan/Imgs_masks/'

SAVE = True

for ann_path, img_path in samples_inoc:

    basename = os.path.basename(img_path.split('.')[0])
    print basename
    img_segments = extract_segmnets(ann_path)
    nmb_lesions = count_lesions_nmb(img_segments)

    img = cv2.imread(img_path)
    
    if nmb_lesions:
        for seg in img_segments:
            x1,y1,x2,y2 = seg.coord()

            cv2.line(img, (x1,y1), (x2,y2), (0,0,255),5)
        if SAVE:
            cv2.imwrite(os.path.join(path_to_save,basename+'_line.JPG'),img)

    if nmb_lesions:
        for seg in img_segments:
            x1,y1,x2,y2 = seg.coord()

            center = (int((x1 + x2)*0.5), int((y1 + y2)*0.5))
            seglen = int(seg.length()/2)
            angle = int(math.atan2((y1-y2),(x1-x2))*180/math.pi)

            cv2.ellipse(img=img, center=center, axes=(seglen, seglen/4), angle=angle, startAngle=360, endAngle=0,  
                color=(0,0,255),thickness=-1)
        if SAVE:
            cv2.imwrite(os.path.join(path_to_save,basename+'_mask.JPG'),img)


## Create per pixel annotation with ellipse gt

In [None]:
COEFF = 6 # ratio of ellipse axis
# Hlim = 180 # hue limit

path_to_save = '/home/makarova/columbia/data/inoculated_ethan/Imgs_gtEllip/'
for i, (ann_path, img_path) in enumerate(samples_inoc):
    basename = os.path.basename(img_path.split('.')[0])
    print basename
    img_segments = extract_segmnets(ann_path)

    img = cv2.imread(img_path)

    mask = np.ones(img.shape[:2], np.uint8)
    for seg in img_segments:

        x1,y1,x2,y2 = seg.coord()

        center = (int((x1 + x2)*0.5), int((y1 + y2)*0.5))
        seglen = int(seg.length()/2)
        angle = int(math.atan2((y1-y2),(x1-x2))*180/math.pi)

        cv2.ellipse(img=mask, center=center, axes=(seglen, seglen/COEFF), angle=angle, startAngle=360, endAngle=0,  
            color=255,thickness=-1)

    cv2.imwrite(os.path.join(path_to_save,basename+'_gtEllip.JPG'),mask)






## Create txt file with per pixel ann + quarter img paths

In [None]:
# Write the full path of every ground-truth file in ann_dir, sorted by
# name, one per line. Note that txt_inoc is rebound to a new file here.
ann_dir = '/home/makarova/columbia/data/inoculated_ethan/Imgs_gtEllip/'
ann_paths = np.sort(os.listdir(ann_dir))
img_paths = os.listdir('/home/makarova/columbia/data/inoculated_ethan/Imgs/')

txt_inoc = '/home/makarova/columbia/data/inoculated_ethan/gt_img_inoc.txt'
with open (txt_inoc, 'w') as txt:
    for ann_path in ann_paths:
        full_ann_path = os.path.join(ann_dir, ann_path)
        txt.write(full_ann_path + '\n')
    

### Show histogram for hue in ellipse mask

In [None]:

# # Calculate histogram with mask and without mask
# # Check third argument for mask
# img = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)
# hist_full = cv2.calcHist([img],[0],None,[Hlim],[0,Hlim])
# color = ('b','g','r')

# fig, ax = plt.subplots(figsize=(20,5))
# for i,col in enumerate(color):
#     print col, i
#     hist_mask = cv2.calcHist([img],[i],mask,[Hlim],[0,Hlim])
#     ax.plot(hist_mask, color = col)
#     ax.set_xlim([0,250])
#     ax.grid()

# plt.show()

# Merge several txt files and create train/valid/test txt

In [None]:
# txt files with paths to gt annotation and img
txt1 = '/home/makarova/columbia/data/inoculated_ethan/gt_img_inoc.txt'
txt2 = '/home/makarova/columbia/data/inoculated_1/gt_img_inoc.txt'

txt_all = '/home/makarova/columbia/data/gt_img_inoc__tyrethan.txt'
txt_train = '/home/makarova/columbia/data/gt_img_inoc_tyrethan_train'
txt_valid = '/home/makarova/columbia/data/gt_img_inoc_tyrethan_valid'
txt_test = '/home/makarova/columbia/data/gt_img_inoc_tyrethan_test'

In [None]:
with open (txt2, 'r') as fin:
    lines1 = fin.readlines()
with open (txt1, 'r') as fin:
    lines2 = fin.readlines()

print 'len(lines1)', len(lines1)
lines1.extend(lines2) 
print 'len(lines1)', len(lines1) 

with open (txt_all, 'w') as fout:
    for i,line in enumerate(lines1):
        img_path = line.split(' ')[0]
        if os.path.isfile(img_path):
            fout.write(line)
        else: print i, line
    print i+1
        
        

In [None]:
import os

with open (txt1, 'r') as fin:
    lines1 = fin.readlines()
with open (txt2, 'r') as fin:
    lines2 = fin.readlines()
    
print len(lines1), len(lines2)


mode = 'w'
print 'saved to:', txt_train, txt_valid, txt_test
with open (txt_train, mode) as fout_train, open(txt_valid, mode) as fout_valid, open(txt_test, mode) as fout_test:
    
    tr, v, ts = 0, 0, 0
    for i,line in enumerate(lines1):
        img_path = line.split(' ')[0]
        if os.path.isfile(img_path):
            if i < 400:
                fout_train.write(line)
                tr+=1
            if 400 <= i < 450:
                fout_valid.write(line)
                v+=1
            if i >= 450:
                fout_test.write(line)
                ts+=1
    print tr, v, ts

mode = 'a+'
print 'saved to:', txt_train, txt_valid, txt_test
with open (txt_train, mode) as fout_train, open(txt_valid, mode) as fout_valid, open(txt_test, mode) as fout_test:
    
    tr, v, ts = 0, 0, 0
    for i,line in enumerate(lines2):
        img_path = line.split(' ')[0]
        if os.path.isfile(img_path):
            if i < 400:
                fout_train.write(line)
                tr+=1
            if 400 <= i < 450:
                fout_valid.write(line)
                v+=1
            if i >= 450:
                fout_test.write(line)
                ts+=1
    print tr, v, ts

# Crop images into smaller ones
Preprocess images