# Preprocess and Crop images

* <b>Instructions</b>
    * cd to the folder that contains the data folder "./RawData/".
    * Restart kernel of the jupyter notebook and run all cells.

### Update the following parameters for yourself in the first cell


* <b><i>inputDir</i></b> : The path to the batch of raw images. Use an absolute path here so that it does not matter where this notebook is saved. <i>data type: string</i>


* <b><i>outputDir</i></b> : the destination to save the output images. <i>data type: string</i>


* <b><i>totalSoybeansPerImg</i></b> : The approximate number of soybeans per image. It is used to distinguish images with few soybeans from images with many soybeans (for example, more than 100 soybeans per image). <i>data type: int</i>


* <b><i>colorPad</i></b> : Whether the images contain a color pad. <i>data type: boolean</i>


* <b><i>targetCmPerPixel</i></b> : The target scale in centimeters per pixel, used to normalize the size of soybeans across all source images. <i>data type: float</i>

In [1]:
## update the following parameters
## batch 5
# Folder that is walked for raw photos. The trailing '/' matters wherever a
# path is built by plain string concatenation.
inputDir = '/Users/huiminhan/Desktop/InfoVis/CropAI/RawData/batch_05/'
# Folder that receives the cropped single-soybean images.
outputDir = '/Users/huiminhan/Desktop/InfoVis/CropAI/CroppedImageData/Crop_batch5/'
# Passed to batchProcessing() as imgNum; crop() uses it to choose the size of
# its morphological closing kernel.
totalSoybeansPerImg = 15 # could be an approximate value
# When True, crop() skips the largest contour and rescales every crop.
colorPad = False

In [2]:
## Update: Add one more parameter
# Target scale, in centimeters per pixel, for size normalization of the crops.
targetCmPerPixel = 0.007

In [44]:
import cv2
import numpy as np
import matplotlib.pyplot as plt 
import os
import imutils
from imutils import perspective
from scipy.spatial import distance as dist
from imutils import perspective
import math
import shutil

In [41]:
def resize_image(image, height, width):
    """Pad an image with black borders onto a height x width canvas.

    Despite its name this function never rescales pixels; it only rotates
    and pads, so the physical scale of the content is preserved.

    Steps:
      1. A landscape image (rows < columns) is rotated 90 degrees
         counter-clockwise so that it is at least as tall as it is wide.
      2. Black columns are added left and right to make the image square.
      3. If the square fits inside ``height`` x ``width`` it is centred on a
         black canvas of exactly that size. Otherwise the square is returned
         unchanged (it is NOT shrunk).

    Args:
        image: array whose first two axes are rows and columns.
        height: target canvas height in pixels.
        width: target canvas width in pixels.

    Returns:
        The padded image (a new array).
    """
    BLACK = [0, 0, 0]

    # Only the first two axes are needed, so 2-D images are accepted as well.
    h, w = image.shape[:2]
    if h < w:
        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
        h, w = w, h

    # After the rotation h >= w, so only the width can fall short of a square.
    dw = h - w
    left = dw // 2
    right = dw - left
    constant = cv2.copyMakeBorder(image, 0, 0, left, right, cv2.BORDER_CONSTANT, value=BLACK)

    side = h
    if side <= height and side <= width:
        # Give the odd leftover pixel to the bottom/right border; using
        # (target - side) // 2 on both sides would end up one pixel short.
        top = (height - side) // 2
        bottom = (height - side) - top
        left = (width - side) // 2
        right = (width - side) - left
        resized = cv2.copyMakeBorder(constant, top, bottom, left, right, cv2.BORDER_CONSTANT, value=BLACK)
    else:
        resized = constant
    return resized

def maxCont(contours):
    """Return the largest ``cv2.contourArea`` value found among ``contours``.

    Raises ValueError when ``contours`` is empty.
    """
    return max(cv2.contourArea(shape) for shape in contours)

def mdpt(A, B):
    """Return the midpoint between the 2-D points ``A`` and ``B`` as an (x, y) tuple."""
    ax, ay = A[0], A[1]
    bx, by = B[0], B[1]
    mid_x = (ax + bx) * 0.5
    mid_y = (ay + by) * 0.5
    return (mid_x, mid_y)

def cmPerPixel(image, referenceLengthCm=20):
    """Estimate the image scale in centimeters per pixel.

    The largest external contour found by Canny edge detection is treated as
    the reference object. Its minimum-area bounding box is computed and the
    distance between the midpoints of the box's top and bottom edges is taken
    as the reference length in pixels.

    Args:
        image: BGR image as returned by ``cv2.imread``.
        referenceLengthCm: physical length, in centimeters, that the measured
            box side corresponds to. Defaults to 20, the value this function
            used to hard-code (presumably the size of the color pad -- TODO
            confirm against the photo setup).

    Returns:
        ``referenceLengthCm`` divided by the measured length in pixels.

    Raises:
        ValueError: if no contour is found or the measured length is zero.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (7, 7), 0)
    edge_detect = cv2.Canny(gray, 60, 200)
    # Dilate then erode to close small gaps in the detected edges.
    edge_detect = cv2.dilate(edge_detect, None, iterations=1)
    edge_detect = cv2.erode(edge_detect, None, iterations=1)
    cnts = cv2.findContours(edge_detect.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    if len(cnts) == 0:
        raise ValueError("No contour found; cannot estimate cm per pixel")

    # Pick the largest contour once instead of recomputing the maximum area
    # for every contour. reversed() keeps the old tie-break: when several
    # contours share the maximum area, the last one in the list wins.
    largest = max(reversed(cnts), key=cv2.contourArea)

    bbox = cv2.boxPoints(cv2.minAreaRect(largest))
    bbox = np.array(bbox, dtype="int")
    # Order the corners as top-left, top-right, bottom-right, bottom-left.
    (tl, tr, br, bl) = perspective.order_points(bbox)

    # Distance between the midpoints of the top and bottom edges.
    dA = dist.euclidean(mdpt(tl, tr), mdpt(bl, br))
    if dA == 0:
        raise ValueError("Reference contour has zero height; cannot estimate cm per pixel")
    return referenceLengthCm / dA

def Nrotate(angle,valuex,valuey,pointx,pointy):
    """Rotate (valuex, valuey) about (pointx, pointy) by ``angle`` degrees.

    Applies x' = dx*cos - dy*sin, y' = dx*sin + dy*cos to the offset from
    the pivot and returns the rotated coordinates as an (x, y) tuple.
    """
    theta = (angle/180)*math.pi
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Offsets of the point from the pivot.
    dx = np.array(valuex) - pointx
    dy = np.array(valuey) - pointy
    rotated_x = dx*cos_t - dy*sin_t + pointx
    rotated_y = dx*sin_t + dy*cos_t + pointy
    return (rotated_x, rotated_y)

def Srotate(angle,valuex,valuey,pointx,pointy):
    """Rotate (valuex, valuey) about (pointx, pointy) by ``angle`` degrees.

    Applies x' = dx*cos + dy*sin, y' = dy*cos - dx*sin to the offset from
    the pivot (the opposite direction to ``Nrotate``) and returns the
    rotated coordinates as an (x, y) tuple.
    """
    theta = (angle/180)*math.pi
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Offsets of the point from the pivot.
    dx = np.array(valuex) - pointx
    dy = np.array(valuey) - pointy
    rotated_x = dx*cos_t + dy*sin_t + pointx
    rotated_y = dy*cos_t - dx*sin_t + pointy
    return (rotated_x, rotated_y)

def rotatecordiate(angle,rectboxs,pointx,pointy):
    """Rotate every point of ``rectboxs`` about (pointx, pointy).

    A positive ``angle`` is handled by ``Srotate``; any other angle is
    negated and handled by ``Nrotate``. Returns a list with one (x, y)
    tuple per input point, in input order.
    """
    return [
        Srotate(angle, corner[0], corner[1], pointx, pointy)
        if angle > 0
        else Nrotate(-angle, corner[0], corner[1], pointx, pointy)
        for corner in rectboxs
    ]

def imageCrop(image,box):
    """Cut the axis-aligned bounding region of ``box`` out of ``image``.

    Args:
        image: array indexed as ``image[row, column]``.
        box: iterable of integer (x, y) points, i.e. (column, row).

    Returns:
        ``image[min_y:max_y, min_x:max_x]``. Coordinates that fall before the
        top or left border are clamped to 0; a negative bound would otherwise
        be read as "count from the end" and give an empty or wrong crop.
        Bounds past the bottom or right border are clipped by slicing itself.
    """
    rows = [point[1] for point in box]
    cols = [point[0] for point in box]
    top, bottom = max(min(rows), 0), max(max(rows), 0)
    left, right = max(min(cols), 0), max(max(cols), 0)
    return image[top:bottom, left:right]
  
def normalizeSize(image,cmPerPixel,targetCmPerPixel):
    """Rescale ``image`` so that one pixel covers ``targetCmPerPixel`` cm.

    Args:
        image: image array to rescale.
        cmPerPixel: current scale of ``image`` in centimeters per pixel.
        targetCmPerPixel: desired scale in centimeters per pixel.

    Returns:
        The rescaled image. Earlier versions computed a resized copy but
        returned the untouched input, and also divided the ratio by 100 as
        if it were a percentage; both are corrected here.

    Raises:
        ValueError: if either scale is not strictly positive.
    """
    if cmPerPixel <= 0 or targetCmPerPixel <= 0:
        raise ValueError("cmPerPixel and targetCmPerPixel must be positive")

    # Plain ratio (not a percentage): > 1 enlarges, < 1 shrinks.
    scale = cmPerPixel/targetCmPerPixel
    print("The cm per pixel: ", cmPerPixel)
    # cv2.resize takes (width, height) and rejects zero-sized targets.
    width = max(1, int(round(image.shape[1] * scale)))
    height = max(1, int(round(image.shape[0] * scale)))
    # INTER_AREA is meant for shrinking; use bilinear when enlarging.
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
    return cv2.resize(image, (width, height), interpolation=interpolation)
    
def crop(image,outputDir,imgNum,target_height,target_width,targetPerPixel=0.007,imgId='',colorPad=False):
    """Segment one photo and save every detected soybean as its own image.

    Pipeline: build a colour-index image from the B, G and R channels,
    threshold it with Otsu's method, clean the mask with morphological
    closing/opening, then for each external contour whose area lies in
    [10000, 800000] rotate the image so the contour's minimum-area rectangle
    is axis-aligned, cut it out, optionally rescale it, pad it onto a
    ``target_height`` x ``target_width`` canvas and write it to
    ``outputDir`` as ``croped_<imgId>_<n>.jpg``.

    Args:
        image: path of the photo to read.
        outputDir: folder for the crops; created if missing.
        imgNum: approximate number of soybeans per photo. Up to 15 selects a
            90-px closing kernel, more than 15 a 25-px one.
        target_height: canvas height handed to ``resize_image``.
        target_width: canvas width handed to ``resize_image``.
        targetPerPixel: target scale in cm per pixel, used only when
            ``colorPad`` is True.
        imgId: identifier embedded in the output file names.
        colorPad: when True, the largest contour is skipped (presumably the
            color pad -- TODO confirm) and every crop is rescaled with
            ``normalizeSize``.

    Returns:
        The number of crops written.

    Raises:
        IOError: if the photo cannot be read.
        ValueError: if the photo is uniform or no contour is found.
    """
    src = cv2.imread(image)
    if src is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Could not read image: " + str(image))

    # The scale estimate is only used for normalization, so skip it otherwise.
    cmperPixel = cmPerPixel(src) if colorPad else None

    fsrc = np.array(src,dtype = np.float32)/255.0
    (b,g,r) = cv2.split(fsrc)
    # Hand-tuned linear combination of the channels used as segmentation index.
    gray = 2 * g - 3 * b - r + 0.8 * (1.4 * r - b)

    ## stretch the index to the full 8-bit range
    (minVal, maxVal, _, _) = cv2.minMaxLoc(gray)
    if maxVal == minVal:
        raise ValueError("Image is uniform; nothing to segment: " + str(image))
    gray_u8 = np.array((gray - minVal)/ (maxVal - minVal) * 255, dtype = np.uint8)
    (_, thresh) = cv2.threshold(gray_u8, -1.0, 255, cv2.THRESH_OTSU)

    ## image processing to remove noise
    # Few soybeans per photo -> large closing kernel, many -> small kernel.
    # "<=" (not "==") so that counts below 15 no longer leave size undefined.
    size = 90 if imgNum <= 15 else 25
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (size, size))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    closed = cv2.erode(closed, None, iterations=4)
    closed = cv2.dilate(closed, None, iterations=4)

    open_size = 60
    open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (open_size, open_size))
    opening = cv2.morphologyEx(closed, cv2.MORPH_OPEN, open_kernel)

    ## keep the original colours only where the closed mask is set
    (b8, g8, r8) = cv2.split(src)
    color_img = cv2.merge([b8 & closed, g8 & closed, r8 & closed])

    ## find contours
    # grab_contours copes with both the 2-tuple and 3-tuple return shapes of
    # cv2.findContours across OpenCV versions.
    contours = imutils.grab_contours(
        cv2.findContours(opening.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE))
    if len(contours) == 0:
        raise ValueError("No contour found in image: " + str(image))

    # Loop invariants: largest area, output canvas and output folder.
    largestArea = max(cv2.contourArea(cont) for cont in contours)
    # warpAffine expects (width, height); twice the source size leaves room
    # for content that rotates past the original borders.
    canvasSize = (2*color_img.shape[1], 2*color_img.shape[0])
    os.makedirs(outputDir, exist_ok=True)

    count = 0
    for cont in contours:
        ares = cv2.contourArea(cont)

        ## filter contours not for soybeans
        if ares<10000 or ares>800000:
            continue
        if colorPad and ares==largestArea:
            continue

        rect = cv2.minAreaRect(cont) # ((cx, cy), (w, h), angle)
        box_origin = cv2.boxPoints(rect)
        # Rotate the box corners and the image by the same angle about the
        # rectangle centre so the box becomes axis-aligned.
        box = rotatecordiate(rect[2],box_origin,rect[0][0],rect[0][1])
        M = cv2.getRotationMatrix2D(rect[0],rect[2],1)
        dst = cv2.warpAffine(color_img,M,canvasSize)
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        new_img = imageCrop(dst,np.array(box).astype(np.intp))
        if new_img.size == 0:
            # Box fell outside the canvas; skip it rather than fail the photo.
            print("Skipped an empty crop in " + str(image))
            continue

        if colorPad:
            normalizeSize_img = normalizeSize(new_img,cmperPixel,targetPerPixel)
        else:
            normalizeSize_img = new_img
        resized_img = resize_image(normalizeSize_img,target_height,target_width)

        ## writes the new file in the Crops folder
        count+=1
        outName = 'croped_'+str(imgId)+ '_' + str(count)+ '.jpg'
        cv2.imwrite(os.path.join(outputDir, outName), resized_img)
    return count
        
def batchProcessing(inputDir,outputDir,imgNum,target_height=900,target_width=900,colorPad=False,targetPerPixel=0.007):
    """Run ``crop`` on every file found under ``inputDir``.

    Args:
        inputDir: folder walked recursively for input photos.
        outputDir: folder that receives the crops.
        imgNum: approximate number of soybeans per photo, forwarded to ``crop``.
        target_height: canvas height forwarded to ``crop``.
        target_width: canvas width forwarded to ``crop``.
        colorPad: forwarded to ``crop``.
        targetPerPixel: target scale in cm per pixel, forwarded to ``crop``.

    Returns:
        The list of file names for which ``crop`` raised. The list is also
        printed, together with the reason for each failure.
    """
    imread_failed = []
    for (path,_dirnames,filenames) in os.walk(inputDir):
        for image in filenames:
            # os.walk yields sub-directory paths without a trailing
            # separator, so the path must be joined, not concatenated.
            src = os.path.join(path, image)
            imgId = image.split('.')[0]
            try:
                crop(src,outputDir,imgNum,target_height,target_width,
                     targetPerPixel=targetPerPixel,imgId=imgId,colorPad=colorPad)
            except Exception as err:
                # Best effort: record the failure and keep going. Unlike a
                # bare except this lets KeyboardInterrupt stop the batch.
                imread_failed.append(image)
                print("Image "+str(image)+" failed: "+repr(err))
            else:
                print("Image "+str(image)+" cropped.")

    print("Failed images: ",imread_failed)
    return imread_failed

def splitData(inputDir):
    """Group the file names under ``inputDir`` by their label.

    The label is the second-to-last ``_``-separated part of the file name,
    e.g. ``croped_IMG_2133_ready_1.jpg`` -> ``ready``. A file is assigned to
    the first of ready / late / disease that its label ends with. Any other
    file, including one with no ``_`` in its name, is reported with
    'File name incorrect' and left out.

    Args:
        inputDir: folder walked recursively.

    Returns:
        A tuple ``(ready, late, disease)`` of lists of file names.
    """
    soybeans_ready = []
    soybeans_late = []
    soybeans_disease = []
    buckets = (
        ('ready', soybeans_ready),
        ('late', soybeans_late),
        ('disease', soybeans_disease),
    )
    for *_, filenames in os.walk(inputDir):
        for file in filenames:
            parts = file.split('_')
            # Names without '_' have no label part; indexing [-2] would raise.
            label = parts[-2] if len(parts) >= 2 else ''
            for suffix, bucket in buckets:
                if label.endswith(suffix):
                    bucket.append(file)
                    break
            else:
                print('File name incorrect')
    return soybeans_ready,soybeans_late,soybeans_disease


def split3folder(inputDir):
    """Move the files in ``inputDir`` into ready/, late/ and disease/ sub-folders.

    The label is the second-to-last ``_``-separated part of the file name and
    must equal 'ready', 'late' or 'disease' exactly. The matching sub-folder
    is created on demand. Files with any other name are left in place and
    reported with 'something wrong with filenames'.

    Only files directly inside ``inputDir`` are considered. The previous
    recursive walk descended into the label folders on a second run and then
    tried to move files from ``inputDir`` that were no longer there.

    Args:
        inputDir: folder whose files are sorted; a trailing separator is
            optional.
    """
    labels = ('ready', 'late', 'disease')
    for filename in sorted(os.listdir(inputDir)):
        old_path = os.path.join(inputDir, filename)
        if not os.path.isfile(old_path):
            # Skip directories, including the label folders themselves.
            continue
        parts = filename.split('_')
        # Names without '_' have no label part; indexing [-2] would raise.
        label = parts[-2] if len(parts) >= 2 else None
        if label in labels:
            new_dir = os.path.join(inputDir, label)
            os.makedirs(new_dir, exist_ok=True)
            shutil.move(old_path, os.path.join(new_dir, filename))
        else:
            print('something wrong with filenames')

In [39]:
# Crop every image found under inputDir and write the crops to outputDir,
# using a 1200 x 1200 target canvas and the parameters set in the first cells.
batchProcessing(inputDir,outputDir,totalSoybeansPerImg,target_height=1200,target_width=1200,colorPad=colorPad)

Image IMG_2133_ready.JPG cropped.
Image IMG_2144_disease.JPG cropped.
Image IMG_2127_ready.JPG cropped.
Image IMG_2112_ready.JPG cropped.
Image IMG_2119_ready.JPG cropped.
Image IMG_2138_ready.JPG cropped.
Image IMG_2142_ready.JPG cropped.
Image IMG_2114_ready.JPG cropped.
Image IMG_2148_disease.JPG cropped.
Image IMG_2135_ready.JPG cropped.
Image IMG_2121_ready.JPG cropped.
Image IMG_2126_ready.JPG cropped.
Image IMG_2132_ready.JPG cropped.
Image IMG_2113_ready.JPG cropped.
Image IMG_2118_ready.JPG cropped.
Image IMG_2139_ready.JPG cropped.
Image IMG_2147_disease.JPG cropped.
Image IMG_2120_ready.JPG cropped.
Image IMG_2134_ready.JPG cropped.
Image IMG_2149_disease.JPG cropped.
Image IMG_2128_ready.JPG cropped.
Image IMG_2110_ready.JPG cropped.
Image IMG_2125_ready.JPG cropped.
Image IMG_2140_ready.JPG cropped.
Image IMG_2145_disease.JPG cropped.
Image IMG_2123_ready.JPG cropped.
Image IMG_2116_ready.JPG cropped.
Image IMG_2129_ready.JPG cropped.
Image IMG_2111_ready.JPG cropped.
Imag

In [46]:
# split3folder('../CroppedImageData/Crop_batch5/')

In [6]:
# ## batch 3
# inputDir = '/Users/huiminhan/Desktop/InfoVis/CropAI/RawData/Batch3/'
# outputDir = '/Users/huiminhan/Desktop/InfoVis/CropAI/CroppedImageData/Crop_batch3/'
# totalSoybeansPerImg = 15 # could be an approxiate value
# colorPad = False
# batchProcessing(inputDir,outputDir,totalSoybeansPerImg,target_height=900,target_width=900,colorPad=colorPad)

In [7]:
# ## batch 4
# inputDir = '/Users/huiminhan/Desktop/InfoVis/CropAI/RawData/batch_04/'
# outputDir = '/Users/huiminhan/Desktop/InfoVis/CropAI/CroppedImageData/Crop_batch4/'
# totalSoybeansPerImg = 15 # could be an approxiate value
# colorPad = False
# batchProcessing(inputDir,outputDir,totalSoybeansPerImg,target_height=1500,target_width=1500,colorPad=colorPad)

In [8]:
# inputDir = 'RawData/Edamame_photos_04092021_Batch1/'
# outputDir = './Crop_batch1_labeled/'
# imgNum = 90 # as long as greater than 15, doesn't matter what value put here
# batchProcessing(inputDir,outputDir,totalSoybeansPerImg,target_height=1200,target_width=1200)

In [9]:
# ready,late,disease = splitData('./Crop_batch1/')
# print('Totally {} ready to harvest soybeans images'.format(len(ready)))
# print('Totally {} late soybeans images'.format(len(late)))
# print('Totally {} soybeans with diseases images'.format(len(disease)))

In [32]:
## Label the images:
# def labelImage(inputDir):
#     for (path,dirname,filenames) in os.walk(inputDir):
#         for image in filenames:
#             if image.endswith('JPG'):
#                 if int(image.split('.')[0].split('_')[1]) >= 2144 and int(image.split('.')[0].split('_')[1]) <= 2149:
#                     print(image)
#                     print(image.split('.')[0]+'_'+'disease'+'.JPG')
#                     os.rename(inputDir+image,inputDir+image.split('.')[0]+'_'+'disease'+'.JPG')
#                 else:
#                     os.rename(inputDir+image,inputDir+image.split('.')[0]+'_'+'ready'+'.JPG')

# labelImage('../RawData/batch_05/')


IMG_2145_disease_disease.JPG
IMG_2145_disease_disease_disease.JPG
IMG_2144_disease_disease.JPG
IMG_2144_disease_disease_disease.JPG
IMG_2147_disease_disease.JPG
IMG_2147_disease_disease_disease.JPG
IMG_2149_disease_disease.JPG
IMG_2149_disease_disease_disease.JPG
IMG_2148_disease_disease.JPG
IMG_2148_disease_disease_disease.JPG
