In [31]:
import numpy as np
import cv2
import re
import os
import albumentations as A

In [32]:
# Source photographs; this tree is walked recursively when the crops are made.
img_dir = '/data/cv_project/spikedroid/ploid_classification'

# Project root. The mask folder and the output folder both live underneath it.
project_dir = '/home/jupyter-n.artemenko/projects/spikelet_pubescence'
mask_dir = f'{project_dir}/masks'
save_dir = f'{project_dir}/crops_without_spine'

In [33]:
# Make the project root the working directory: make_crop writes its
# crop_logs.txt into os.getcwd(), so this decides where the log ends up.
os.chdir(project_dir)

### **Get image, mask, make crop and save**

In [34]:
# Regex patterns used to parse directory paths and image file names.

# Ploidy group: one of three Russian folder names
# (hexaploids / diploids / tetraploids); searched in the directory path.
ploid_patt = r'(Гексаплоиды)|(Диплоиды)|(Тетраплоиды)'
# Species: "T." followed by one whitespace character and zero or more
# lowercase ASCII letters; searched in the directory path.
spec_patt = r'T\.\s[a-z]*'
# "pin" image names: 2-5 digits, "_", then either
# <one char>_{<roman-like letters>[-]<2 digits>} or a single digit,
# ending in .jpg / .JPG.
# NOTE(review): [^,(I|l)] is a character class, so it excludes the literal
# characters , ( I | l ) instead of expressing an alternation - confirm intent.
# NOTE(review): the [X..I] classes hold two look-alike X characters
# (presumably Latin X and Cyrillic Х) - confirm against real file names.
pin_patt = r'[\d]{2,5}_(([^,(I|l)]_\{[XХI]+-?\d{2}\})|(\d))\.(jpg|JPG)$'
# "table" image names: 2-5 digits, an optional _{...} suffix, ending in
# .jpg / .JPG. Tried only when pin_patt did not match.
table_patt = r'[\d]{2,5}(_\{[ХXI]+-?\d{2}\})?\.(jpg|JPG)$'
# Bare image name without extension. Not referenced in the visible part of
# this notebook.
name_patt = r'[\d_]*(\{[ХXI]{1,2}-?\d{2,5}\})?'

In [50]:
def make_crop(ploid, spec, img_type, img_name, src_dir=None, overwrite=True):
    """Cut the masked spike out of one image and save the crop as a PNG.

    The mask is read from ``{mask_dir}/{ploid}/{img_type}/<name>.png`` and the
    crop is written to ``{save_dir}/{ploid}/{spec}/{img_type}/<name>.png``,
    where ``<name>`` is ``img_name`` without its extension. Images that cannot
    be processed are skipped, and one line per skipped image is appended to
    ``crop_logs.txt`` in the current working directory. The log is appended
    to, never truncated, so delete it manually before a fresh run.

    Parameters
    ----------
    ploid : str or None
        Ploidy folder name; part of both the mask path and the output path.
    spec : str or None
        Species folder name; part of the output path.
    img_type : str
        Image-kind folder name (the notebook passes 'pin' or 'table').
    img_name : str
        File name of the source image.
    src_dir : str, optional
        Directory that contains ``img_name``. When omitted, the module-level
        variable ``dirpath`` (set by the ``os.walk`` loop) is used, which is
        what this function has always relied on.
    overwrite : bool, optional
        When False, an image whose crop already exists is skipped.
        Defaults to True (always recompute).

    Returns
    -------
    None
    """
    if src_dir is None:
        # Backward compatibility: fall back to the global set by the caller's loop.
        src_dir = dirpath
    src_path = f'{src_dir}/{img_name}'
    log_path = f'{os.getcwd()}/crop_logs.txt'

    def _log(message):
        # Append and close immediately: opening with 'w' on every call would
        # wipe earlier entries and leak the file handle.
        with open(log_path, 'a', encoding='utf-8') as logs:
            logs.write(message + '\n')

    if ploid is None or spec is None:
        # Without both parts there is no meaningful mask or output location.
        _log(f'Ploidy or species is unknown: {src_path}')
        return

    png_name = os.path.splitext(img_name)[0] + '.png'
    out_dir = f'{save_dir}/{ploid}/{spec}/{img_type}'
    out_path = f'{out_dir}/{png_name}'
    if not overwrite and os.path.exists(out_path):
        return

    # cv2.imread returns None instead of raising when a file cannot be read.
    raw_img = cv2.imread(src_path)
    if raw_img is None:
        _log(f'Cannot read image: {src_path}')
        return
    mask = cv2.imread(f'{mask_dir}/{ploid}/{img_type}/{png_name}')
    if mask is None:
        _log(f'There is no mask for image: {src_path}')
        return
    if mask.shape != raw_img.shape:
        _log(f'Dimension problem: {src_path}')
        return

    # Build a 3-channel mask from channel 1 only (green in OpenCV's BGR order);
    # presumably the spike is encoded in that channel - verify against the masks.
    spike_mask = np.repeat(mask[:, :, 1:2], 3, axis=2)
    # Keep only the pixels selected by the mask.
    img = cv2.bitwise_and(raw_img, spike_mask)

    # Contours are searched on the mask itself, so the largest bounding box
    # is the object of interest (no whole-image contour is present here).
    gray = cv2.cvtColor(spike_mask, cv2.COLOR_BGR2GRAY)
    cnts = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4 returns (contours, hierarchy); OpenCV 3 returns (image, contours, hierarchy).
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    if len(cnts) == 0:
        _log(f'Empty mask: {src_path}')
        return

    x, y, w, h = max((cv2.boundingRect(c) for c in cnts),
                     key=lambda box: box[2] * box[3])
    crop = img[y:y + h, x:x + w]

    # makedirs creates every missing level and copes with spaces in names,
    # so no shell call or manual escaping is needed.
    os.makedirs(out_dir, exist_ok=True)
    if not cv2.imwrite(out_path, crop):
        _log(f'Could not save crop: {out_path}')

In [None]:
# Walk the image tree and crop every file whose name matches a known pattern.
# NOTE: the loop variable must stay named `dirpath` - make_crop reads it as a
# global to locate the source image.
for dirpath, dirnames, filenames in os.walk(img_dir):
    # Ploidy and species are encoded in the directory path, so they are
    # parsed once per directory instead of once per file.
    ploid_match = re.search(ploid_patt, dirpath)
    ploid = ploid_match.group() if ploid_match else None
    spec_match = re.search(spec_patt, dirpath)
    spec = spec_match.group() if spec_match else None

    for fn in filenames:
        # Try the "pin" naming scheme first, then fall back to "table".
        img_type = 'pin'
        name_match = re.search(pin_patt, fn)
        if not name_match:
            img_type = 'table'
            name_match = re.search(table_patt, fn)
        if not name_match:
            continue

        if ploid is None or spec is None:
            # make_crop cannot build a mask/output path without both parts
            # (a missing species used to raise AttributeError inside it).
            print(f'Skipped, ploidy/species not found in path: {dirpath}/{fn}')
            continue

        # NOTE(review): re.search may match only a suffix of fn; the matched
        # text is what make_crop uses as the file name - confirm file names
        # always match in full.
        img_name = name_match.group()
        make_crop(ploid, spec, img_type, img_name)

### Crops for holdout

In [1]:
# Holdout set locations: masks and resulting crops live under base_dir,
# the raw photographs are read from inp_path.
base_dir = '/home/jupyter-n.artemenko/projects/spikelet_pubescence/test/Holdout/'
inp_path = '/data/cv_project/spikedroid/опушение/'
mask_path = base_dir + 'masks/'
save_path = base_dir + 'spikelets/'

# Substring searched for in a directory path when labelling holdout images.
pubesc_patt = r'опушением'

In [4]:
# Crop every holdout image with its mask and write one annotation line
# ("<crop path>;<target>;test") per successfully saved crop.

# cv2.imwrite does not create missing folders (it just fails), so make sure
# the output folder exists up front.
os.makedirs(save_path, exist_ok=True)

with open(f'{base_dir}holdout_annotation', 'w', encoding='utf-8') as oup:
    for dirpath, dirnames, filenames in os.walk(inp_path):
        # The label is encoded in the directory path, so it is the same for
        # every file of this directory.
        target = 1 if re.search(pubesc_patt, dirpath) else 0
        mask_subdir = 'опушенные' if target == 1 else 'неопушенные'

        for fn in filenames:
            src_path = f'{dirpath}/{fn}'
            # Masks are PNG files named after the image; swapping the
            # extension this way also covers upper-case ".JPG".
            tmp_path = f'{mask_path}{mask_subdir}/{os.path.splitext(fn)[0]}.png'

            # cv2.imread returns None instead of raising on unreadable files.
            raw_img = cv2.imread(src_path)
            if raw_img is None:
                print(f'Cannot read image: {src_path}')
                continue
            mask = cv2.imread(tmp_path)
            if mask is None:
                print(tmp_path, target)
                continue
            if mask.shape != raw_img.shape:
                print(f'Dimension problem: {src_path}')
                continue

            # 3-channel mask built from channel 1 only (green in BGR order);
            # presumably the spike is encoded there - verify against the masks.
            spike_mask = np.repeat(mask[:, :, 1:2], 3, axis=2)
            img = cv2.bitwise_and(raw_img, spike_mask)

            # Inverted Otsu threshold of the masked image: the dark background
            # becomes foreground, so the largest contour is the whole frame
            # and the second largest is taken as the object.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

            cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            # OpenCV 4 returns (contours, hierarchy); OpenCV 3 returns (image, contours, hierarchy).
            cnts = cnts[0] if len(cnts) == 2 else cnts[1]

            # Bounding boxes sorted by area, smallest first.
            areas = []
            for c in cnts:
                x, y, w, h = cv2.boundingRect(c)
                areas.append((x, y, w, h, w * h))
            areas = sorted(areas, key=lambda tup: tup[4])
            if len(areas) < 2:
                # E.g. an empty mask: there is no second-largest box to take.
                print(f'No object contour found: {src_path}')
                continue
            x, y, w, h, _ = areas[-2]

            # NOTE(review): this draws a 2 px black frame into the image that
            # is cropped next, so the frame ends up in the saved crop, whereas
            # make_crop saves crops without it - confirm which is intended.
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 2)

            res = img[y:y + h, x:x + w]
            out_path = f'{save_path}{fn}'
            # Only annotate crops that were really written to disk.
            if not cv2.imwrite(out_path, res):
                print(f'Could not save crop: {out_path}')
                continue
            oup.write(f'{out_path};{target};test\n')