# Extract the quadrat from all still images in dataset

> Develop Python scripts to organize and pre-process underwater mussel image data for ingestion by computer vision model, including:
>> b. Cropping of stills within quadrats

### To successfully extract a quadrat the algorithm assumes:

- All four corners of the quadrat are contained in the image, i.e., the side lengths or corners are not 
cropped from the scene.

- The quadrat side lengths are not occluded, e.g., by diver equipment, mesh bags, or vegetation.

- The image is sufficiently clear, i.e., not turbulent or cloudy from disrupted sediment.

- The camera angle is between $65^{\circ}$ and $90^{\circ}$ with respect to the top surface of the quadrat.
Note that this is separate from quadrat rotation in the camera plane, which can be arbitrary.

- The camera is close enough to the quadrat that the side lengths are at least 400 pixels 
for $1080 \times 1440$ resolution, 500 pixels for $1080 \times 1920$ HD resolution, or 400 pixels for portrait mode in HD resolution.

The algorithm still works reasonably well in some cases even when the assumptions are violated, e.g., input 7 with the mesh bag covering one of the corners, as missing corner coordinates can sometimes be inferred if enough complementary lines are detected. Conversely, even when the assumptions are satisfied, a best effort is made to extract the *interior* of the quadrat, but this occasionally won't be possible due to missing or misleading lines, and part of the quadrat may be included in the output.

Prior to running this notebook you must set:

1. The `DRAW` variable annotates intermediate results on the image and is nice for visualizing results. It 
should be set to False for saving the final output.

2. The default values for all other parameters can be left as they are.

When `DRAW=True`, you will see annotations in the __Post-Processing__ pane.
- Large green circles are corner centroids found by K-means
- Medium blue circles are all line intersection points after the final stage of processing
- Small white circles are proposed crop locations (the quadrat interior corner points)
- The X lines corresponding to 'Using X of Y lines' after rejecting outliers and merging similar lines are shown in blue

On completion you should have 26 cropped images in the folder `My Drive/Data/Quadrat_Extraction_from_Stills`

Dependencies:
- `cv2.__version__==4.2.0` (OpenCV)
- `skimage.__version__==0.16.2` 

In [None]:
# Passed as `do_draw` to crop_still_image in the extraction loop. Per the
# notes above, True annotates intermediate results on the image for
# visualization; keep it False when saving the final cropped output.
DRAW = False

In [None]:
import os
import os.path as osp
import sys

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_PATH = r'/content/drive/My Drive/Data'
    SAVE_PATH = osp.join(DATA_PATH, 'Quadrat_Extraction_from_Stills')
    
    # cd into current directory so local imports work
    %cd '/content/drive/My Drive/cciw-zebra-mussel/quadrat-extraction/'
else:
    DATA_PATH = osp.join(os.environ['DATA_PATH'], 'cciw/Data')
    SAVE_PATH = osp.join(os.environ['DATA_PATH'], 'cciw/dataset_raw/quadrat-extraction/stills')
    
print('Reading data from', DATA_PATH)    
print('Saving cropped images to', SAVE_PATH)    

In [None]:
import cv2
import numpy as np

import time
from glob import glob

# local import
from utils import crop_still_image, compute_pairwise_distances

import matplotlib.pyplot as plt

In [None]:
# Search for all still files in the dataset.
# glob returns matches in arbitrary, filesystem-dependent order, so the
# result is sorted to make the 'Input N' numbering reproducible across runs
# and machines.
stills = sorted(glob(osp.join(DATA_PATH, 'Videos_and_stills/GLNI/*/*/*/Stills/Quad*/*.jpg')))
print('Found %d still images' % len(stills))

In [None]:
# Column indices used to pick out the x and y coordinates, respectively
# (e.g. crop[:, X] and crop[:, Y] in the extraction loop).
X = 0
Y = 1

In [None]:
# Extract and save the quadrat interior from every still image.
#
# For each still: load it, pick detector parameters from the image width,
# strip black padding, run crop_still_image, then (when the returned corner
# points are far enough apart) show and save the cropped region.
# Problems with a single image are reported and the loop moves on.

# cv2.imwrite does not create missing directories; it only reports failure.
os.makedirs(SAVE_PATH, exist_ok=True)

for i, still_path in enumerate(stills):

    # load the input image; cv2.imread returns None (it does not raise)
    # when the file is missing or cannot be decoded
    bgr = cv2.imread(still_path)
    if bgr is None:
        print('Cannot read ', still_path)
        continue

    # set parameters according to input resolution (image width in pixels)
    if bgr.shape[1] == 1440:
        # x_trim and y_trim are used to remove black padding for
        # 1080x1440 video, which triggers spurious edges
        x_trim, y_trim = 1, 145
        bgr = bgr[y_trim:-y_trim, x_trim:-x_trim, :]

        # canny_thresh: initial hysteresis values for Canny edge detector,
        # input to HoughLines
        canny_thresh1, canny_thresh2 = 10, 45
        # threshold: accumulator threshold, return lines with more than
        # threshold of votes (intersection points)
        threshold = 125
        # minLineLength: line segments shorter than this are rejected (pixels)
        mLL = 400
        # maxLineGap: maximum allowed gap between points on the same line
        # to link them (pixels)
        mLG = 150

    elif bgr.shape[1] == 1920:
        # canny_thresh: initial hysteresis values for Canny edge detector,
        # input to HoughLines
        canny_thresh1, canny_thresh2 = 40, 100
        # threshold: accumulator threshold, return lines with more than
        # threshold of votes (intersection points)
        threshold = 125
        # minLineLength: line segments shorter than this are rejected (pixels)
        mLL = 500
        # maxLineGap: maximum allowed gap between points on the same line
        # to link them (pixels)
        mLG = 150

        # find the first column (from the left) that is not zero padding
        for pad_idx in range(bgr.shape[1]):
            if bgr[:, pad_idx].mean() > 1:
                break
        else:
            # every column is black: trimming would leave an empty image
            print('Cannot crop: image is entirely black')
            continue

        # if pad_idx > 500 it means the HD image is in portrait mode and
        # the side lengths are smaller
        if pad_idx > 500:
            mLL = 400

        # trim the padding (plus a small margin) symmetrically on both sides
        h_pad = 10
        bgr = bgr[:, pad_idx + h_pad:-(h_pad + pad_idx), :]

    else:
        # no parameters are defined for this width; skipping avoids silently
        # reusing the values chosen for the previous image
        print('Cannot crop: unsupported image width %d' % bgr.shape[1])
        continue

    # keep an un-annotated RGB copy for display of the input and the crop
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    plt.close('all')
    plt.figure()
    plt.imshow(rgb)
    plt.title('Input %d' % (i + 1), fontsize=24)
    plt.show()

    start_time = time.time()
    bgr, edges, crop = crop_still_image(
        bgr, mll=mLL, mlg=mLG, threshold=threshold,
        canny_1=canny_thresh1, canny_2=canny_thresh2, do_draw=DRAW)
    print('Processing took %.2f sec' % float(time.time() - start_time))

    try:
        # axis-aligned bounding box of the returned corner points
        x_start = crop[:, X].min()
        x_end = crop[:, X].max()
        y_start = crop[:, Y].min()
        y_end = crop[:, Y].max()

        # reject the result when any pairwise distance is below the
        # minimum side length
        if (compute_pairwise_distances(crop)[:, 2] < mLL).any():
            print('Cannot crop: found corners do not form a square')
        else:
            plt.figure()
            plt.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            plt.title('Post-Processing', fontsize=24)
            plt.show()

            plt.figure()
            plt.imshow(rgb[y_start:y_end, x_start:x_end, :])
            plt.show()

            # basename handles both '/' and the platform's own separator;
            # the stem is still cut at the first '.' so output names match
            # those produced previously
            out_file = osp.basename(still_path).split('.')[0] + '_crop.jpg'
            try:
                # imwrite signals most failures by returning False
                written = cv2.imwrite(
                    osp.join(SAVE_PATH, out_file),
                    bgr[y_start:y_end, x_start:x_end, :])
            except Exception:
                written = False
            if not written:
                print('Cannot write ', out_file)
    except Exception as err:
        # Best effort: report and continue with the next image.
        # KeyboardInterrupt/SystemExit are deliberately not caught so the
        # run can still be interrupted.
        print('Cannot crop: insufficient number of corner points found (%s: %s)'
              % (type(err).__name__, err))

# End demo

In [None]:
# Plot every still image at half resolution (every second row and column),
# titled with its 1-based index.
for i, still_path in enumerate(stills):
    # cv2.imread returns None (it does not raise) for unreadable files
    bgr = cv2.imread(still_path)
    if bgr is None:
        print('Cannot read ', still_path)
        continue
    # OpenCV loads images as BGR while matplotlib expects RGB, so display
    # the converted array; otherwise red and blue are swapped
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    plt.close('all')
    plt.figure(figsize=(10, 8))
    plt.imshow(rgb[::2, ::2, :])
    plt.title(i + 1)
    plt.show()

In [None]:
# Disabled scratch cell: the plotting code below is wrapped in a string
# literal, so none of it executes.
# NOTE(review): it references `img`, which is not defined in the visible
# cells -- confirm where it should come from before re-enabling.
'''
#fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig, ax1 = plt.subplots(1, 1, figsize=(8, 5))
#bgr = cv2.imread(stills[i])
#rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
ax1.imshow(img)
#ax1.imshow(edges)
#ax1.set_title('Canny edges', fontsize=16)
#ax2.imshow(bgr)
#ax2.set_title('Still image with corner points', fontsize=16)
ax1.axis('off')
#ax2.axis('off')
plt.tight_layout()
plt.show()
#fig.savefig('quatract-extraction-still-%d-step-4.pdf' % i)
'''