# Extract the quadrat from all 732 videos in the dataset

> Develop Python scripts to organize and pre-process underwater mussel image data for ingestion by computer vision model, including:
>> a. Extraction of stills from video sequence

This notebook iterates over every quadrat video found under `DATA_PATH` (each one is bound in turn to `file`), extracts the lines that compose the quadrat, and estimates the corner points for cropping its 
contents. For each sampled frame (one out of every `skipFrames` frames) in which a plausible quadrat is found, 
the cropped still is saved as a JPEG image in a per-video folder under `SAVE_PATH`.

In [None]:
import os
import os.path as osp
import sys

# Check if notebook is running in Colab or local workstation
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # Search for all video files on Google Drive...
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_PATH = r'/content/drive/My Drive/Data'
    
    # cd into current directory so local imports work
    %cd '/content/drive/My Drive/cciw-zebra-mussel/quadrat-extraction/'
    
    # clone repo, install packages
else:
    DATA_PATH = osp.join(os.environ['DATA_PATH'], 'cciw/Data')
    SAVE_PATH = osp.join(os.environ['DATA_PATH'], 'cciw/dataset_raw/quadrat-extraction/videos')

In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from glob import glob
from utils import crop_still_image
from utils import crop_still_image_no_rotate
from utils import compute_pairwise_distances

In [None]:
# Forwarded as `do_draw` to the cropping helpers.
DRAW = False

# Number of frames read from the video per loop iteration; only the last
# frame of each batch is processed.
skipFrames = 10

# Column indices of the x and y coordinates in a corner-point array.
X = 0
Y = 1

In [None]:
# Collect every quadrat video in the dataset. glob returns matches in
# arbitrary order, so sort them to make the processing order reproducible.
all_videos = sorted(
    glob(osp.join(DATA_PATH, 'Videos_and_stills/GLNI/*/*/*/Videos/Quad*/*.mp4')))

# Table describing the quadrat videos; its first column is used as the index.
videotable_path = osp.join(DATA_PATH, 'Tables', 'QuadratVideos.csv')
video_df = pd.read_csv(videotable_path, index_col=0)

# The former per-video lookups (`vpath`, `tokens`) were dropped: they read
# `file`, which is only bound by the extraction loop in a later cell, so a
# top-to-bottom run raised NameError here, and neither value was used.
print('Found %d videos' % len(all_videos))

In [None]:
# For every video: create its output folder, pick detector parameters from
# the input resolution, then sample frames, locate the quadrat corners and
# save the cropped still as a JPEG when the corners look plausible.
for video_index, file in enumerate(all_videos):

    # basename() copes with the platform's path separators; the name is
    # everything before the first '.', exactly as before.
    video_name = osp.basename(file).split('.')[0]

    output_folder = os.path.join(SAVE_PATH, video_name)
    if not osp.exists(output_folder):
        os.makedirs(output_folder)
        print('Made output folder ', output_folder)
    else:
        print('Output folder %s already exists' % output_folder)

    cap = cv2.VideoCapture(file)
    try:
        # Bail out early on unreadable files instead of crashing on a None
        # frame further down.
        if not cap.isOpened():
            print('cap is not open')
            continue

        sz = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        print('Raw input resolution', sz)

        # read first frame to determine the processed resolution
        ret, im = cap.read()
        if not ret:
            print('cap is not open')
            continue

        # set additional meta-parameters according to input res
        if sz[0] == 1440:
            # x_trim and y_trim are used to remove black padding
            # which triggers spurious edges
            x_trim, y_trim = 1, 145
            im = im[y_trim:-y_trim, x_trim:-x_trim, :]
            crop_frame_border = True

            # canny_thresh#: hysteresis values for Canny edge
            # detector, input to HoughLines
            canny_thresh1, canny_thresh2 = 10, 45

            # threshold: accumulator threshold, return lines with
            # more than threshold of votes (intersection points)
            threshold = 125

            # minLineLength: line segments shorter than this are
            # rejected (pixels)
            mLL = 400

            # maxLineGap: maximum allowed gap between points on the
            # same line to link them (pixels)
            mLG = 150
        else:
            # params as described above
            canny_thresh1, canny_thresh2 = 40, 100
            threshold = 125
            mLG, mLL = 150, 500
            crop_frame_border = False

        # this method may downsample, so report the processed image
        # resolution rather than the raw one
        img, _, crop = crop_still_image_no_rotate(
            im, mll=mLL, mlg=mLG, threshold=threshold,
            canny_1=canny_thresh1, canny_2=canny_thresh2, do_draw=DRAW)
        sz = (img.shape[1], img.shape[0])
        print(sz)
        print('Opened stream for writing, output resolution is', sz)

        # NOTE: currentFrame is the number of frames consumed by earlier
        # loop iterations. The frame actually decoded and saved under this
        # label lies `skipFrames` frames further into the video, because the
        # first frame was already read above. Kept as-is so existing output
        # file names do not change.
        currentFrame = 0

        # it can take 30s-1min to process an entire video
        while True:

            # Read skipFrames frames and keep only the last one
            for _ in range(skipFrames):
                ret, im = cap.read()
            if not ret:
                break

            if crop_frame_border:
                im = im[y_trim:-y_trim, x_trim:-x_trim, :]

            # Do processing
            img, _, crop = crop_still_image(
                im, mll=mLL, mlg=mLG, threshold=threshold,
                canny_1=canny_thresh1, canny_2=canny_thresh2, do_draw=DRAW)

            # Corner extraction is best-effort: frames without a usable
            # quadrat are skipped. Only Exception is caught so Ctrl-C /
            # kernel interrupt still stops the run, and the reason is
            # reported instead of being silently discarded.
            try:
                x_start = crop[:, X].min()
                x_end = crop[:, X].max()
                y_start = crop[:, Y].min()
                y_end = crop[:, Y].max()

                # column 2 presumably holds the corner-to-corner distance;
                # verify against utils.compute_pairwise_distances
                too_short = (compute_pairwise_distances(crop)[:, 2] < mLL).sum()
            except Exception as err:
                print('Skipping frame %d of %s: %s' % (currentFrame, video_name, err))
            else:
                if too_short:
                    print('Cannot crop: found corners do not form a square')
                else:
                    # save still image in jpeg format
                    out_file = osp.join(
                        output_folder, video_name + '_crop_frame-%d.jpg' % currentFrame)
                    try:
                        written = cv2.imwrite(
                            out_file, img[y_start:y_end, x_start:x_end, :])
                    except Exception:
                        written = False
                    # imwrite signals most failures through its return value
                    if not written:
                        print('Cannot write ', out_file)

            # increment frame counter
            currentFrame += skipFrames
    finally:
        # Always free the decoder, even when a frame raised unexpectedly
        cap.release()

You should see a new folder of cropped JPEG stills for each video under `SAVE_PATH`. You may not be able to view them directly in Google Drive,
but they can be downloaded to your local machine.