In [None]:
# Import packages as needed
import os
import time
import cv2 as cv
import numpy as np

from tqdm import tqdm
from skimage import io
from skimage.util import img_as_ubyte
from sklearn.cluster import MiniBatchKMeans

In [None]:
# Adapted from frame_extraction.py in the DeepLabCut GitHub repository; the code below has been heavily condensed and modified for my purposes.
# The original file carries the following attribution above its code:
#
# 'DeepLabCut Toolbox (deeplabcut.org)
# © A. & M.W. Mathis Labs
# https://github.com/DeepLabCut/DeepLabCut
#
# Please see AUTHORS for contributors.
# https://github.com/DeepLabCut/DeepLabCut/blob/master/AUTHORS
#
# Licensed under GNU Lesser General Public License v3.0'
#

# Set static variables
numframes2pick = 10  # number of frames to extract per video (one per k-means cluster)
resizewidth = 30     # width in pixels that frames are downsampled to before clustering
batchsize = 5        # MiniBatchKMeans batch size
max_iter = 50        # MiniBatchKMeans maximum number of iterations

# Change working directory to Video Folder
os.chdir('/home/moonmoon/SCCichlidVid/Group/Extraction/')

# Loop through all files in working directory (sorted so videos are processed in a stable order)
for avi in sorted(os.listdir()):
    # Only .avi files are videos; skipping here (before any folder is created) avoids
    # making output folders for unrelated files and for the extracted-frames folder itself
    if not avi.endswith(".avi"):
        continue

    # Save video name without extension for output folder
    aviname, ext = os.path.splitext(avi)

    # Set output path for extracted frames inside folder named after video
    output_path = '/home/moonmoon/SCCichlidVid/Group/Extraction/extracted-frames/' + str(aviname) + '/'

    # Make output_path (no-op when it already exists)
    os.makedirs(output_path, exist_ok = True)

    # Print aviname
    print(aviname)

    # Open the video file
    cap = cv.VideoCapture(avi)

    # try/finally guarantees the capture is released even when a video is skipped or an error is raised
    try:
        # Skip files that OpenCV cannot open
        if not cap.isOpened():
            print("Could not open video, skipping: " + str(avi))
            continue

        # Pull frame count (reported by the container; the number of readable frames may be smaller)
        frame_count = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            print("Video reports no frames, skipping: " + str(avi))
            continue

        # Read the original frame width from the video; fall back to 320 if it is not reported
        x = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) or 320

        # Calculate ratio needed for downsampling
        ratio = resizewidth * 1.0 / x

        # DATA is allocated on the first successfully read frame, once the downsampled shape is known
        DATA = None
        # Number of frames actually read and stored in DATA
        frames_read = 0

        # Loop through the reported number of frames
        for counter in tqdm(range(frame_count), total = frame_count):
            # Read a frame from the video
            success, frame = cap.read()

            # Break loop when no more frames
            if not success:
                break

            # Resize image based on downsampling ratio, and convert to ubyte for kmeans calculations
            image = img_as_ubyte(cv.resize(frame, None, fx=ratio, fy=ratio, interpolation=cv.INTER_NEAREST))

            # Create DATA, numpy array with one downsampled grayscale image per frame
            if DATA is None:
                DATA = np.empty((frame_count, np.shape(image)[0], np.shape(image)[1]))

            # Store image mean over the colour channels in DATA for each frame
            DATA[counter, :, :] = np.mean(image, 2)
            frames_read = counter + 1

        # Without any readable frame there is nothing to cluster
        if DATA is None:
            print("No frames could be read, skipping: " + str(avi))
            continue

        # Drop rows that were never filled (np.empty leaves them uninitialised) so they
        # cannot distort the mean or the clustering when reading stopped early
        DATA = DATA[:frames_read]

        # Setup Frame Index numpy array, one entry per frame that was read
        Index = np.arange(0, frames_read, 1)

        # Per-video clustering parameters: never ask for more clusters than frames, and keep
        # the batch size within the frame count without overwriting the global batchsize
        n_clusters = min(numframes2pick, frames_read)
        video_batchsize = batchsize if batchsize <= frames_read else max(1, frames_read // 2)

        # Print update in code progress
        print("Kmeans clustering ... (this might take a while)")

        # Subtract DATA mean of axis 0 from DATA to create array of differences
        data = DATA - DATA.mean(axis = 0)
        data = data.reshape(frames_read, -1)  # stacking: one flattened image per row

        # Setup minibatchkmeans
        print('batching...')
        kmeans = MiniBatchKMeans(n_clusters = n_clusters, tol = 1e-3, batch_size = video_batchsize, max_iter = max_iter)
        kmeans = kmeans.fit(data) # run on difference data

        print("Storing Frames")
        # Create array to store chosen frames
        frames2pick = []
        # Loop through the clusters inside progress bar module, picking one random frame per cluster
        for clusterid in tqdm(range(n_clusters), total = n_clusters):
            # Positions of all frames assigned to this cluster
            clusterids = np.where(clusterid == kmeans.labels_)[0]
            # Store clusterids length in numimagesofcluster
            numimagesofcluster = len(clusterids)
            # Clusters can end up empty; only pick from non-empty ones
            if numimagesofcluster > 0:
                # Add frame id to frames to pick array
                frames2pick.append(Index[clusterids[np.random.randint(numimagesofcluster)]])

        print("Choosing Frames")
        # Loop through frames to pick array
        for counter, index in tqdm(enumerate(frames2pick), total = len(frames2pick)):
            # Set capture object to index (frame number); cast to a plain int for OpenCV
            cap.set(cv.CAP_PROP_POS_FRAMES, int(index))
            success, frame = cap.read() # read frame
            # Check if frame is successfully read
            if success:
                # If read successfully, create image name for frame to be saved
                img_name = (str(output_path) + str(index) + ".png")
                cv.imwrite(img_name, frame) # save frame
            else:
                print("Could not re-read frame " + str(index) + " of " + str(avi))
        print("Capture release")
    finally:
        # Release video capture object
        cap.release()

cut4-cropCichlid1_080624_2_ind1


100%|██████████| 216000/216000 [00:11<00:00, 19189.89it/s]


Kmeans clustering ... (this might take a while)
batching...
Storing Frames


100%|██████████| 10/10 [00:00<00:00, 5108.77it/s]


Choosing Frames


100%|██████████| 10/10 [00:00<00:00, 106.78it/s]

Capture release



