<a href="https://colab.research.google.com/github/MatthewYancey/16-9GAN/blob/master/process_frames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Processing

This notebook takes the video files in a folder and saves the individual frames.

In [1]:
import os
import glob
import cv2
import shutil

In [2]:
# parameters
data_in_path = '/content/gdrive/My Drive/16:9GAN/data_raw/'
data_out_path = '/content/gdrive/My Drive/16:9GAN/data_out/frames/16_9/'

seconds_per_frame = 0.5
image_size = 256
skip_seconds_beginning = 120
skip_seconds_ending = 120
number_of_images = 10000

crop_side = 'left'

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
# removes the existing frames and makes a fresh folder
try:
    shutil.rmtree(data_out_path)
except:
    print('Could not remove folder')
os.makedirs(data_out_path)

In [5]:
def dhash(image, hashSize=8):
	# convert the image to grayscale and resize the grayscale image,
	# adding a single column (width) so we can compute the horizontal
	# gradient
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	resized = cv2.resize(gray, (hashSize + 1, hashSize))
	# compute the (relative) horizontal gradient between adjacent
	# column pixels
	diff = resized[:, 1:] > resized[:, :-1]
	# convert the difference image to a hash and return it
	return sum([2 ** i for (i, v) in enumerate(diff.flatten()) if v])

In [6]:
files = glob.glob(data_in_path + '*/*')
frame_count = 0

for f in files:
    print(f)
    vidcap = cv2.VideoCapture(f)
    video_length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT) / 24)
    success, image = vidcap.read()


    # loops through and save the frames
    image_hashes = []
    while success:
        current_frame = vidcap.get(cv2.CAP_PROP_POS_FRAMES)
        # skips the intro, outtro, and every 24 frames
        if (current_frame >= (skip_seconds_beginning * 24) and current_frame <= (video_length - skip_seconds_ending) * 24 and current_frame % (seconds_per_frame * 24) == 0):
            
            # makes a small image and does a hash on it to see if we've had this image before
            image_small = cv2.resize(image, (100, 100))
            image_hash = dhash(image_small)

            # if not a image we've had before we save it 
            if image_hash not in image_hashes:

                # does some resizeing and single side cropping
                if crop_side == 'left':
                    crop_ammount = int(image.shape[1] * 0.25 * 0.5)
                    image = image[0:image.shape[0], crop_ammount:image.shape[1]]
                if crop_side == 'right':
                    crop_ammount = int(image.shape[1] * 0.25 * 0.5)
                    image = image[0:image.shape[0], 0:image_shape[1] - crop_ammount]
                image = cv2.resize(image, (image_size, image_size))

                # cv2_imshow(image)

                # appends the hash and saves the file
                image_hashes.append(image_hash)
                cv2.imwrite(f'{data_out_path}{frame_count}.jpg', image)
                frame_count += 1

        # loop to the next frame
        success, image = vidcap.read()
        
    # breaks the loop if we have enough images
    print(f'Number of images saved: {frame_count}')
    if frame_count >= number_of_images:
        break


/content/gdrive/My Drive/16:9GAN/data_raw/FMA - 16:9/Full Metal Alchemist Brotherhood.E01.Full Metal Alchemist.avi
Number of images saved: 1770
/content/gdrive/My Drive/16:9GAN/data_raw/FMA - 16:9/Full Metal Alchemist Brotherhood.E09.Created Feelings.avi
Number of images saved: 3474
/content/gdrive/My Drive/16:9GAN/data_raw/FMA - 16:9/Full Metal Alchemist Brotherhood.E12.One is All, All is One.avi
Number of images saved: 5288
/content/gdrive/My Drive/16:9GAN/data_raw/FMA - 16:9/Full Metal Alchemist Brotherhood.E10.Separate Destinations.avi
Number of images saved: 6835
/content/gdrive/My Drive/16:9GAN/data_raw/FMA - 16:9/Full Metal Alchemist Brotherhood.E06.Road of Hope.avi
Number of images saved: 8884
/content/gdrive/My Drive/16:9GAN/data_raw/FMA - 16:9/Full Metal Alchemist Brotherhood.E11.Miracle at Rush Valley.avi
Number of images saved: 10455


In [7]:
img_list = glob.glob(data_out_path + '/*')
print('Number of images: %d' % len(img_list))

Number of images: 10455
