<a href="https://colab.research.google.com/github/MatthewYancey/GANime/blob/master/src/process_frames_rips.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Processing

This notebook reads each video file in the input folder, saves a resized frame at a regular interval, and zips the training portion of those frames to Google Drive.

## Imports and Parameters

In [1]:
import os
import glob
import cv2
import shutil
from google.colab import drive
import zipfile

In [2]:
# Mount Google Drive first: the Drive-backed folders below live under it.
drive.mount('/content/gdrive')

# --- Locations -------------------------------------------------------------
# Folder that is globbed for the input videos.
VIDEO_PATH = '/content/gdrive/My Drive/repos/GANime/data_raw/'
# NOTE(review): identical to VIDEO_PATH and not used in the visible cells.
TEST_PATH = '/content/gdrive/My Drive/repos/GANime/data_raw/'
# Scratch folder on the Colab VM that receives the extracted frames.
FRAME_PATH = '/content/frames/'
# Passed to shutil.make_archive as the archive base name.
ZIP_FOLDER = '/content/gdrive/My Drive/repos/GANime/data_out/'

# --- Frame geometry --------------------------------------------------------
# Target (width, height) handed to cv2.resize for every saved frame.
IMAGE_WIDTH, IMAGE_HEIGHT = 512, 288

# --- Sampling --------------------------------------------------------------
# Seconds between saved frames; the extraction loop multiplies this by 24.
FRAME_SECONDS_SKIP = 0.5

# --- Dataset split fractions -----------------------------------------------
TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT = 0.70, 0.20, 0.10

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Video loop and frame saving

In [3]:
# Collect every entry directly under VIDEO_PATH, in lexicographic order so the
# processing order (and therefore the later split) is stable between runs.
video_files = sorted(glob.glob(f'{VIDEO_PATH}*'))
print(f'Number of videos: {len(video_files)}')

Number of videos: 8


In [None]:
# Start from an empty output folder so frames from a previous run never mix
# with the ones produced now.
try:
    shutil.rmtree(FRAME_PATH)
except FileNotFoundError:
    pass
os.makedirs(FRAME_PATH)

# The sampling interval is expressed in seconds but applied in frames, using a
# fixed frame rate rather than the one stored in each file.
# NOTE(review): assumes ~24 fps footage -- confirm for the videos in VIDEO_PATH.
ASSUMED_FPS = 24
# Computed once and rounded to a whole number of frames: a fractional step
# would make the modulo test below almost never equal zero.
frame_step = max(1, round(FRAME_SECONDS_SKIP * ASSUMED_FPS))

frame_list = []
for f in video_files:
    frame_count = 0
    # Drop only the final extension, whatever it is, to build the frame prefix.
    file_name = os.path.splitext(os.path.basename(f))[0]
    vidcap = cv2.VideoCapture(f)
    try:
        if not vidcap.isOpened():
            print(f'Could not open video, skipping: {f}')

        success, image = vidcap.read()

        # Walk the video frame by frame, keeping one frame every `frame_step`.
        while success:
            # Position reported by the capture after the read above.
            current_frame = int(vidcap.get(cv2.CAP_PROP_POS_FRAMES))

            if current_frame % frame_step == 0:
                resized = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
                frame_path = os.path.join(FRAME_PATH, f'{file_name}_{frame_count}.jpg')
                # Only record frames that were actually written; a path to a
                # missing file would break the later move into train/.
                if cv2.imwrite(frame_path, resized):
                    frame_list.append(frame_path)
                    frame_count += 1
                else:
                    print(f'Failed to write frame: {frame_path}')

            # Advance to the next frame.
            success, image = vidcap.read()
    finally:
        # Always free the decoder handle, even if resizing or writing raised.
        vidcap.release()

    print(f'Number of images saved: {frame_count}')

In [None]:
# Sanity check: how many frames were written across all videos.
total_frames = len(frame_list)
print(f'Total number of frames: {total_frames}')

In [None]:
# Split the frames into contiguous train / validation / test slices.
# Index slicing yields the same partitions as filtering by membership, but in
# linear time, and it leaves `frame_list` untouched so re-running this cell
# computes the same split instead of re-splitting the remainder.
n_train = int(len(frame_list) * TRAIN_SPLIT)
train_frames = frame_list[:n_train]
holdout_frames = frame_list[n_train:]

# The validation share is taken relative to what is left after training.
n_val = int(len(holdout_frames) * (VAL_SPLIT / (VAL_SPLIT + TEST_SPLIT)))
val_frames = holdout_frames[:n_val]
test_frames = holdout_frames[n_val:]

# Only the training frames are moved and archived here; val_frames and
# test_frames are computed but not written anywhere by this cell.
train_dir = FRAME_PATH + 'train/'
os.makedirs(train_dir, exist_ok=True)  # tolerate a leftover folder on re-run
for f in train_frames:
    shutil.move(f, f'{FRAME_PATH}train/{os.path.basename(f)}')

# NOTE(review): ZIP_FOLDER ends with '/', so the archive file name derived from
# it depends on how make_archive normalises the base name -- confirm where the
# zip actually lands before relying on its path.
shutil.make_archive(ZIP_FOLDER, 'zip', train_dir)
print('Saved zip')