<a href="https://colab.research.google.com/github/Choa-Lee/UNIUNI_project/blob/master/extract_files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Extract frames from video files (.mp4) in the train/test data folders

In [1]:
import csv
import glob
import os
import sys
from subprocess import call

In [None]:
def get_nb_frames_for_video(video_parts):
    """Count the extracted JPEG frames that belong to one video.

    Frames are expected to be named ``<filename_no_ext>-<counter>.jpg`` and to
    live in ``<train_or_test>/<classname>/``.

    Args:
        video_parts: 4-tuple ``(train_or_test, classname, filename_no_ext,
            filename)``; the last element is ignored.

    Returns:
        The number of matching frame files (0 if the folder is missing or
        holds no frames for this video).
    """
    train_or_test, classname, filename_no_ext, _ = video_parts
    prefix = filename_no_ext + '-'
    suffix = '.jpg'

    # Escape the literal parts so characters such as '[' or '*' in a folder or
    # video name are not interpreted as glob wildcards.
    pattern = os.path.join(
        glob.escape(os.path.join(train_or_test, classname)),
        glob.escape(prefix) + '*' + suffix)

    nb_frames = 0
    for frame_path in glob.glob(pattern):
        # Keep only "<stem>-<digits>.jpg": a bare prefix match would also
        # count frames of other videos whose name merely starts with this
        # stem (e.g. "v_1" vs. "v_10-0001.jpg").
        counter = os.path.basename(frame_path)[len(prefix):-len(suffix)]
        if counter.isdigit():
            nb_frames += 1
    return nb_frames


In [None]:
def get_video_parts(video_path):
    """Split a video path into its dataset components.

    The path is expected to have the form
    ``<train|test>/<classname>/<filename>.<ext>``.

    Args:
        video_path: Path to a video file, relative to the dataset root.

    Returns:
        A 4-tuple ``(train_or_test, classname, filename_no_ext, filename)``.

    Raises:
        IndexError: If the path has fewer than three components.
    """
    # Normalise first so a leading "./" or doubled separators do not shift
    # the positional components.
    parts = os.path.normpath(video_path).split(os.path.sep)
    train_or_test, classname, filename = parts[0], parts[1], parts[2]

    # splitext removes only the final extension, so a name such as
    # "clip.part1.mp4" keeps its full stem instead of collapsing to "clip".
    filename_no_ext = os.path.splitext(filename)[0]

    return train_or_test, classname, filename_no_ext, filename


In [None]:
def check_already_extracted(video_parts):
    """Tell whether frames for a video have already been written.

    Only the first frame, ``<filename_no_ext>-0001.jpg`` inside
    ``<train_or_test>/<classname>/``, is probed.

    Args:
        video_parts: 4-tuple ``(train_or_test, classname, filename_no_ext,
            filename)``; the last element is ignored.

    Returns:
        True if the first frame exists on disk, False otherwise.
    """
    split_dir, label, stem, _ = video_parts
    first_frame = os.path.join(split_dir, label, stem + '-0001.jpg')
    return os.path.exists(first_frame)

In [None]:
def extract_files(extenssion='mp4'):
    """Extract JPEG frames from every video under ``train/`` and ``test/``.

    Each ``<train|test>/<class>/<name>.<extenssion>`` video is handed to
    ffmpeg, which writes its frames next to the video as
    ``<name>-0001.jpg``, ``<name>-0002.jpg``, ... Videos whose first frame
    already exists are not re-extracted. One row per video, in the format
    ``[train|test], class, filename, nb frames``, is written to
    ``data_file.csv`` in the current working directory.

    Extraction is equivalent to running:
    ``ffmpeg -i video.mpg image-%04d.jpg``

    Args:
        extenssion: Video file extension to look for, without the leading
            dot. The spelling is kept as-is so keyword callers keep working.
    """
    data_file = []
    folders = ['train', 'test']

    for folder in folders:
        # glob order is arbitrary; sort so the CSV rows are reproducible.
        class_folders = sorted(glob.glob(os.path.join(folder, '*')))

        for vid_class in class_folders:
            class_files = sorted(
                glob.glob(os.path.join(vid_class, '*.' + extenssion)))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path)

                train_or_test, classname, filename_no_ext, filename = video_parts

                if not check_already_extracted(video_parts):
                    src = os.path.join(train_or_test, classname, filename)
                    dest = os.path.join(train_or_test, classname,
                                        filename_no_ext + '-%04d.jpg')
                    return_code = call(["ffmpeg", "-i", src, dest])
                    if return_code != 0:
                        # Report the failure instead of dropping it; the
                        # video is still listed with whatever frames exist.
                        print("ffmpeg exited with code %d for %s"
                              % (return_code, src), file=sys.stderr)

                nb_frames = get_nb_frames_for_video(video_parts)

                data_file.append([train_or_test, classname, filename_no_ext, nb_frames])

                print("Generated %d frames for %s" % (nb_frames, filename_no_ext))

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('data_file.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print('Extracted and wrote %d video files.' % (len(data_file)))

In [None]:
def main():
    """Command-line entry point.

    Extracts images from videos and builds a new file that can be used as
    the data input file, with rows of the form:
    [train|test], class, filename, nb frames

    Exactly one command-line argument (the video extension) is expected;
    with any other argument count a usage hint is printed instead.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print('extract_files.py mp4')
        return
    extract_files(cli_args[0])


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()