In [2]:
import numpy as np
import getopt
import sys
from glob import glob
import os

import constants as c
from utils import process_clip


def process_training_data(num_clips):
    """
    Generates `num_clips` clips with `process_clip()` and writes each one as a compressed
    numpy archive under c.TRAIN_DIR_CLIPS.

    Files are named by a running index. Numbering starts at the count of entries that already
    match `c.TRAIN_DIR_CLIPS + '*'`, so a new run appends after earlier output.

    @param num_clips: The number of clips to process. No default here; the caller supplies it.

    @warning: This can take a couple of hours to complete with large numbers of clips.
    """
    # Existing entries determine the first index used by this run.
    first_index = len(glob(c.TRAIN_DIR_CLIPS + '*'))
    stop_index = first_index + num_clips

    clip_num = first_index
    while clip_num < stop_index:
        clip = process_clip()
        # np.savez_compressed appends the '.npz' extension to the given path.
        save_path = c.TRAIN_DIR_CLIPS + str(clip_num)
        np.savez_compressed(save_path, clip)

        # Report progress on the absolute (1-based) clip count, every 100 clips.
        clips_done = clip_num + 1
        if clips_done % 100 == 0:
            print('Processed %d clips' % clips_done)

        clip_num += 1


def usage():
    """
    Prints the command-line options understood by main() to stdout.

    The advertised --num_clips default (10000) matches the value main() starts with.
    """
    help_lines = (
        'Options:',
        '-n/--num_clips= <# clips to process for training> (Default = 10000)',
        '-t/--train_dir= <Directory of full training frames>',
        '-c/--clips_dir= <Save directory for processed clips>',
        "                (I suggest making this a hidden dir so the filesystem doesn't freeze",
        "                 with so many files. DON'T `ls` THIS DIRECTORY!)",
        '-o/--overwrite  (Overwrites the previous data in clips_dir)',
        '-H/--help       (Prints usage)',
    )
    print('\n'.join(help_lines))


def main():
    """
    Command-line entry point: parses options, configures the directories on the `constants`
    module (imported as `c`), then processes the requested number of training clips.

    Options (see usage()):
        -n/--num_clips=  number of clips to process (default 10000)
        -t/--train_dir=  sets c.TRAIN_DIR to c.get_dir(<arg>)
        -c/--clips_dir=  sets c.TRAIN_DIR_CLIPS to c.get_dir(<arg>)
        -o/--overwrite   clears c.TRAIN_DIR_CLIPS before processing
        -H/--help        prints usage and exits

    Exits with status 2 on an unrecognised option, on a non-integer --num_clips value, and
    after printing --help.
    """
    ##
    # Handle command line input
    ##

    num_clips = 10000
    overwrite = False

    try:
        # NOTE(review): '-f' / '--fwhat' are accepted and ignored. Presumably this is so the
        # `-f <connection file>` argument a Jupyter kernel is launched with does not raise
        # GetoptError -- confirm before removing.
        opts, _ = getopt.getopt(sys.argv[1:], 'fn:t:c:oH',
                                ['fwhat', 'num_clips=', 'train_dir=', 'clips_dir=', 'overwrite',
                                 'help'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-n', '--num_clips'):
            try:
                num_clips = int(arg)
            except ValueError:
                # Treat a malformed count like any other bad option instead of a traceback.
                usage()
                sys.exit(2)
        elif opt in ('-t', '--train_dir'):
            c.TRAIN_DIR = c.get_dir(arg)
        elif opt in ('-c', '--clips_dir'):
            c.TRAIN_DIR_CLIPS = c.get_dir(arg)
        elif opt in ('-o', '--overwrite'):
            # Only record the request here; see below for why clearing is deferred.
            overwrite = True
        elif opt in ('-H', '--help'):
            usage()
            sys.exit(2)

    # Clear only after every option has been seen, so `-o -c DIR` wipes DIR (the directory
    # that will actually be written to) rather than whatever TRAIN_DIR_CLIPS was when `-o`
    # happened to be parsed.
    if overwrite:
        c.clear_dir(c.TRAIN_DIR_CLIPS)

    # set train frame dimensions
    assert os.path.exists(c.TRAIN_DIR), 'Training directory does not exist: %s' % c.TRAIN_DIR
    c.FULL_HEIGHT, c.FULL_WIDTH = c.get_train_frame_dims()

    ##
    # Process data for training
    ##

    process_training_data(num_clips)


# Run the command-line entry point only when this code executes as the main module
# (i.e. not when it is imported from elsewhere).
if __name__ == '__main__':
    main()


Processed 10100 clips
Processed 10200 clips
Processed 10300 clips
Processed 10400 clips
Processed 10500 clips
Processed 10600 clips
Processed 10700 clips
Processed 10800 clips
Processed 10900 clips
Processed 11000 clips
Processed 11100 clips
Processed 11200 clips
Processed 11300 clips
Processed 11400 clips
Processed 11500 clips
Processed 11600 clips
Processed 11700 clips
Processed 11800 clips
Processed 11900 clips
Processed 12000 clips
Processed 12100 clips
Processed 12200 clips
Processed 12300 clips
Processed 12400 clips
Processed 12500 clips
Processed 12600 clips
Processed 12700 clips
Processed 12800 clips
Processed 12900 clips
Processed 13000 clips
Processed 13100 clips
Processed 13200 clips
Processed 13300 clips
Processed 13400 clips
Processed 13500 clips
Processed 13600 clips
Processed 13700 clips
Processed 13800 clips
Processed 13900 clips
Processed 14000 clips
Processed 14100 clips
Processed 14200 clips
Processed 14300 clips
Processed 14400 clips
Processed 14500 clips
Processed 

In [2]:
# Use the %pip magic so the package is installed into the running kernel's environment;
# `!pip` runs whichever pip is first on the shell PATH.
# NOTE(review): pip reports that this pin conflicts with other installed packages
# (e.g. tensorflow-gpu 2.1.0 requires scipy==1.4.1, umap-learn 0.4.6 requires scipy>=1.3.1).
# The reason for pinning 1.2.0 is not visible here -- confirm it is still required.
%pip install scipy==1.2.0

Collecting scipy==1.2.0
  Downloading scipy-1.2.0-cp37-cp37m-manylinux1_x86_64.whl (26.6 MB)
[K     |████████████████████████████████| 26.6 MB 19.5 MB/s eta 0:00:01
[31mERROR: umap-learn 0.4.6 has requirement scipy>=1.3.1, but you'll have scipy 1.2.0 which is incompatible.[0m
[31mERROR: tensorflow-privacy 0.5.1 has requirement tensorflow-estimator>=2.3.0, but you'll have tensorflow-estimator 2.1.0 which is incompatible.[0m
[31mERROR: tensorflow-gpu 2.1.0 has requirement scipy==1.4.1; python_version >= "3", but you'll have scipy 1.2.0 which is incompatible.[0m
[31mERROR: sklearn-pandas 2.0.3 has requirement pandas>=1.0.5, but you'll have pandas 1.0.2 which is incompatible.[0m
[31mERROR: sklearn-pandas 2.0.3 has requirement scikit-learn>=0.23.0, but you'll have scikit-learn 0.22.2.post1 which is incompatible.[0m
[31mERROR: sklearn-pandas 2.0.3 has requirement scipy>=1.4.1, but you'll have scipy 1.2.0 which is incompatible.[0m
[31mERROR: plotnine 0.7.1 has requirement pandas