In [28]:
from pathlib import Path
import ffmpeg
import yaml
import tqdm
import sys
import shutil
import pandas as pd
import numpy as np
from tempfile import NamedTemporaryFile

from simple_file_checksum import get_checksum
from joblib import Parallel, delayed

In [2]:
# Load the notebook-wide configuration. The context manager closes the file
# handle, which a bare open() passed straight into yaml.load() never does.
with open('settings.yaml') as settings_file:
    settings = yaml.load(settings_file, Loader=yaml.FullLoader)

## create raw file database

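Create a database of the raw video files and fill in the information that can be derived automatically (file name, MD5 hash, creator, video id and the probed video-stream properties). 
The start and end time of each video cut (`tmin` and `tmax`) are only filled with placeholder values (`-1` and `inf`); they need to be set by hand after running the code below. 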

In [4]:
def creator_info_from_filename(f):
    """Derive a ``(creator, video_id)`` pair from a raw video file name.

    The file-name prefix selects the parsing rule:

    * ``pexels...``     -> the text after the 7-character prefix, cut at the
      first space and first dot, is split on ``-`` (``_`` counts as ``-``);
      the last token is the video id, the preceding tokens joined by spaces
      are the creator.
    * ``production...`` -> creator is ``'pexels_unknown'``; the id is the text
      after the first 14 characters, cut at the first space and first dot.
    * ``apple...``      -> creator is ``'apple'``; the id is the text after
      the first 6 characters, cut at the first dot.
    * anything else     -> ``('unknown', 'unknown')``.
    """
    def _leading_token(text):
        # Keep what precedes the first space, then what precedes the first dot.
        return text.split(' ')[0].split('.')[0]

    if f.startswith('pexels'):
        tokens = _leading_token(f[7:]).replace('_', '-').split('-')
        return ' '.join(tokens[:-1]), tokens[-1]
    if f.startswith('production'):
        return 'pexels_unknown', _leading_token(f[14:])
    if f.startswith('apple'):
        return 'apple', f[6:].split('.')[0]
    return 'unknown', 'unknown'

In [5]:
# Every entry with an extension in the raw video directory, sorted so that the
# v_index assignment below is reproducible between runs.
fp_f_names = sorted(Path(settings['raw_video_directory']).glob('*.*'))
f_names = [p.name for p in fp_f_names]
creator_info = [creator_info_from_filename(f) for f in f_names]

# NOTE(review): hashing and probing run even when raw_video_db.tsv already
# exists and the results are then not written anywhere - consider moving them
# inside the guard below if no other cell relies on these variables.
hashes = Parallel(n_jobs=settings['preferred_n_jobs_info'])(delayed(get_checksum)(f, algorithm="MD5") for f in tqdm.tqdm(fp_f_names))
raw_video_info = Parallel(n_jobs=settings['preferred_n_jobs_info'])(delayed(ffmpeg.probe)(f) for f in tqdm.tqdm(fp_f_names))

# Keep only the configured properties of the first video stream of each file.
video_info = []
for fp, vi in zip(fp_f_names, raw_video_info):
    video_stream = next((stream for stream in vi['streams'] if stream['codec_type'] == 'video'), None)
    if video_stream is None:
        # Fail with the offending file name instead of an opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError(f'no video stream found in {fp}')
    video_info.append({dc: video_stream[dc] for dc in settings['desired_video_info']})

# Never overwrite an existing database: it may already contain hand-edited values.
if not Path('raw_video_db.tsv').exists():
    # The original passed `[...].extend(...)` (which evaluates to None) as the
    # column list, so the intended layout was silently dropped. Build the base
    # columns explicitly, in the declared order; the probed stream properties
    # are appended after them.
    db_df = pd.DataFrame({
        'v_index': [str(n + 1).zfill(4) for n in range(len(f_names))],
        'file_name': f_names,
        'MD5_hash': hashes,
        # Explicit comprehensions instead of zip(*...) so an empty directory
        # does not fail on tuple unpacking.
        'creator': [creator for creator, _ in creator_info],
        'video_id': [video_id for _, video_id in creator_info],
    })
    # Placeholders: the real cut boundaries have to be entered by hand.
    db_df['tmin'] = -1
    db_df['tmax'] = np.inf

    video_df = pd.DataFrame(video_info, columns=list(settings['desired_video_info']))
    db_df = pd.concat([db_df, video_df], axis=1)

    db_df.to_csv('raw_video_db.tsv', sep='\t', index=False)


  0%|          | 0/2215 [00:00<?, ?it/s]

100%|██████████| 2215/2215 [04:52<00:00,  7.58it/s]
100%|██████████| 2215/2215 [00:06<00:00, 353.84it/s]


## Convert videos

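The code below uses ffmpeg-python to convert the source "raw" videos into stimulus-ready videos in a single ffmpeg pass. The order of operations is chosen so that the resulting stimulus materials are of the highest attainable quality. 

This code should be run with the `tmin` and `tmax` values from the hand-edited version of the database generated by the code above (read below from `edited_video_db.tsv`). The conversion also reads a `horizontal_flip` column from that file, which the code above does not create. 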

In [78]:
def convert_video(video_filepath, tmin, tmax, duration, ffmpeg_settings, reverse=True, hflip=False, output_filepath=None):
    """Cut, retime, rescale and encode one clip with ffmpeg.

    The segment between ``tmin`` and ``tmax`` is trimmed from the input, its
    timestamps are shifted to start at zero and scaled so that the segment
    lasts ``duration``, and it is then resampled to the target frame rate,
    scaled, optionally reversed and/or mirrored, and encoded.

    Parameters
    ----------
    video_filepath : str or path-like
        Source video.
    tmin, tmax : float
        Start and end of the cut in the source video, in seconds. Must be
        finite with ``0 <= tmin < tmax``.
    duration : float
        Desired duration of the output clip, in seconds.
    ffmpeg_settings : dict
        Must provide the keys ``framerate``, ``size``, ``codec``, ``pix_fmt``,
        ``preset``, ``crf`` and ``format``.
    reverse : bool, default True
        Apply ffmpeg's ``reverse`` filter.
    hflip : bool, default False
        Mirror the clip horizontally.
    output_filepath : str or path-like, optional
        Destination file. When omitted, a temporary ``.mp4`` file is created
        (and not deleted automatically).

    Returns
    -------
    str
        Path of the written file.

    Raises
    ------
    ValueError
        If the cut window is not finite, starts before 0 or is empty.
    ffmpeg.Error
        Re-raised after ffmpeg's stderr has been printed.
    """
    # Reject unset placeholders (-1 / inf) and empty windows up front; they
    # would otherwise produce a zero or infinite speed factor below.
    if not (np.isfinite(tmin) and np.isfinite(tmax)) or tmin < 0 or tmax <= tmin:
        raise ValueError(f'invalid cut window: tmin={tmin}, tmax={tmax}')

    if output_filepath is None:
        # ffmpeg needs a path, not a file object; keep the file after closing
        # so that ffmpeg can overwrite it and the caller can use the result.
        with NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
            output_filepath = tmp_file.name
    output_filepath = str(output_filepath)

    # Speed factor that maps the (tmax - tmin) window onto `duration`.
    pts = duration / (tmax - tmin)
    try:
        pipeline = (
            ffmpeg
            .input(str(video_filepath))
            .trim(start=tmin, end=tmax)
            # Shift to zero first, then scale. The previous expression
            # `pts*PTS-tmin/TB` scaled before shifting and therefore left an
            # offset of (pts-1)*tmin whenever pts != 1.
            .setpts(f'{pts}*(PTS-STARTPTS)')
            .filter('fps', fps=ffmpeg_settings['framerate'], round='up')
            .filter('scale', size=ffmpeg_settings['size'], force_original_aspect_ratio='increase')
        )
        if reverse:
            pipeline = pipeline.filter('reverse')
        if hflip:
            pipeline = pipeline.hflip()
        # NOTE(review): with force_original_aspect_ratio='increase' the scale
        # filter may overshoot `size` in one dimension and the output-level
        # `s` then forces the exact size - confirm a squeeze rather than a
        # crop is intended.
        # NOTE(review): the fps filter already fixes the output rate; confirm
        # that `framerate` is meaningful as an output option.
        pipeline = pipeline.output(filename=output_filepath,
                    s=ffmpeg_settings['size'],
                    codec=ffmpeg_settings['codec'],
                    pix_fmt=ffmpeg_settings['pix_fmt'],
                    preset=ffmpeg_settings['preset'],
                    crf=ffmpeg_settings['crf'],
                    format=ffmpeg_settings['format'],
                    framerate=ffmpeg_settings['framerate']
                    )
        pipeline.run(overwrite_output=True, quiet=True)
    except ffmpeg.Error as e:
        # quiet=True captures stderr, so surface it before propagating.
        print(e.stderr.decode(), file=sys.stderr)
        raise
    return output_filepath

In [74]:
# below is for testing

# Reload the settings so that edits to settings.yaml are picked up; the
# context manager closes the file handle again.
with open('settings.yaml') as settings_file:
    settings = yaml.load(settings_file, Loader=yaml.FullLoader)

# The test clip is looked up in the configured raw video directory rather than
# through a hard-coded absolute path.
# NOTE(review): the output path is still specific to one user account.
# `reverse` is left at its default (True), so the test output is reversed.
convert_video(Path(settings['raw_video_directory']) / 'apple_001C94AE-2BA4-4E77-A202-F7DE60E8B1C8.mov',
              tmin=1,
              tmax=3.5,
              duration=settings['video_duration'],
              ffmpeg_settings=settings['ffmpeg_settings'],
              output_filepath='/tank/tkn219/Downloads/ffmpeg_test.mp4')

[2153690:2153690:0128/001342.616061:ERROR:ozone_platform_x11.cc(239)] Missing X server or $DISPLAY
[2153690:2153690:0128/001342.616104:ERROR:env.cc(255)] The platform failed to initialize.  Exiting.


In [80]:
# and we'd run the conversion with something like this (untested!)
with open('settings.yaml') as settings_file:
    settings = yaml.load(settings_file, Loader=yaml.FullLoader)

db_df = pd.read_csv('edited_video_db.tsv', sep='\t')

raw_dir = Path(settings['raw_video_directory'])
stimulus_dir = Path(settings['stimulus_video_directory'])
# ffmpeg does not create missing output directories.
stimulus_dir.mkdir(parents=True, exist_ok=True)

# One pass per playback direction: 'fw' (not reversed) and 'rv' (reversed).
for direction, dir_label in zip([False, True], ['fw', 'rv']):
    Parallel(n_jobs=settings['preferred_n_jobs_transcode'], backend="multiprocessing")(
        delayed(convert_video)(
            video_filepath=raw_dir / row['file_name'],
            tmin=row['tmin'],
            tmax=row['tmax'],
            duration=settings['video_duration'],
            ffmpeg_settings=settings['ffmpeg_settings'],
            reverse=direction,
            # An empty cell is read as NaN and bool(NaN) is True; treat a
            # missing value as "do not flip".
            hflip=bool(pd.notna(row['horizontal_flip']) and row['horizontal_flip']),
            # v_index is parsed as an integer by read_csv, so restore the
            # zero padding used when the database was created.
            output_filepath=stimulus_dir / f'{str(row["v_index"]).zfill(4)}_{dir_label}.mp4')
        # total= lets tqdm show a real progress bar instead of a bare counter.
        for _, row in tqdm.tqdm(db_df.iterrows(), total=len(db_df), desc=dir_label))




[A[A

15it [03:40, 14.68s/it]


[A[A

[A[A

[A[A

In [13]:
# Full path of the raw video file belonging to the database row labelled 0.
Path(settings['raw_video_directory']).joinpath(db_df.loc[0, 'file_name'])

PosixPath('/tank/shared/2022/arrow_of_time/derivatives/stimuli/raw/flat_db/Person Going Through A Bible.mp4')

In [44]:
# Convert a single database row in the forward direction, mirroring one
# iteration of the batch conversion.
direction, dir_label = False, 'fw'
row = db_df.loc[0]
print(str(Path(settings['raw_video_directory']) / row['file_name']))
convert_video(video_filepath=Path(settings['raw_video_directory']) / row['file_name'],
              tmin=row['tmin'],
              tmax=row['tmax'],
              duration=settings['video_duration'],
              ffmpeg_settings=settings['ffmpeg_settings'],
              reverse=direction,
              # An empty cell is read as NaN and bool(NaN) is True; treat a
              # missing value as "do not flip".
              hflip=bool(pd.notna(row['horizontal_flip']) and row['horizontal_flip']),
              # Zero-pad the index exactly like the batch conversion does, so
              # both cells write to the same file name for a given row.
              output_filepath=Path(settings['stimulus_video_directory']) / f'{str(row["v_index"]).zfill(4)}_{dir_label}.mp4')

/tank/shared/2022/arrow_of_time/derivatives/stimuli/raw/flat_db/Person Going Through A Bible.mp4
moving /tank/shared/2022/arrow_of_time/derivatives/stimuli/raw/flat_db/Person Going Through A Bible.mp4 to /tmp/0040780045.mp4


In [41]:
# Full path of the raw video file for the row currently held in `row`.
Path(settings['raw_video_directory'], row['file_name'])

PosixPath('/tank/shared/2022/arrow_of_time/derivatives/stimuli/raw/flat_db/Person Going Through A Bible.mp4')