### Basic and general utilities for competition

In [1]:
# default_exp core.core

In [2]:
#export
from fastai.imports import *

In [3]:
data_path = Path("/home/ubuntu/data/dfdc/")

In [4]:
video_path = data_path/"dfdc_train/dfdc_train_part_48"

#### Reading metadata file into a dataframe

In [5]:
#export
def read_metadata(fn):
    "read DFDC competition metadata as pd.DataFrame"
    metadf = pd.read_json(fn).T.reset_index()
    metadf.columns = ['fname','label','split','original']
    return metadf 

In [6]:
from fastai.data_block import get_files
metadata = get_files(video_path, extensions=['.json'])
metadf = read_metadata(metadata[0])
metadf.head()

Unnamed: 0,fname,label,split,original
0,noagmcpxfb.mp4,FAKE,train,dgtdgrzifi.mp4
1,alzbizkswy.mp4,REAL,train,
2,xcmkbpzfzw.mp4,FAKE,train,aoqzxlwvmi.mp4
3,uywdjgfgqr.mp4,FAKE,train,oupjhtodai.mp4
4,fbqwomdehr.mp4,FAKE,train,xtixietgjp.mp4


In [7]:
metadf.shape

(2463, 4)

### Extract original video files

In [7]:
#export
def get_original_video_list(path, metadf, dest=None):
    """
    get original video list from meta dataframe
    path: path of directory to video files
    dest: destination path to save the list
    """
    fnames = list(set(metadf['original'].dropna().values))
    video_files = pd.Series(list(map(lambda o: str(path/o), fnames)))
    if dest: video_files.to_csv(dest, index=False)
    return video_files

#### Reading face detection cvs

In [None]:
#export
def read_face_detection_df(path):
    df = pd.read_csv(path)
    df.face_detections = (df.face_detections.apply(lambda o: json.loads(o.replace("'", '"'))))
    return df

#### There are total of 50 files indices (0 - 49)

In [8]:
from fastai.vision import *

In [9]:
get_train_part_data_link = lambda o: Path(f"https://www.kaggle.com/c/16880/datadownload/dfdc_train_part_{o}.zip")

In [10]:
train_data_path = (data_path/'dfdc_train'); train_data_path.ls()

[PosixPath('/home/ubuntu/data/dfdc/dfdc_train/dfdc_train_part_48'),
 PosixPath('/home/ubuntu/data/dfdc/dfdc_train/dfdc_train_part_49')]

In [11]:
link = get_train_part_data_link(47); link

PosixPath('https:/www.kaggle.com/c/16880/datadownload/dfdc_train_part_47.zip')

### Running commands

In [1]:
#export
import subprocess, shlex, datetime, json

In [2]:
#export
def _now(): return datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")

def _add_dict_to_json(fn, d):
    "Adds a dictionary to json-like file or creates one"
    assert type(d) == dict
    path = Path(fn)
    if path.exists(): l = json.loads(path.open().read())
    else: l = []    
    l.append(d)
    with open(fn, "w") as f: f.write(json.dumps(l))
        
def run_command(command, logfn=None):
    "Run shell command as an external process, optionally write logs to logfn"
    if type(command) == str: command = shlex.split(command)
    elif type(command) == list: command = command
    else: raise AssertionError("Command should be string or list")
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout = []
    start_time = _now()
    while True:
        output = process.stdout.readline()
        if output == b'' and process.poll() is not None: break
        if output: 
            _out = output.decode(); print(_out.strip())
            stdout.append(_out)
    end_time = _now()
    rc = process.poll()
    _, stderr =  process.communicate()
    err = stderr.decode(); print(err)
    out = "".join(stdout)
    if logfn:
        d = {"start_time": start_time, "end_time": end_time, 
             "command": command, "stderr":err, "stdout":out}
        _add_dict_to_json(logfn, d)
    return rc

In [3]:
run_command(['ls', '-l'])

total 18600
-rw-rw-r-- 1 ubuntu ubuntu 4125887 Mar 14 08:16 001 - extract_faces.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 4636508 Mar 14 08:16 002 - face_detection_retinaface.ipynb
-rw-rw-r-- 1 ubuntu ubuntu   17415 Mar 14 08:16 003 - save_face_crops.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 1995407 Mar 14 13:53 004 - tl_baseline.ipynb
-rw-rw-r-- 1 ubuntu ubuntu    8767 Mar 15 17:08 00_core.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 3011212 Mar 14 09:30 01_video_core.ipynb
-rw-rw-r-- 1 ubuntu ubuntu    3320 Mar 15 19:01 02_download_unzip_files.ipynb
-rw-rw-r-- 1 ubuntu ubuntu    4150 Mar 14 09:14 10_bbox_utils.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 1055732 Mar 15 17:01 11_retinaface_detection.ipynb
-rw-rw-r-- 1 ubuntu ubuntu   14718 Mar 15 19:10 12_generate_face_detections.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 1912954 Mar 15 18:06 13_save_cropped_faces.ipynb
-rw-rw-r-- 1 ubuntu ubuntu   22008 Mar 15 20:06 14_download_detect_crop_save.ipynb
-rw-rw-r-- 1 ubuntu ubuntu    2365 Mar 16 08:01 15_extract_all.ipynb
-rw-rw-r-- 1 ubu

0

### export

In [4]:
from nbdev.export import notebook2script

In [5]:
notebook2script()

Converted 001 - extract_faces.ipynb.
Converted 002 - face_detection_retinaface.ipynb.
Converted 003 - save_face_crops.ipynb.
Converted 004 - tl_baseline.ipynb.
Converted 00_core.ipynb.
Converted 01_video_core.ipynb.
Converted 02_download_unzip_files.ipynb.
Converted 10_bbox_utils.ipynb.
Converted 11_retinaface_detection.ipynb.
Converted 12_generate_face_detections.ipynb.
Converted 13_save_cropped_faces.ipynb.
Converted 14_download_detect_crop_save.ipynb.
Converted 15_extract_all.ipynb.
Converted 20_datasets.ipynb.
Converted 21_single_frame_model.ipynb.
Converted index.ipynb.
Converted inspect original fake pairs for face detection.ipynb.
