In [1]:
import pandas as pd
import shutil
import numpy as np
import os
import cv2
from random import seed, shuffle
seed(42)

In [2]:
def save_frame(read_path, frame_step=2):
    """Extract frames from a video file and save them as numbered JPEG images.

    The output folder is derived from ``read_path``, which must use ``/`` as
    separator and contain at least a parent folder: all but the last three
    path components, then ``frames``, then the name of the video's parent
    folder, then the video file name with ``.`` and ``-`` replaced by ``_``.
    For example ``./fake/abc.mp4`` is written to ``frames/fake/abc_mp4``.
    Images are named ``frame_0.jpg``, ``frame_1.jpg``, ... in the order they
    are written.

    Args:
        read_path: Path to the video file.
        frame_step: Keep one frame out of every ``frame_step`` frames of the
            video. The default of 2 keeps every other frame, which is what
            this function has always done (previously through an extra,
            discarded ``video.read()`` call). Pass 1 to keep every frame.

    Returns:
        The number of frames written.

    Raises:
        ValueError: If ``frame_step`` is less than 1.
    """
    if frame_step < 1:
        raise ValueError(f'frame_step must be >= 1, got {frame_step}')

    path = read_path
    print(path)
    filename = os.path.basename(path)
    print(filename)
    parts = read_path.split('/')
    write_path = os.path.join(
        '/'.join(parts[:-3]),
        'frames',
        parts[-2],
        filename.replace('.', '_').replace('-', '_'),
    )
    print(write_path)
    os.makedirs(write_path, exist_ok=True)

    video = cv2.VideoCapture(path)
    counter = 0
    try:
        if not video.isOpened():
            print("Not working; move notebook to same folder as videos or do 'pip install opencv-contrib-python'")
        else:
            while True:
                ret, frame = video.read()
                if not ret:
                    break
                cv2.imwrite(os.path.join(write_path, "frame_%d.jpg" % counter), frame)
                counter += 1
                # Advance past the frames we do not keep; grab() moves to the
                # next frame without retrieving the image.
                if not all(video.grab() for _ in range(frame_step - 1)):
                    break
    finally:
        # Always free the capture handle, even if reading or writing raised.
        video.release()

    print(f'Total Frames written for video {filename} are {counter} in folder {write_path}')
    return counter

## Create Lists of Fake/Real Video Names

Place the dataset in a `dataset/` folder next to this notebook (the folder must contain the videos and `metadata.json`), or modify the paths below accordingly.

In [4]:
# Transposing turns every top-level key of metadata.json into a row, so the
# index values are the names later used as video file names under ./dataset.
df = pd.read_json("./dataset/metadata.json").T
# Index values of every row labelled FAKE.
fakes = [name for name, label in df['label'].items() if label == "FAKE"]

In [5]:
# Index values of every row labelled REAL.
real = [name for name, label in df['label'].items() if label == "REAL"]

## Sort into Separate Folders

In [6]:
# Move every FAKE-labelled video out of ./dataset into ./fake.
destination_fake = "./fake"
# exist_ok so the cell can be re-run without failing on the existing folder.
os.makedirs(destination_fake, exist_ok=True)
for file in fakes:
    src_path = f'./dataset/{file}'
    dst_path = destination_fake
    # A previous (possibly interrupted) run may already have moved this video.
    if not os.path.exists(src_path) and os.path.exists(os.path.join(dst_path, file)):
        continue
    shutil.move(src_path, dst_path)

In [7]:
# Move every REAL-labelled video out of ./dataset into ./real.
destination_real = "./real"
# exist_ok so the cell can be re-run without failing on the existing folder.
os.makedirs(destination_real, exist_ok=True)
for file in real:
    src_path = f'./dataset/{file}'
    dst_path = destination_real
    # A previous (possibly interrupted) run may already have moved this video.
    if not os.path.exists(src_path) and os.path.exists(os.path.join(dst_path, file)):
        continue
    shutil.move(src_path, dst_path)

## Extract Frames

In [8]:
# Note: this takes a while -- save_frame() is run on every fake video.
for file in fakes:
    save_frame('/'.join((destination_fake, file)))

./fake/aagfhgtpmv.mp4
aagfhgtpmv.mp4
frames\fake\aagfhgtpmv_mp4
Total Frames written for video aagfhgtpmv.mp4 are 150 in folder frames\fake\aagfhgtpmv_mp4
./fake/aapnvogymq.mp4
aapnvogymq.mp4
frames\fake\aapnvogymq_mp4
Total Frames written for video aapnvogymq.mp4 are 150 in folder frames\fake\aapnvogymq_mp4
./fake/abofeumbvv.mp4
abofeumbvv.mp4
frames\fake\abofeumbvv_mp4
Total Frames written for video abofeumbvv.mp4 are 150 in folder frames\fake\abofeumbvv_mp4
./fake/abqwwspghj.mp4
abqwwspghj.mp4
frames\fake\abqwwspghj_mp4
Total Frames written for video abqwwspghj.mp4 are 150 in folder frames\fake\abqwwspghj_mp4
./fake/acifjvzvpm.mp4
acifjvzvpm.mp4
frames\fake\acifjvzvpm_mp4
Total Frames written for video acifjvzvpm.mp4 are 150 in folder frames\fake\acifjvzvpm_mp4
./fake/acqfdwsrhi.mp4
acqfdwsrhi.mp4
frames\fake\acqfdwsrhi_mp4
Total Frames written for video acqfdwsrhi.mp4 are 150 in folder frames\fake\acqfdwsrhi_mp4
./fake/acxnxvbsxk.mp4
acxnxvbsxk.mp4
frames\fake\acxnxvbsxk_mp4
Total 

Total Frames written for video augtsuxpzc.mp4 are 150 in folder frames\fake\augtsuxpzc_mp4
./fake/avfitoutyn.mp4
avfitoutyn.mp4
frames\fake\avfitoutyn_mp4
Total Frames written for video avfitoutyn.mp4 are 150 in folder frames\fake\avfitoutyn_mp4
./fake/avgiuextiz.mp4
avgiuextiz.mp4
frames\fake\avgiuextiz_mp4
Total Frames written for video avgiuextiz.mp4 are 150 in folder frames\fake\avgiuextiz_mp4
./fake/avibnnhwhp.mp4
avibnnhwhp.mp4
frames\fake\avibnnhwhp_mp4
Total Frames written for video avibnnhwhp.mp4 are 150 in folder frames\fake\avibnnhwhp_mp4
./fake/avnqydkqjj.mp4
avnqydkqjj.mp4
frames\fake\avnqydkqjj_mp4
Total Frames written for video avnqydkqjj.mp4 are 150 in folder frames\fake\avnqydkqjj_mp4
./fake/avssvvsdhz.mp4
avssvvsdhz.mp4
frames\fake\avssvvsdhz_mp4
Total Frames written for video avssvvsdhz.mp4 are 150 in folder frames\fake\avssvvsdhz_mp4
./fake/avtycwsgyb.mp4
avtycwsgyb.mp4
frames\fake\avtycwsgyb_mp4
Total Frames written for video avtycwsgyb.mp4 are 150 in folder frames

Total Frames written for video bnjcdrfuov.mp4 are 150 in folder frames\fake\bnjcdrfuov_mp4
./fake/bntlodcfeg.mp4
bntlodcfeg.mp4
frames\fake\bntlodcfeg_mp4
Total Frames written for video bntlodcfeg.mp4 are 150 in folder frames\fake\bntlodcfeg_mp4
./fake/bofqajtwve.mp4
bofqajtwve.mp4
frames\fake\bofqajtwve_mp4
Total Frames written for video bofqajtwve.mp4 are 150 in folder frames\fake\bofqajtwve_mp4
./fake/boovltmuwi.mp4
boovltmuwi.mp4
frames\fake\boovltmuwi_mp4
Total Frames written for video boovltmuwi.mp4 are 150 in folder frames\fake\boovltmuwi_mp4
./fake/bopqhhalml.mp4
bopqhhalml.mp4
frames\fake\bopqhhalml_mp4
Total Frames written for video bopqhhalml.mp4 are 150 in folder frames\fake\bopqhhalml_mp4
./fake/bourlmzsio.mp4
bourlmzsio.mp4
frames\fake\bourlmzsio_mp4
Total Frames written for video bourlmzsio.mp4 are 150 in folder frames\fake\bourlmzsio_mp4
./fake/bpwzipqtxf.mp4
bpwzipqtxf.mp4
frames\fake\bpwzipqtxf_mp4
Total Frames written for video bpwzipqtxf.mp4 are 150 in folder frames

Total Frames written for video cfyduhpbps.mp4 are 150 in folder frames\fake\cfyduhpbps_mp4
./fake/cglxirfaey.mp4
cglxirfaey.mp4
frames\fake\cglxirfaey_mp4
Total Frames written for video cglxirfaey.mp4 are 150 in folder frames\fake\cglxirfaey_mp4
./fake/cgvrgibpfo.mp4
cgvrgibpfo.mp4
frames\fake\cgvrgibpfo_mp4
Total Frames written for video cgvrgibpfo.mp4 are 150 in folder frames\fake\cgvrgibpfo_mp4
./fake/chzieimrwu.mp4
chzieimrwu.mp4
frames\fake\chzieimrwu_mp4
Total Frames written for video chzieimrwu.mp4 are 149 in folder frames\fake\chzieimrwu_mp4
./fake/ckbdwedgmc.mp4
ckbdwedgmc.mp4
frames\fake\ckbdwedgmc_mp4
Total Frames written for video ckbdwedgmc.mp4 are 150 in folder frames\fake\ckbdwedgmc_mp4
./fake/cknyxaqouy.mp4
cknyxaqouy.mp4
frames\fake\cknyxaqouy_mp4
Total Frames written for video cknyxaqouy.mp4 are 150 in folder frames\fake\cknyxaqouy_mp4
./fake/cksanfsjhc.mp4
cksanfsjhc.mp4
frames\fake\cksanfsjhc_mp4
Total Frames written for video cksanfsjhc.mp4 are 150 in folder frames

Total Frames written for video deyyistcrd.mp4 are 150 in folder frames\fake\deyyistcrd_mp4
./fake/dfbpceeaox.mp4
dfbpceeaox.mp4
frames\fake\dfbpceeaox_mp4
Total Frames written for video dfbpceeaox.mp4 are 150 in folder frames\fake\dfbpceeaox_mp4
./fake/dgmevclvzy.mp4
dgmevclvzy.mp4
frames\fake\dgmevclvzy_mp4
Total Frames written for video dgmevclvzy.mp4 are 150 in folder frames\fake\dgmevclvzy_mp4
./fake/dgxrqjdomn.mp4
dgxrqjdomn.mp4
frames\fake\dgxrqjdomn_mp4
Total Frames written for video dgxrqjdomn.mp4 are 150 in folder frames\fake\dgxrqjdomn_mp4
./fake/dgzklxjmix.mp4
dgzklxjmix.mp4
frames\fake\dgzklxjmix_mp4
Total Frames written for video dgzklxjmix.mp4 are 150 in folder frames\fake\dgzklxjmix_mp4
./fake/dhcselezer.mp4
dhcselezer.mp4
frames\fake\dhcselezer_mp4
Total Frames written for video dhcselezer.mp4 are 150 in folder frames\fake\dhcselezer_mp4
./fake/dhevettufk.mp4
dhevettufk.mp4
frames\fake\dhevettufk_mp4
Total Frames written for video dhevettufk.mp4 are 149 in folder frames

Total Frames written for video dzqwgqewhu.mp4 are 150 in folder frames\fake\dzqwgqewhu_mp4
./fake/dzvyfiarrq.mp4
dzvyfiarrq.mp4
frames\fake\dzvyfiarrq_mp4
Total Frames written for video dzvyfiarrq.mp4 are 150 in folder frames\fake\dzvyfiarrq_mp4
./fake/dzwkmcwkwl.mp4
dzwkmcwkwl.mp4
frames\fake\dzwkmcwkwl_mp4
Total Frames written for video dzwkmcwkwl.mp4 are 150 in folder frames\fake\dzwkmcwkwl_mp4
./fake/eahlqmfvtj.mp4
eahlqmfvtj.mp4
frames\fake\eahlqmfvtj_mp4
Total Frames written for video eahlqmfvtj.mp4 are 150 in folder frames\fake\eahlqmfvtj_mp4
./fake/eajlrktemq.mp4
eajlrktemq.mp4
frames\fake\eajlrktemq_mp4
Total Frames written for video eajlrktemq.mp4 are 150 in folder frames\fake\eajlrktemq_mp4
./fake/ebchwmwayp.mp4
ebchwmwayp.mp4
frames\fake\ebchwmwayp_mp4
Total Frames written for video ebchwmwayp.mp4 are 150 in folder frames\fake\ebchwmwayp_mp4
./fake/ebebgmtlcu.mp4
ebebgmtlcu.mp4
frames\fake\ebebgmtlcu_mp4
Total Frames written for video ebebgmtlcu.mp4 are 150 in folder frames

Total Frames written for video etdcqxabww.mp4 are 150 in folder frames\fake\etdcqxabww_mp4
./fake/etejaapnxh.mp4
etejaapnxh.mp4
frames\fake\etejaapnxh_mp4
Total Frames written for video etejaapnxh.mp4 are 150 in folder frames\fake\etejaapnxh_mp4
./fake/etmcruaihe.mp4
etmcruaihe.mp4
frames\fake\etmcruaihe_mp4
Total Frames written for video etmcruaihe.mp4 are 150 in folder frames\fake\etmcruaihe_mp4
./fake/etohcvnzbj.mp4
etohcvnzbj.mp4
frames\fake\etohcvnzbj_mp4
Total Frames written for video etohcvnzbj.mp4 are 150 in folder frames\fake\etohcvnzbj_mp4
./fake/eukvucdetx.mp4
eukvucdetx.mp4
frames\fake\eukvucdetx_mp4
Total Frames written for video eukvucdetx.mp4 are 150 in folder frames\fake\eukvucdetx_mp4


In [9]:
# Note: this takes a while -- save_frame() is run on every real video.
for file in real:
    save_frame('/'.join((destination_real, file)))

./real/abarnvbtwb.mp4
abarnvbtwb.mp4
frames\real\abarnvbtwb_mp4
Total Frames written for video abarnvbtwb.mp4 are 150 in folder frames\real\abarnvbtwb_mp4
./real/aelfnikyqj.mp4
aelfnikyqj.mp4
frames\real\aelfnikyqj_mp4
Total Frames written for video aelfnikyqj.mp4 are 150 in folder frames\real\aelfnikyqj_mp4
./real/afoovlsmtx.mp4
afoovlsmtx.mp4
frames\real\afoovlsmtx_mp4
Total Frames written for video afoovlsmtx.mp4 are 150 in folder frames\real\afoovlsmtx_mp4
./real/agrmhtjdlk.mp4
agrmhtjdlk.mp4
frames\real\agrmhtjdlk_mp4
Total Frames written for video agrmhtjdlk.mp4 are 150 in folder frames\real\agrmhtjdlk_mp4
./real/ahqqqilsxt.mp4
ahqqqilsxt.mp4
frames\real\ahqqqilsxt_mp4
Total Frames written for video ahqqqilsxt.mp4 are 150 in folder frames\real\ahqqqilsxt_mp4
./real/ajqslcypsw.mp4
ajqslcypsw.mp4
frames\real\ajqslcypsw_mp4
Total Frames written for video ajqslcypsw.mp4 are 150 in folder frames\real\ajqslcypsw_mp4
./real/anpuvshzoo.mp4
anpuvshzoo.mp4
frames\real\anpuvshzoo_mp4
Total 

Total Frames written for video dhcndnuwta.mp4 are 150 in folder frames\real\dhcndnuwta_mp4
./real/dhxctgyoqj.mp4
dhxctgyoqj.mp4
frames\real\dhxctgyoqj_mp4
Total Frames written for video dhxctgyoqj.mp4 are 150 in folder frames\real\dhxctgyoqj_mp4
./real/djxdyjopjd.mp4
djxdyjopjd.mp4
frames\real\djxdyjopjd_mp4
Total Frames written for video djxdyjopjd.mp4 are 150 in folder frames\real\djxdyjopjd_mp4
./real/dkuayagnmc.mp4
dkuayagnmc.mp4
frames\real\dkuayagnmc_mp4
Total Frames written for video dkuayagnmc.mp4 are 150 in folder frames\real\dkuayagnmc_mp4
./real/dkzvdrzcnr.mp4
dkzvdrzcnr.mp4
frames\real\dkzvdrzcnr_mp4
Total Frames written for video dkzvdrzcnr.mp4 are 150 in folder frames\real\dkzvdrzcnr_mp4
./real/dlpoieqvfb.mp4
dlpoieqvfb.mp4
frames\real\dlpoieqvfb_mp4
Total Frames written for video dlpoieqvfb.mp4 are 150 in folder frames\real\dlpoieqvfb_mp4
./real/drcyabprvt.mp4
drcyabprvt.mp4
frames\real\drcyabprvt_mp4
Total Frames written for video drcyabprvt.mp4 are 150 in folder frames

## Train-Test-Val Split

In [2]:
# Each entry is one per-video frame folder; print how many there are per class.
fake_folders = os.listdir('frames/fake')
real_folders = os.listdir('frames/real')
for folders in (fake_folders, real_folders):
    print(len(folders))

323
77


In [3]:
# Sort and re-seed before shuffling so the split depends only on the set of
# folder names: os.listdir() returns entries in arbitrary order, and the RNG
# state left by the seed(42) call in the import cell changes as soon as any
# random call (including a re-run of this cell) has happened.
seed(42)
fake_folders.sort()
shuffle(fake_folders)
real_folders.sort()
shuffle(real_folders)

In [4]:
# Hold out ~10% of the folders for each of test and val; train on the rest.
# The boundaries are derived from the list length instead of hard-coded
# indices; for the 323 folders counted above they are the same 259 and 291.
n_holdout_fake = round(0.1 * len(fake_folders))
test_start_fake = len(fake_folders) - 2 * n_holdout_fake
val_start_fake = len(fake_folders) - n_holdout_fake
train_fake = fake_folders[:test_start_fake]
test_fake = fake_folders[test_start_fake:val_start_fake]
val_fake = fake_folders[val_start_fake:]

In [5]:
# Hold out ~10% of the folders for each of test and val; train on the rest.
# The boundaries are derived from the list length instead of hard-coded
# indices; for the 77 folders counted above they are the same 61 and 69.
n_holdout_real = round(0.1 * len(real_folders))
test_start_real = len(real_folders) - 2 * n_holdout_real
val_start_real = len(real_folders) - n_holdout_real
train_real = real_folders[:test_start_real]
test_real = real_folders[test_start_real:val_start_real]
val_real = real_folders[val_start_real:]

In [6]:
# One folder per split; exist_ok keeps the cell re-runnable.
for split in ('train', 'test', 'val'):
    os.makedirs(f'frames/{split}', exist_ok=True)

In [7]:
# One fake/real folder pair inside every split. makedirs also creates the
# split folder itself if it is missing, and exist_ok keeps the cell re-runnable.
for split in ('train', 'test', 'val'):
    for label in ('fake', 'real'):
        os.makedirs(f'frames/{split}/{label}', exist_ok=True)

In [9]:
# Move each real frame folder from frames/real into the folder of its split.
root = 'frames/real'
destination_train = 'frames/train/real'
destination_test = 'frames/test/real'
destination_val = 'frames/val/real'

for folders, destination in (
    (train_real, destination_train),
    (test_real, destination_test),
    (val_real, destination_val),
):
    for file in folders:
        src_path = f'{root}/{file}'
        shutil.move(src_path, destination)

In [10]:
# Move each fake frame folder from frames/fake into the folder of its split.
root = 'frames/fake'
destination_train = 'frames/train/fake'
destination_test = 'frames/test/fake'
destination_val = 'frames/val/fake'

for folders, destination in (
    (train_fake, destination_train),
    (test_fake, destination_test),
    (val_fake, destination_val),
):
    for file in folders:
        src_path = f'{root}/{file}'
        shutil.move(src_path, destination)

In [11]:
# Remove the per-class folders that the moves above emptied.
for emptied in ('frames/fake', 'frames/real'):
    os.rmdir(emptied)