In [1]:
#!pip install ../input/mtcnn-package/mtcnn-0.1.0-py3-none-any.whl

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.patches import Rectangle # For drawing rectangle around faces

import os
import sys
import gc

import keras
from keras import Model, Sequential
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import LearningRateScheduler

import torch

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

import cv2
from mtcnn import MTCNN

from tqdm.notebook import tqdm
import random
import json # To read the metadata

import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


First we define our directory paths, including the directories where we will save the train and test images that we extract from the videos, and list the test videos.

In [2]:
# Directories holding the input videos.
train_sample_videos_path = '../input/train_sample_videos/'
test_videos_path = '../input/test_videos/'

# Directories the extracted images are saved to (train and test respectively).
train_sample_images_path = "../input/train_sample_images/"
test_images_path = "../input/test_images/"

# Names of every entry found in the test video directory.
test_videos = os.listdir(test_videos_path)

Now we loop over the videos in our directory, save frames from each video as images, and extract the faces detected in those frames.

In [3]:
def _crop_face(img, x, y, w, h, margin=40, size=(256, 256)):
    """
    Cut a padded face box out of an image and resize it to a uniform size.

    img: (ndarray) Image to crop; its channel order is left untouched
    x, y, w, h: (int) Face box given as left, top, width, height
    margin: (int) Padding in pixels added on every side of the box
    size: (tuple) Output (width, height) handed to cv2.resize

    Returns the resized crop, or None when the padded box selects no pixels.
    """
    # Clamp the padded origin so a negative start does not wrap around the array
    x = max(x - margin, 0)
    y = max(y - margin, 0)
    region = img[y:y + h + 2 * margin, x:x + w + 2 * margin]
    if region.size == 0:
        return None
    return cv2.resize(region, size)


def _save_frames(video_path, vid_name, images_dir_path, frames):
    """
    Save the requested frames of one video as '<vid_name>_<k>.jpg'.

    video_path: (str) Full path of the video file
    vid_name: (str) Prefix used for the saved image names
    images_dir_path: (str) Directory the frame images are written to
    frames: (int or list) Number of random frames, or explicit frame numbers
    """
    cap = cv2.VideoCapture(video_path)  # opened once per video
    try:
        vid_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if isinstance(frames, list):
            frame_numbers = list(frames)
        elif vid_length > 0:
            # randrange excludes vid_length itself, which is one past the last frame
            frame_numbers = [random.randrange(vid_length) for _ in range(frames)]
        else:
            frame_numbers = []  # file could not be opened as a video or reports no frames

        for k, frame_number in enumerate(frame_numbers):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()
            if not ret or frame is None:
                continue  # nothing decoded at this position; skip instead of writing None
            cv2.imwrite(os.path.join(images_dir_path, f'{vid_name}_{k}.jpg'), frame)
    finally:
        cap.release()


def extract_faces(videos_dir_path, images_dir_path, frames=1, conf_level=0.95, max_videos=50):
    """
    Inputs a directory of videos, extracts n frames.
    Outputs images of ANY faces detected in those frames.

    The frames are first saved into images_dir_path as '<video>_<k>.jpg'.
    Every readable image found in images_dir_path (including files that were
    already there before this call) is then run through MTCNN, each confident
    face is saved as '<image>_<face>.jpg', and the source image is deleted.

    videos_dir_path: (str) Path to your directory of videos
    images_dir_path: (str) Path to where you'll save your images to
    frames: (int or list) Number of frames. If int, take that many
            random frames. If list, take frame numbers specified in list.
    conf_level: (float) Minimum MTCNN confidence for a face to be saved
    max_videos: (int or None) Only the first max_videos directory entries
            are processed; None processes all of them
    """
    # Informational only: the device string is not passed to OpenCV or MTCNN
    device = 'CUDA:0' if torch.cuda.is_available() else 'CPU'
    print(f'Running on device: {device}')

    os.makedirs(images_dir_path, exist_ok=True)
    video_files = os.listdir(videos_dir_path)
    if max_videos is not None:
        video_files = video_files[:max_videos]  # slicing tolerates shorter directories

    # Extract images from videos
    if isinstance(frames, list):
        print(f'Extracting frames {frames} from videos')
    else:
        print(f'Extracting {frames} random frames from videos')

    for file_name in tqdm(video_files):
        vid_name = file_name.split('.')[0]  # file name without .ext
        _save_frames(os.path.join(videos_dir_path, file_name), vid_name, images_dir_path, frames)

    images_dir = os.listdir(images_dir_path)  # snapshot taken before any face image is written
    detector = MTCNN()

    print('Extracting faces from frames')
    for image_file in tqdm(images_dir):
        image_path = os.path.join(images_dir_path, image_file)
        image_name = image_file.split('.')[0]  # image name without .ext

        frame = cv2.imread(image_path)
        if frame is None:
            continue  # not a readable image; leave the file alone

        # cv2.imread yields BGR while MTCNN is documented for RGB input
        result = detector.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        for face_index, face in enumerate(result):
            # Only extract the face if confidence reaches conf_level
            if face['confidence'] >= conf_level:
                x, y, width, height = face['box']
                # Crop from the BGR frame: cv2.imwrite expects BGR, so the saved
                # JPEG keeps its true colours
                crop_img = _crop_face(frame, x, y, width, height)
                if crop_img is not None:
                    face_path = os.path.join(images_dir_path, f'{image_name}_{face_index}.jpg')
                    cv2.imwrite(face_path, crop_img)
        os.remove(image_path)  # Delete original image

In [4]:
# Save two randomly chosen frames per training video and keep the faces found in them.
extract_faces(
    videos_dir_path=train_sample_videos_path,
    images_dir_path=train_sample_images_path,
    frames=2,
)

Running on device: CPU
Extracting 2 random frames from videos


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


Instructions for updating:
Colocations handled automatically by placer.
Extracting faces from frames


HBox(children=(FloatProgress(value=0.0), HTML(value='')))




## Read Images
Read our extracted faces into X, then read the metadata to label each extracted face as REAL (0) or FAKE (1) in y. The validation sets X_val and y_val are split off in the following cell.

In [5]:
# Every file in the face-image directory is treated as one training sample.
train_sample_images = os.listdir(train_sample_images_path)

# Read in metadata
with open(os.path.join(train_sample_videos_path, 'metadata.json')) as json_file:
    metadata = json.load(json_file)

# Build images and labels in one pass so X[k] and y[k] always describe the same file.
X = []
y = []
for image_file in train_sample_images:
    img = cv2.imread(os.path.join(train_sample_images_path, image_file))
    if img is None:
        # cv2.imread returns None for files it cannot decode; skip them entirely
        continue
    X.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # The text before the first '_' of the image name is the source video's name
    video_file = image_file.split('_')[0] + '.mp4'
    y.append(0 if metadata[video_file]['label'] == 'REAL' else 1)  # REAL -> 0, anything else -> 1

assert len(X) == len(y), "images and labels are out of step"

In [6]:
# Hold out the first 25% of the samples for validation and keep the rest for training.
# NOTE: X and y are overwritten here, so re-running this cell splits the
# already-reduced lists again.
x_cut = round(len(X) / 100 * 25)
y_cut = round(len(y) / 100 * 25)

X_val, X = X[:x_cut], X[x_cut:]
y_val, y = y[:y_cut], y[y_cut:]

In [7]:
def shuffle(X, y):
    """
    Shuffle two sequences with one shared random permutation.

    X: (sequence) Samples
    y: (sequence) Labels, paired with X by position

    Returns two new lists (samples, labels) whose pairing is preserved.
    If the inputs differ in length, only the first min(len(X), len(y))
    pairs are kept.
    """
    pairs = list(zip(X, y))
    random.shuffle(pairs)
    shuffled_samples = [sample for sample, _ in pairs]
    shuffled_labels = [label for _, label in pairs]
    return shuffled_samples, shuffled_labels

# Shuffle the training pairs first, then the validation pairs; each set keeps
# its own sample/label pairing.
X, y = shuffle(X=X, y=y)
X_val, y_val = shuffle(X=X_val, y=y_val)

In [10]:
def InceptionLayer(a, b, c, d):
    """
    Build a block of four parallel convolution branches.

    a: (int) Filters of the single 1x1 convolution branch
    b: (int) Filters of the 1x1 -> 3x3 branch
    c: (int) Filters of the 1x1 -> 3x3 branch with dilation rate 2
    d: (int) Filters of the 1x1 -> 3x3 branch with dilation rate 3

    Returns a function that takes an input tensor and returns the four
    branch outputs concatenated along the last axis.
    """
    shared = dict(padding='same', activation='elu')  # settings common to every convolution

    def func(x):
        # Branch 1: 1x1 convolution only
        branch_a = Conv2D(a, (1, 1), **shared)(x)

        # Branch 2: 1x1 followed by a plain 3x3
        branch_b = Conv2D(b, (1, 1), **shared)(x)
        branch_b = Conv2D(b, (3, 3), **shared)(branch_b)

        # Branch 3: 1x1 followed by a 3x3 with dilation rate 2
        branch_c = Conv2D(c, (1, 1), **shared)(x)
        branch_c = Conv2D(c, (3, 3), dilation_rate=2, strides=1, **shared)(branch_c)

        # Branch 4: 1x1 followed by a 3x3 with dilation rate 3
        branch_d = Conv2D(d, (1, 1), **shared)(x)
        branch_d = Conv2D(d, (3, 3), dilation_rate=3, strides=1, **shared)(branch_d)

        return Concatenate(axis=-1)([branch_a, branch_b, branch_c, branch_d])

    return func
    
def define_model(shape=(256, 256, 3)):
    """
    Build and compile the face classifier.

    shape: (tuple) Shape of one input image passed to the Input layer

    The network is two InceptionLayer stages and two 5x5 convolution stages
    (each followed by batch normalisation and max pooling), then a dense head
    with dropout that ends in a single sigmoid unit. It is compiled with
    binary cross-entropy loss and Adam(lr=1e-4).

    Returns the compiled Model.
    """
    inputs = Input(shape=shape)
    features = inputs

    # Inception stages: batch norm + 2x2 pooling after each
    for branch_filters in ((1, 4, 4, 2), (2, 4, 4, 2)):
        features = InceptionLayer(*branch_filters)(features)
        features = BatchNormalization()(features)
        features = MaxPooling2D(pool_size=(2, 2), padding='same')(features)

    # 5x5 convolution stages: pooled 2x2 first, then 4x4
    for pool in ((2, 2), (4, 4)):
        features = Conv2D(16, (5, 5), padding='same', activation='elu')(features)
        features = BatchNormalization()(features)
        features = MaxPooling2D(pool_size=pool, padding='same')(features)

    # Classification head
    head = Flatten()(features)
    head = Dropout(0.5)(head)
    head = Dense(16)(head)
    head = LeakyReLU(alpha=0.1)(head)
    head = Dropout(0.5)(head)
    outputs = Dense(1, activation='sigmoid')(head)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-4))
    return model

# Build one model instance with the default input shape.
df_model = define_model(shape=(256, 256, 3))

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [11]:
# Learning rate to use in each epoch, indexed by epoch number.
lrs = [1e-3, 5e-4, 1e-4]


def schedule(epoch):
    """
    Return the learning rate for the given epoch.

    epoch: (int) Epoch index

    Epochs beyond the end of lrs reuse its last value instead of raising
    IndexError, so training for more epochs than len(lrs) still works.
    """
    return lrs[min(epoch, len(lrs) - 1)]


LOAD_PRETRAIN = False  # not referenced by the cells visible here

In [12]:
# Train models until `kfolds` of them reach an acceptable validation loss.
# Despite the name, every model is fitted on the same X / y and scored on the
# same X_val / y_val; the data is not split into folds here.
kfolds = 5
losses = []
models = []

# Convert once up front: Keras and scikit-learn both work on arrays
X_train, y_train = np.asarray(X), np.asarray(y)
X_valid = np.asarray(X_val)

# Upper bound on training runs, so that a loss which never gets below the
# threshold cannot keep this cell running forever
max_attempts = 10 * kfolds

i = 0
while len(models) < kfolds:
    if i >= max_attempts:
        raise RuntimeError(
            'Only ' + str(len(models)) + ' of ' + str(kfolds)
            + ' models reached the loss threshold after ' + str(i) + ' attempts'
        )

    model = define_model()
    model.fit(X_train, y_train, epochs=2, callbacks=[LearningRateScheduler(schedule)])
    pred = model.predict(X_valid)
    # labels is given explicitly so the score is defined even if y_val holds one class
    loss = log_loss(y_val, pred, labels=[0, 1])
    losses.append(loss)
    print('Fold ' + str(i) + ' model loss: ' + str(loss))
    if loss < 0.68:
        models.append(model)
    else:
        print('##############')
        print('Retraining')
        print('##############')

    # K.clear_session() is deliberately not called here: it resets the backend
    # graph/session that the models kept in `models` were built in.
    # NOTE(review): confirm against how `models` is used further down.
    del model
    gc.collect()
    i += 1

Instructions for updating:
Use tf.cast instead.
Epoch 1/2
Epoch 2/2
Fold 0 model loss: 0.3695797858975425
Epoch 1/2
Epoch 2/2
Fold 1 model loss: 0.46828508221854764
Epoch 1/2
Epoch 2/2
Fold 2 model loss: 0.5229524488095194
Epoch 1/2
Epoch 2/2
Fold 3 model loss: 0.4476685812987853
Epoch 1/2
Epoch 2/2
Fold 4 model loss: 0.712765004185106
loss too bad, retrain!
Epoch 1/2
Epoch 2/2
Fold 5 model loss: 0.33790978603065014
