In [1]:
import re # new module required for dataframe creation
import numpy as np
import pandas as pd
import os
import math # required for prediction conversion

import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import json

# Base directory for saved models (read from 'trained_models/saved_models/')
# and for prediction files (written to 'predictions/').
af_dir = '../../all_faces_bucket/'
# Base directory whose 'test' subfolder is read by get_multidim_predictions
# when bench=False. Note the trailing slash.
disk_data_dir = '../../all_faces_disk/home/jupyter/forensics_split/'

In [None]:
# Copy benchmark to disk
!mkdir /home/jupyter/ff_bench 
!gsutil -m cp -r gs://all_faces/benchmark_cropped_images /home/jupyter/ff_bench 

In [2]:
def get_model(architecture):
    '''Load a saved model from the models folder under `af_dir`.

    architecture: file name of the saved model inside
        'trained_models/saved_models/'.
    Returns the object produced by `load_model` for that path.
    '''
    model_path = ''.join((af_dir, 'trained_models/saved_models/', architecture))
    return load_model(model_path)

def get_multidim_predictions(model, bench=True):
    '''Run a loaded model over a directory of images and return its raw output.

    Args:
        model: a loaded model exposing `predict`.
        bench: if True, images are read from '../../ff_bench'; otherwise
            from the 'test' subfolder of `disk_data_dir`.

    Returns:
        (filenames, predictions): the generator's list of file names and
        the output of `model.predict` over those files. Shuffling is
        disabled so the two are expected to line up one-to-one.
    '''
    # Only the source directory differs between the two modes, so choose it
    # up front and build the generator once. os.path.join avoids the doubled
    # slash that `disk_data_dir + '/test'` produced (disk_data_dir already
    # ends with '/').
    image_dir = '../../ff_bench' if bench else os.path.join(disk_data_dir, 'test')

    # Normalise and centre each image individually
    datagen = ImageDataGenerator(samplewise_std_normalization=True, samplewise_center=True)
    generator = datagen.flow_from_directory(image_dir, target_size=(224, 224),
                                            shuffle=False, batch_size=1, class_mode=None)
    filenames = generator.filenames
    nb_samples = len(filenames)

    # Restart the generator from its first batch before predicting, so the
    # predictions start at the first entry of `filenames`.
    generator.reset()
    # batch_size is 1, hence one prediction step per image.
    predictions = model.predict(generator, steps=nb_samples, verbose=1, workers=8)

    return filenames, predictions

def get_image_predictions(arr, soft=True):
    '''Reduce per-class scores to one prediction per image.

    arr: sequence of per-image score rows (at least two entries per row).
    soft: when truthy, return a list holding the second entry of every row
        (treated in this notebook as the score of the fake class);
        otherwise return the argmax of every row as a numpy array.
    '''
    if not soft:
        # Hard decision: index of the largest score in each row
        return np.argmax(arr, axis=1)

    # Soft decision: keep only the second-class score of each row
    return [class_scores[1] for class_scores in arr]

def build_dataframe(filenames, predictions):
    '''Build a per-image results table from file names and predictions.

    The fields are parsed from each file name as follows:
      * method: text between the first '/' and the first '_'
      * video:  text between the first and the second '_'
      * image:  text between the first '_' and the first '.', minus its
                first four characters
      * true label: 0 when the name starts with 'a', 1 otherwise
        (presumably the real-class folder starts with 'a' - confirm
        against the directory layout)

    Args:
        filenames: sequence of file-name strings, each containing at least
            two underscores (otherwise the video lookup raises
            AttributeError).
        predictions: one value per file name; stored in 'probability'.

    Returns:
        DataFrame indexed 0..n-1 with columns 'method', 'video', 'image',
        'test/train', 'true label', 'probability', 'predicted label', 'acc'.
        'test/train' is always 'test'; 'predicted label' and 'acc' are
        '-' placeholders.
    '''
    # Raw string: "\_" in an ordinary literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    video_pattern = re.compile(r"_(.*?)_")

    methods = [el[el.find('/') + 1: el.find('_')] for el in filenames]
    video_numbers = [video_pattern.search(el).group(1) for el in filenames]
    image_numbers = [el[el.find('_') + 1: el.find('.')][4:] for el in filenames]
    true_labels = [0 if el[0] == 'a' else 1 for el in filenames]

    n_rows = len(filenames)
    # Build the frame in one step; pre-allocating and zero-filling a frame
    # whose columns are all overwritten afterwards was dead work.
    return pd.DataFrame(
        {
            'method': methods,
            'video': video_numbers,
            'image': image_numbers,
            'test/train': ['test'] * n_rows,
            'true label': true_labels,
            'probability': predictions,
            'predicted label': ['-'] * n_rows,
            'acc': ['-'] * n_rows,
        },
        index=range(n_rows),
    )

In [3]:
def run_all(model, bench=True):
    '''Load a saved model by file name and return hard predictions.

    model: file name handed to get_model.
    bench: forwarded to get_multidim_predictions to select the image
        source (benchmark folder when True, test folder otherwise).
    Returns (filenames, predictions), where predictions come from
    get_image_predictions with soft=False.
    '''
    # Tip: wrap the calls below in `with tf.device('/cpu:0'):` if the GPU is
    # busy training and has no leftover memory.
    filenames, class_scores = get_multidim_predictions(get_model(model), bench)

    # soft=False yields argmax labels; adapt this call if you want a
    # threshold other than 0.5 or a different decision rule.
    return filenames, get_image_predictions(class_scores, soft=False)

# Test set predictions and accuracy

In [36]:
# Hard predictions on the test folder, tabulated per frame
tst_filenames, tst_preds = run_all('mobilenet_new_model_fine_tunedlastepoch.h5', bench=False)
data = build_dataframe(tst_filenames, tst_preds)

# Accuracy on frame predictions (all frames)
# NOTE: despite its name, `similarities` holds 1 where the predicted label
# differs from the true label, so the last line is 1 - error rate.
similarities = [int(pred != label) for pred, label in zip(tst_preds, data['true label'])]
1 - sum(similarities) / len(tst_preds)

0.46199999999999997

# Benchmark predictions

In [5]:
# Hard (argmax) predictions for every image in the benchmark folder
filenames, preds = run_all('mobilenet_new_model_fine_tunedlastepoch.h5', bench=True)

Found 1000 images belonging to 1 classes.


In [7]:
# Translate numeric labels into the strings used in the submission file
ff_preds = ["fake" if label else "real" for label in preds]
# Key each prediction by the last 8 characters of its file name
submission = dict(zip((name[-8:] for name in filenames), ff_preds))
json_file = json.dumps(submission, indent=4)

with open(af_dir + 'predictions/' + 'test1.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json_file)

In [13]:
# Sanity check: our submission must have exactly the keys of the example file
with open('../../example_submission 4.json') as example_file:
    dd = json.load(example_file)
submission.keys() == dd.keys()

True

In [None]:
# Display the loaded example submission for visual inspection
dd