# Import Modules
The kernel has to be restarted after installing DeepStack.

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import load_model as kerasload
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import sys

# Base directory (relative to the notebook) under which the trained models,
# the saved ensemble members and the exported predictions are read and written.
af_dir = '../../all_faces_bucket/'
# Location of the forensics split on disk.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
disk_data_dir = '../../all_faces_disk/home/jupyter/forensics_split/'
!pip install git+https://github.com/qubvel/efficientnet

# Put the local DeepStack directory on the module search path before the
# `base` / `ensemble` imports that follow (assumes those modules live in this directory).
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/ensembles/DeepStack/deepstack')
from base import *
from ensemble import *
import efficientnet.tfkeras
import joblib
import os
import glob
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
# !sudo kill -9 PID # clear GPU memory where 9 is PID number
# !watch -n0.1 nvidia-smi

# Load models

In [3]:
def get_model(config, network_type):
    '''Load one saved ensemble model from disk.

    The file is looked up under ``af_dir`` as
    ``trained_models/saved_models/ensemble_<config>_<network_type>.h5``.

    Args:
        config: Configuration identifier; converted with ``str()``.
        network_type: Network-type string used in the file name (e.g. ``'B0'``).

    Returns:
        Whatever ``kerasload`` returns for that path.
    '''
    model_path = ''.join([
        af_dir,
        'trained_models/saved_models/ensemble_',
        str(config),
        '_',
        network_type,
        '.h5',
    ])
    return kerasload(model_path)

# (config id, network type) of every saved base model, loaded in this order.
# NOTE(review): the last entry uses a lowercase 'b6' while the others are
# uppercase — confirm it matches the saved file name.
(model1, model2, model3,
 model4, model5, model6,
 model7, model8, model9) = (
    get_model(config_id, net_type)
    for config_id, net_type in (
        (40, 'B7'),
        (13, 'B0'),
        (24, 'B0'),
        (6, 'B0'),
        (25, 'B3'),
        (2, 'B2'),
        (3, 'B3'),
        (8, 'B7'),
        (17, 'b6'),
    )
)

# Create image generators

In [5]:
# Create data generators
def augment_data(directory, batch):
    '''Build the two image generators that are handed to the ensemble members.

    Despite the function name, no augmentation is applied: every image is only
    centred and scaled per sample. Images are resized to 224x224 and the two
    class folders are 'authentic' and 'fake' (in that order).

    Shuffling is switched off explicitly. Keras' ``flow_from_directory``
    shuffles by default, and these generators are used to produce predictions
    that are scored against the generators' labels, so the batch order must be
    fixed rather than left to the consumer to disable.

    Args:
        directory: Folder containing the ``validation`` and ``test`` sub-folders.
        batch: Batch size used by both generators.

    Returns:
        train_data, val_data: ``train_data`` reads ``<directory>/validation``
        and ``val_data`` reads ``<directory>/test``.
    '''

    def make_generator(split):
        # A separate ImageDataGenerator per split, configured identically.
        datagen = ImageDataGenerator(samplewise_center=True,
                                     samplewise_std_normalization=True)
        # Classes give the folders storing the two different categories
        return datagen.flow_from_directory(directory + '/' + split,
                                           target_size=(224, 224),
                                           batch_size=batch,
                                           classes=['authentic', 'fake'],
                                           shuffle=False)

    train_data = make_generator('validation')
    val_data = make_generator('test')

    return train_data, val_data

# Build both generators with 128 images per batch. As defined in augment_data,
# train_data reads the 'validation' folder and val_data reads the 'test' folder.
train_data, val_data = augment_data('../../forensics_split', batch=128)

Found 21291 images belonging to 2 classes.
Found 23054 images belonging to 2 classes.


# Instantiate KerasMembers
This will compute all training and validation predictions for each model. An ensemble's `fit` method can then be applied to the members to create the ensemble.

In [21]:
# One KerasMember per base model; every member receives the same two generators.
# Members are named "model1" .. "model9", following the order of the model variables.
(member1, member2, member3,
 member4, member5, member6,
 member7, member8, member9) = (
    KerasMember(name="model" + str(position), keras_model=base_model,
                train_batches=train_data, val_batches=val_data)
    for position, base_model in enumerate(
        (model1, model2, model3, model4, model5, model6, model7, model8, model9),
        start=1,
    )
)



In [7]:
# Save Keras members
# Folder that receives the saved ensemble members.
members_dir = af_dir + 'trained_models/saved_models/ensemble_members'
# exist_ok=True creates the folder when missing and does nothing when it is
# already there, replacing the separate exists() check (no check-then-create race).
os.makedirs(members_dir, exist_ok=True)
# Save every member, in order, into the same folder.
for member in (member1, member2, member3, member4, member5,
               member6, member7, member8, member9):
    member.save(members_dir)

In [11]:
# Restore the nine saved members (model1 .. model9) from the ensemble_members
# folder, in order, rebinding member1 .. member9.
(member1, member2, member3,
 member4, member5, member6,
 member7, member8, member9) = (
    KerasMember.load(af_dir + 'trained_models/saved_models/ensemble_members/model' + str(index))
    for index in range(1, 10)
)

Loaded model1
Loaded model2
Loaded model3
Loaded model4
Loaded model5
Loaded model6
Loaded model7
Loaded model8
Loaded model9


# Fit weighted average ensemble

In [12]:
# Weighted-average ensemble over all nine members.
wAvgEnsemble = DirichletEnsemble()
wAvgEnsemble.add_members([member1, member2, member3, member4, member5, member6, member7, member8, member9])
wAvgEnsemble.fit()
# describe() prints its own per-member report and returns None here, so it is
# called directly; wrapping it in print() emitted a stray "None" line.
wAvgEnsemble.describe()
print('Best weights:',wAvgEnsemble.bestweights)

model1 - Weight: 0.0089 - roc_auc_score: 0.9912
model2 - Weight: 0.1149 - roc_auc_score: 0.9917
model3 - Weight: 0.1228 - roc_auc_score: 0.9923
model4 - Weight: 0.0770 - roc_auc_score: 0.9923
model5 - Weight: 0.0158 - roc_auc_score: 0.9909
model6 - Weight: 0.0127 - roc_auc_score: 0.9900
model7 - Weight: 0.1328 - roc_auc_score: 0.9926
model8 - Weight: 0.0113 - roc_auc_score: 0.9894
model9 - Weight: 0.5037 - roc_auc_score: 0.9933
DirichletEnsemble roc_auc_score: 0.9941
None
Best weights: [0.00893805 0.11491145 0.12284052 0.07697425 0.01582984 0.0126988
 0.13275678 0.01133924 0.50371107]


# Fit stacking ensemble

In [13]:
stack = StackEnsemble()
# Meta-learner of the stacking ensemble. random_state is fixed so that
# re-running the cell rebuilds the same forest and reproduces the same scores.
stack.model = RandomForestRegressor(verbose=0, n_estimators=200,
                                  max_depth=15, n_jobs=20, min_samples_split=20,
                                  random_state=42)
stack.add_members([member1, member2, member3, member4, member5, member6, member7, member8, member9])
stack.fit()
# Last expression of the cell: prints the report and displays the returned score.
stack.describe()

Calling predict
model1 - roc_auc_score: 0.9912
model2 - roc_auc_score: 0.9917
model3 - roc_auc_score: 0.9923
model4 - roc_auc_score: 0.9923
model5 - roc_auc_score: 0.9909
model6 - roc_auc_score: 0.9900
model7 - roc_auc_score: 0.9926
model8 - roc_auc_score: 0.9894
model9 - roc_auc_score: 0.9933
StackEnsemble roc_auc_score: 0.9924


0.9924405098140157

In [14]:
# Report accuracy for the stacking ensemble fitted in the previous cell.
# Rebuilding and refitting an identical StackEnsemble here only repeated the
# training; reusing the fitted `stack` makes both metrics describe the same model.
stack.describe(metric='acc')

Calling predict
model1 - Test Accuracy: 0.9711
model2 - Test Accuracy: 0.9686
model3 - Test Accuracy: 0.9674
model4 - Test Accuracy: 0.9732
model5 - Test Accuracy: 0.9716
model6 - Test Accuracy: 0.9648
model7 - Test Accuracy: 0.9696
model8 - Test Accuracy: 0.9582
model9 - Test Accuracy: 0.9786
StackEnsemble Test Accuracy: 0.9812


0.9811746334692462

In [18]:
# Save predictions for video conversion
# Written into the ensemble_members folder as 'stack_preds'; np.save appends
# the '.npy' extension because the given name does not already end with it.
np.save(af_dir + 'trained_models/saved_models/ensemble_members/stack_preds', stack.predictions)

# 3 Level Stacking

In [16]:
# Level 2: two tree-ensemble classifiers.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=200, max_depth=15,
                                  min_samples_split=30, n_jobs=20, verbose=0)),
    ('etr', ExtraTreesClassifier(n_estimators=200, max_depth=10,
                                 min_samples_split=20, n_jobs=20, verbose=0)),
]

# Level 3: logistic regression as the final estimator on top of the level-2 classifiers.
clf = StackingClassifier(final_estimator=LogisticRegression(), estimators=estimators)

# Level 1 is the nine Keras members; the stacked classifier is their meta-learner.
# NOTE(review): the recorded roc_auc for this stack (0.9808) is below the
# regressor stack's (0.9924). If StackEnsemble scores the classifier's hard
# predict() labels instead of probabilities, that would explain it — confirm.
stack3 = StackEnsemble()
stack3.model = clf
stack3.add_members([
    member1, member2, member3,
    member4, member5, member6,
    member7, member8, member9,
])
stack3.fit()
stack3.describe()

Calling predict
model1 - roc_auc_score: 0.9912
model2 - roc_auc_score: 0.9917
model3 - roc_auc_score: 0.9923
model4 - roc_auc_score: 0.9923
model5 - roc_auc_score: 0.9909
model6 - roc_auc_score: 0.9900
model7 - roc_auc_score: 0.9926
model8 - roc_auc_score: 0.9894
model9 - roc_auc_score: 0.9933
StackEnsemble roc_auc_score: 0.9808


0.9808322141483957

In [19]:
# Save predictions for video conversion
# Written into the ensemble_members folder as 'stack_level_3_preds'; np.save
# appends the '.npy' extension because the given name does not already end with it.
np.save(af_dir + 'trained_models/saved_models/ensemble_members/stack_level_3_preds', stack3.predictions)