In [1]:
import keras

from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score


import numpy as np
import pickle

In [2]:
DS_CDFV1 = 'celeb_df_v1/'
DS_CDFV2 = 'celeb_df_v2/'

DS_ORGINAL = 'dataset_original/'
DS_SPLIT = 'dataset_split/'
DS_IFRAMES = 'dataset_iframes/'
DS_FACE = 'dataset_face/'
DS_FACE_IMG = 'dataset_face_img/'
DS_SRM_SNIPPETS = 'dataset_srm_snippets_5/'
DS_SEGMENTS = 'dataset_segments/'
DS_RAW = 'dataset_raw/'
DS_RESIDUALS = 'dataset_residuals/'
DS_TEMPORAL = 'dataset_temporal/'
MODELS = 'models/'


SEG_1 = 'seg_1/'
SEG_2 = 'seg_2/'
SEG_3 = 'seg_3/'
SEG_4 = 'seg_4/'
SEG_5 = 'seg_5/'

SEG = ['seg_1_', 'seg_2_', 'seg_3_', 'seg_4_', 'seg_5_']

DS_TRAIN = 'train_dataset/'
DS_TEST = 'test_dataset/'
DS_VAL = 'val_dataset/'

CLASS_FAKE = 'fake/'
CLASS_REAL = 'real/'


TOP_LEVEL_1 = [DS_SPLIT, DS_IFRAMES, DS_FACE, DS_FACE_IMG, DS_SRM_SNIPPETS]
TOP_LEVEL_2 = [DS_SEGMENTS, DS_RAW, DS_RESIDUALS]
SEGMENTS = [SEG_1, SEG_2, SEG_3, SEG_4, SEG_5]
SPLIT = [DS_TRAIN, DS_TEST, DS_VAL]
CLASS = [CLASS_REAL, CLASS_FAKE]

DATASET = [DS_CDFV1, DS_CDFV2]

In [3]:
TEST_SCORE_PATH = 'test_scores/'
SAVE_MODEL_PATH = 'models/SVM/'

In [4]:
class VideoScore():
    def __init__(self, filename, score, true_class, fake_on_lower_half = True):
        self.filename = filename
        self.score = score
        self.true_class = true_class
        self.fake_on_lower_half = fake_on_lower_half

    def get_filename(self):
        return self.filename
    
    def get_score(self):
        return self.score
    
    def get_true_class(self):
        return self.true_class
    
    def get_fake_on_lower_half(self):
        return self.fake_on_lower_half

# Get Model Scores

In [6]:
srm_scores_train = []
srm_scores_test = []

with open(TEST_SCORE_PATH + 'srm_scores_train', 'rb') as f:
    srm_scores_train = pickle.load(f)

with open(TEST_SCORE_PATH + 'srm_scores_test', 'rb') as f:
    srm_scores_test = pickle.load(f)

In [7]:
mesonet_scores_train = []
mesonet_scores_test = []

with open(TEST_SCORE_PATH + 'mesonet_pruned_train.pkl', 'rb') as f:
    mesonet_scores_train = pickle.load(f)

with open(TEST_SCORE_PATH + 'mesonet_pruned_test.pkl', 'rb') as f:
    mesonet_scores_test = pickle.load(f)

In [8]:
temporal_scores_train = []
temporal_scores_test = []

with open(TEST_SCORE_PATH + 'temporal_scores_train', 'rb') as f:
    temporal_scores_train = pickle.load(f)

with open(TEST_SCORE_PATH + 'temporal_scores_test', 'rb') as f:
    temporal_scores_test = pickle.load(f)

In [9]:
print(len(srm_scores_test))
print(len(srm_scores_train))

print(len(mesonet_scores_test))
print(len(mesonet_scores_train))

print(len(temporal_scores_test))
print(len(temporal_scores_train))

518
4810
518
4810
518
4809


# Sort Scores by Video

In [10]:
mesonet_names = set([m.get_filename() for m in mesonet_scores_train])
temporal_names = set([t.get_filename() for t in temporal_scores_train])
srm_names = set([s.get_filename() for s in srm_scores_train])

len(temporal_names.symmetric_difference(srm_names))

1927

In [11]:
mesonet_names = set([m.get_filename() for m in mesonet_scores_train])
temporal_names = set([t.get_filename() for t in temporal_scores_train])
srm_names = set([s.get_filename() for s in srm_scores_train])

extra_filename = mesonet_names.symmetric_difference(temporal_names)
extra_filename_2 = mesonet_names.symmetric_difference(srm_names)

m_train = []
s_train = []
t_train = []

for m in sorted(mesonet_scores_train, key=lambda x: x.get_filename()):
    if m.get_filename() not in extra_filename:
        m_train.append(m)

for s in sorted(srm_scores_train, key=lambda x: x.get_filename()):
    if s.get_filename() not in extra_filename:
        s_train.append(s)

for t in sorted(temporal_scores_train, key=lambda x: x.get_filename()):
    if t.get_filename() not in extra_filename:
        t_train.append(t)

print(len(m_train))
print(len(s_train))
print(len(t_train))

3846
3846
3846


In [12]:
mesonet_scores_test.sort(key=lambda x: x.get_filename())
srm_scores_test.sort(key=lambda x: x.get_filename())
temporal_scores_test.sort(key=lambda x: x.get_filename())

In [13]:
for m, s, t in zip(m_train, s_train, t_train):
    assert m.get_filename() == s.get_filename() == t.get_filename()
    assert m.get_true_class() == s.get_true_class() == t.get_true_class()

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    assert m.get_filename() == s.get_filename() == t.get_filename()
    assert m.get_true_class() == s.get_true_class() == t.get_true_class()

# Averaging + Weighted Voting

In [23]:
def get_avg_score(s1, s2, s3, w1=0.33, w2=0.33, w3=0.33):
    return ((s1 * w1) + (s2 * w2) + (s3 * w3))

In [25]:
true_fake, true_real, false_fake, false_real = 0, 0, 0, 0
y_true = []
y_score = []

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    avg_score = get_avg_score(m.get_score(), s.get_score(), t.get_score())
    
    if avg_score < 0.5 and m.get_true_class() == CLASS_FAKE:
        true_fake += 1

    elif avg_score >= 0.5 and m.get_true_class() == CLASS_REAL:
        true_real += 1

    elif avg_score < 0.5 and m.get_true_class() == CLASS_REAL:
        false_fake += 1

    else:
        false_real += 1

    y_true.append(0 if m.get_true_class() == CLASS_FAKE else 1)
    y_score.append(0 if avg_score < 0.5 else 1)
    

In [28]:
accuracy = (true_real + true_fake) / (true_fake + true_real + false_fake + false_real)
precision = true_fake / (true_fake + false_fake)
recall = true_fake / (true_fake + false_real)

auc = roc_auc_score(y_true, y_score)

In [30]:
print(f'True Fake:{true_fake}')
print(f'True Real:{true_real}')
print(f'False Fake:{false_fake}')
print(f'False Real:{false_real}')
print(f'---')
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'AUC Score: {auc:.4f}')

True Fake:340
True Real:133
False Fake:45
False Real:0
---
Accuracy: 0.9131
Precision: 0.8831
Recall: 1.0000
AUC Score: 0.8736


# Majority Voting
Use this only when you have odd number of models

In [41]:
true_fake, true_real, false_fake, false_real = 0, 0, 0, 0
y_true = []
y_score = []

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    pred_fake = 0

    y_true.append(0 if m.get_true_class() == CLASS_FAKE else 1)

    if (m.get_score() < 0.5):
        pred_fake += 1

    if (s.get_score() < 0.5):
        pred_fake += 1

    if (t.get_score() < 0.5):
        pred_fake += 1

    if pred_fake >= 2:
        y_score.append(0)

        if m.get_true_class() == CLASS_FAKE:
            true_fake += 1
        else:
            false_fake += 1

    else:
        y_score.append(1)
        
        if m.get_true_class() == CLASS_REAL:
            true_real += 1
        else:
            false_real += 1

In [44]:
accuracy = (true_real + true_fake) / (true_fake + true_real + false_fake + false_real)
precision = true_fake / (true_fake + false_fake)
recall = true_fake / (true_fake + false_real)

auc = roc_auc_score(y_true, y_score)

In [45]:
print(f'True Fake:{true_fake}')
print(f'True Real:{true_real}')
print(f'False Fake:{false_fake}')
print(f'False Real:{false_real}')
print(f'---')
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'AUC Score: {auc:.4f}')

True Fake:340
True Real:122
False Fake:56
False Real:0
---
Accuracy: 0.8919
Precision: 0.8586
Recall: 1.0000
AUC Score: 0.8427


# SVM Classifier

In [14]:
stream_predictions = []
actual_class_train = []
final_predictions = []

for m, s, t in zip(m_train, s_train, t_train):
    stream_predictions.append([m.get_score(), s.get_score(), t.get_score()])
    actual_class_train.append(m.get_true_class())

In [15]:
stream_testing = []
actual_class_test = []

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    stream_testing.append([m.get_score(), s.get_score(), t.get_score()])
    actual_class_test.append(m.get_true_class())

In [16]:
model = SVC(kernel='rbf', random_state=0,  probability=True)  
model.fit(stream_predictions, actual_class_train) 

In [17]:
with open(SAVE_MODEL_PATH + 'svm.pkl', 'wb') as f:
    pickle.dump(model, f)

In [18]:
with open(SAVE_MODEL_PATH + 'svm.pkl', 'rb') as f:
    model = pickle.load(f)

In [53]:
final_predictions = model.predict(stream_testing)

In [54]:
final_predictions_proba = model.predict_proba(stream_testing)

In [55]:
final_predictions_decision_fn = model.decision_function(stream_testing)

In [56]:
with open(TEST_SCORE_PATH + 'score_aggregation_svm.pkl', 'wb') as f:
    pickle.dump(final_predictions, f)

In [None]:
with open(TEST_SCORE_PATH + 'score_aggregation_svm.pkl', 'rb') as f:
    final_predictions = pickle.load(f)

In [57]:
report = classification_report(actual_class_test, final_predictions)
print(report)

              precision    recall  f1-score   support

       fake/       0.93      1.00      0.96       340
       real/       0.99      0.87      0.92       178

    accuracy                           0.95       518
   macro avg       0.96      0.93      0.94       518
weighted avg       0.95      0.95      0.95       518



In [62]:
def metrics(actual, predicted):
    m = keras.metrics.BinaryAccuracy()
    m.update_state(actual, predicted)
    print(f'Binary Accuracy - {m.result().numpy():.4f}')
    
    m = keras.metrics.Precision()
    m.update_state(actual, predicted)
    print(f'Precision - {m.result().numpy():.4f}')

    m = keras.metrics.Recall()
    m.update_state(actual, predicted)
    print(f'Recall - {m.result().numpy():.4f}')

    m = keras.metrics.AUC()
    m.update_state(actual, predicted)
    print(f'AUC - {m.result().numpy():.4f}')

In [63]:
pred = [0 if x == CLASS_FAKE else 1 for x in final_predictions]
actual = [0 if x == CLASS_FAKE else 1 for x in actual_class_test]


metrics(pred, actual)

Binary Accuracy - 0.9517
Precision - 0.8652
Recall - 0.9935
AUC - 0.9637
