In [1]:
import keras

from sklearn.svm import SVC
from sklearn.metrics import classification_report

import numpy as np
import pickle

In [2]:
DS_CDFV1 = 'celeb_df_v1/'
DS_CDFV2 = 'celeb_df_v2/'

DS_ORGINAL = 'dataset_original/'
DS_SPLIT = 'dataset_split/'
DS_IFRAMES = 'dataset_iframes/'
DS_FACE = 'dataset_face/'
DS_FACE_IMG = 'dataset_face_img/'
DS_SRM_SNIPPETS = 'dataset_srm_snippets_5/'
DS_SEGMENTS = 'dataset_segments/'
DS_RAW = 'dataset_raw/'
DS_RESIDUALS = 'dataset_residuals/'
DS_TEMPORAL = 'dataset_temporal/'
MODELS = 'models/'


SEG_1 = 'seg_1/'
SEG_2 = 'seg_2/'
SEG_3 = 'seg_3/'
SEG_4 = 'seg_4/'
SEG_5 = 'seg_5/'

SEG = ['seg_1_', 'seg_2_', 'seg_3_', 'seg_4_', 'seg_5_']

DS_TRAIN = 'train_dataset/'
DS_TEST = 'test_dataset/'
DS_VAL = 'val_dataset/'

CLASS_FAKE = 'fake/'
CLASS_REAL = 'real/'


TOP_LEVEL_1 = [DS_SPLIT, DS_IFRAMES, DS_FACE, DS_FACE_IMG, DS_SRM_SNIPPETS]
TOP_LEVEL_2 = [DS_SEGMENTS, DS_RAW, DS_RESIDUALS]
SEGMENTS = [SEG_1, SEG_2, SEG_3, SEG_4, SEG_5]
SPLIT = [DS_TRAIN, DS_TEST, DS_VAL]
CLASS = [CLASS_REAL, CLASS_FAKE]

DATASET = [DS_CDFV1, DS_CDFV2]

In [3]:
TEST_SCORE_PATH = 'test_scores/'

In [4]:
class VideoScore():
    def __init__(self, filename, score, true_class, fake_on_lower_half = True):
        self.filename = filename
        self.score = score
        self.true_class = true_class
        self.fake_on_lower_half = fake_on_lower_half

    def get_filename(self):
        return self.filename
    
    def get_score(self):
        return self.score
    
    def get_true_class(self):
        return self.true_class
    
    def get_fake_on_lower_half(self):
        return self.fake_on_lower_half

# Get Model Scores

In [5]:
srm_scores_train = []
srm_scores_test = []

with open(TEST_SCORE_PATH + 'srm_scores_train', 'rb') as f:
    srm_scores_train = pickle.load(f)

with open(TEST_SCORE_PATH + 'srm_scores_test', 'rb') as f:
    srm_scores_test = pickle.load(f)

In [6]:
mesonet_scores_train = []
mesonet_scores_test = []

with open(TEST_SCORE_PATH + 'mesonet_scores_train', 'rb') as f:
    mesonet_scores_train = pickle.load(f)

with open(TEST_SCORE_PATH + 'mesonet_scores_test', 'rb') as f:
    mesonet_scores_test = pickle.load(f)

In [7]:
temporal_scores_train = []
temporal_scores_test = []

with open(TEST_SCORE_PATH + 'temporal_scores_train', 'rb') as f:
    temporal_scores_train = pickle.load(f)

with open(TEST_SCORE_PATH + 'temporal_scores_test', 'rb') as f:
    temporal_scores_test = pickle.load(f)

# TEMP SOLUTION
# temporal_scores_test = mesonet_scores_test
# temporal_scores_train = mesonet_scores_train 

In [8]:
print(len(srm_scores_test))
print(len(srm_scores_train))

print(len(mesonet_scores_test))
print(len(mesonet_scores_train))

print(len(temporal_scores_test))
print(len(temporal_scores_train))

518
4810
518
4810
518
4809


# Sort Scores by Video

In [21]:
mesonet_names = set([m.get_filename() for m in mesonet_scores_train])
temporal_names = set([t.get_filename() for t in temporal_scores_train])

extra_filename = mesonet_names.symmetric_difference(temporal_names)

m_train = []
s_train = []
t_train = []

for m in mesonet_scores_train:
    if m.get_filename() not in extra_filename:
        m_train.append(m)

for s in srm_scores_train:
    if s.get_filename() not in extra_filename:
        s_train.append(s)

for t in temporal_scores_train:
    if t.get_filename() not in extra_filename:
        t_train.append(t)

print(len(m_train))
print(len(s_train))
print(len(t_train))

3846
3846
3846


In [22]:
for m, s, t in zip(m_train, s_train, t_train):
    assert m.get_filename() == s.get_filename() == t.get_filename()
    assert m.get_true_class() == s.get_true_class() == t.get_true_class()

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    assert m.get_filename() == s.get_filename() == t.get_filename()
    assert m.get_true_class() == s.get_true_class() == t.get_true_class()

# Averaging + Weighted Voting

In [23]:
def get_avg_score(s1, s2, s3, w1=0.33, w2=0.33, w3=0.33):
    return ((s1 * w1) + (s2 * w2) + (s3 * w3))

In [24]:
true_fake, true_real, false_fake, false_real = 0, 0, 0, 0

# TODO: Ensure that fake class ranges from 0 to 0.5

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    avg_score = get_avg_score(m.get_score(), s.get_score(), t.get_score(), w1=0.33, w2=0.33, w3=0.33)

    
    if avg_score < 0.5 and m.get_true_class() == CLASS_FAKE:
        true_fake += 1

    elif avg_score >= 0.5 and m.get_true_class() == CLASS_REAL:
        true_real += 1

    elif avg_score < 0.5 and m.get_true_class() == CLASS_REAL:
        false_fake += 1

    else:
        false_real += 1
    

In [25]:
accuracy = (true_real + true_fake) / (true_fake + true_real + false_fake + false_real)

In [26]:
print(f'True Fake:{true_fake}')
print(f'True Real:{true_real}')
print(f'False Fake:{false_fake}')
print(f'False Real:{false_real}')
print(f'---')
print(f'Accuracy: {accuracy}')

True Fake:340
True Real:115
False Fake:63
False Real:0
---
Accuracy: 0.8783783783783784


# Majority Voting
Use this only when you have odd number of models

In [36]:
true_fake, true_real, false_fake, false_real = 0, 0, 0, 0

# TODO: Ensure that fake class ranges from 0 to 0.5

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    pred_fake = 0

    # print(m.get_score())
    # print(s.get_score())
    # print(t.get_score())

    # print('---')

    if (m.get_score() < 0.5):
        pred_fake += 1

    if (s.get_score() < 0.5):
        pred_fake += 1

    if (t.get_score() < 0.5):
        pred_fake += 1

    if pred_fake >= 2 and m.get_true_class() == CLASS_FAKE:
        true_fake += 1
    elif pred_fake >= 2 and m.get_true_class() == CLASS_REAL:
        false_fake += 1
        continue

    if pred_fake <= 1 and m.get_true_class() == CLASS_REAL:
        true_real += 1
    else:
        false_real += 1


In [37]:
accuracy = (true_real + true_fake) / (true_fake + true_real + false_fake + false_real)

In [38]:
print(f'True Fake:{true_fake}')
print(f'True Real:{true_real}')
print(f'False Fake:{false_fake}')
print(f'False Real:{false_real}')
print(f'---')
print(f'Accuracy: {accuracy}')

True Fake:340
True Real:107
False Fake:71
False Real:340
---
Accuracy: 0.5209790209790209


# SVM Classifier

In [13]:
# Train SVM classifier using train scores, and test classifier using test scores

In [61]:
stream_predictions = []
actual_class = []
final_predictions = []

for m, s, t in zip(m_train, s_train, t_train):
    stream_predictions.append([m.get_score(), s.get_score(), t.get_score()])
    actual_class.append(m.get_true_class())

    # file = open(src_dir + filename)

    # for line in file:
    #     [pred1, pred2, pred3, actual] = np.float_(line.split())
    #     stream_predictions.append([pred1, pred2, pred3])
    #     actual_class.append(actual)

print(len(stream_predictions))
print(len(actual_class))

3846
3846


In [62]:
stream_testing = []
testing_class = []

for m, s, t in zip(mesonet_scores_test, srm_scores_test, temporal_scores_test):
    stream_testing.append([m.get_score(), s.get_score(), t.get_score()])
    testing_class.append(m.get_true_class())

print(len(stream_testing))
print(len(testing_class))

518
518


In [67]:
model = SVC(kernel='rbf', random_state=0,  probability=True)  
model.fit(stream_predictions, actual_class) 

In [68]:
final_predictions = model.predict(stream_testing)
# final_predictions

In [73]:
for a, t in zip(testing_class, final_predictions):
    print(f'{a} - {t}')

real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - fake/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - fake/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - fake/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - fake/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - fake/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ - real/
real/ 

In [82]:
final_predictions = model.predict_proba(stream_testing)
final_predictions

array([[0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012],
       [0.09984447, 0.90015553],
       [0.89999988, 0.10000012]])

In [83]:
final_predictions = model.decision_function(stream_testing)
final_predictions

array([ 1.00000001, -0.99999995,  1.00000001, -0.99999995,  1.00000001,
       -0.99999995,  1.00000001, -0.99999995,  1.00000001, -0.99999995,
        1.00000001, -0.99999995,  1.00000001, -0.99999995,  1.00000001,
       -0.99999995])

In [None]:
with open(MODELS + '/SCORE_AGGREGATION/score_agg', 'wb') as f:
    pickle.dump(model, f)

In [None]:
with open(MODELS + '/SCORE_AGGREGATION/score_agg', 'rb') as f:
    model = pickle.load(f)

In [69]:
report = classification_report(testing_class, final_predictions)
print(report)

              precision    recall  f1-score   support

       fake/       0.94      1.00      0.97       340
       real/       1.00      0.88      0.93       178

    accuracy                           0.96       518
   macro avg       0.97      0.94      0.95       518
weighted avg       0.96      0.96      0.96       518



In [70]:
def metrics(actual, predicted):
    m = keras.metrics.CategoricalAccuracy()
    m.update_state(actual, predicted)
    print(f'Categorical Accuracy - {m.result().numpy()}')
    
    m = keras.metrics.Precision()
    m.update_state(actual, predicted)
    print(f'Precision - {m.result().numpy()}')

    m = keras.metrics.Recall()
    m.update_state(actual, predicted)
    print(f'Recall - {m.result().numpy()}')

    m = keras.metrics.AUC()
    m.update_state(actual, predicted)
    print(f'AUC - {m.result().numpy()}')

In [71]:
pred_float = []
actual_float = []

for p in final_predictions:
    if p == CLASS_FAKE:
        pred_float.append(0)
    else:
        pred_float.append(1)

for p in testing_class:
    if p == CLASS_FAKE:
        actual_float.append(0)
    else:
        actual_float.append(1)

metrics(actual_float, pred_float)

Categorical Accuracy - 1.0
Precision - 1.0
Recall - 0.8764045238494873
AUC - 0.9382022619247437
