In [1]:
import os 
import glob 
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
import copy
import h5py
import pickle
import pandas as pd

from math import log10, sqrt
from skimage.measure import compare_ssim
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler


## Prepare the training data

In [2]:
# Collect the usable frame paths: good_frames.txt holds one path per line.
good_frames = []

with open('good_frames.txt','r') as good_frames_file:
    good_frames.extend(line.replace('\n','') for line in good_frames_file)

parts = set()

# Maps "<frame dir>/<video>.h5" -> list of the frame paths that belong to that video.
video_dict = {}

for frame in good_frames:
    # Frames whose path mentions 'fakes_low_quality' are left out entirely.
    if 'fakes_low_quality' not in frame:
        # The video name is the part of the frame's file name before the first '_'.
        video_name = os.path.basename(frame).split('_')[0]
        hdf5_key = os.path.join(os.path.dirname(frame), video_name + '.h5')
        # Create the per-video list on first sight, then record the frame.
        video_dict.setdefault(hdf5_key, []).append(frame)
    


In [3]:
# Sort the per-video HDF5 keys into two buckets based on a marker in the path.
fakes, originals, labels = [], [], []

# (path marker, label value, destination list); order matters: 'fakes' is
# tested first, so a key containing both markers is treated as a fake.
_class_buckets = (
    ('fakes', 0, fakes),
    ('originals', 1, originals),
)

for key in video_dict:
    # Low-quality fakes are never part of either bucket.
    if 'fakes_low_quality' in key:
        continue
    for _marker, _label_value, _bucket in _class_buckets:
        if _marker in key:
            label = _label_value
            _bucket.append(key)
            break  # first matching marker wins, mirroring if/elif

In [4]:
# Number of feature columns in each stored row; the label is the single
# column that immediately follows them.
N_FEATURES = 259


def _load_video_features(h5_path):
    """Read one video's stored rows from its HDF5 file into a NumPy array.

    The dataset is stored under a key equal to the file path itself. The file
    is opened read-only and is closed again before the array is returned.

    Raises:
        KeyError: if the file does not contain a dataset under that key.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        # `[()]` reads the full dataset into memory; it replaces the
        # deprecated `Dataset.value` accessor (removed in h5py 3).
        return h5_file[h5_path][()]


# One constant label per video key: 1 for originals, 0 for fakes.
original_labels = [1] * len(originals)
fakes_labels = [0] * len(fakes)

# Split each class separately (80/20, fixed seed) so both classes are present
# in the training and the testing portion, and so that all rows of one video
# end up on the same side of the split.
x_train_originals, x_test_originals, y_train_originals, y_test_originals = train_test_split(
    originals, original_labels, train_size=0.8, test_size=0.2, random_state=42)
x_train_fakes, x_test_fakes, y_train_fakes, y_test_fakes = train_test_split(
    fakes, fakes_labels, train_size=0.8, test_size=0.2, random_state=42)

x_train = x_train_originals + x_train_fakes
x_test = x_test_originals + x_test_fakes

# Video-level labels, kept in the same order as x_train / x_test.
y_train = y_train_originals + y_train_fakes
# Fixed: this previously concatenated y_train_fakes, which gave y_test the
# wrong contents and a length that did not match x_test.
y_test = y_test_originals + y_test_fakes

# Stack the rows of every video into one 2-D array per split.
training_set = np.vstack([_load_video_features(path) for path in x_train])
testing_set = np.vstack([_load_video_features(path) for path in x_test])

# Separate the feature columns from the label column (kept 2-D, shape (n, 1)).
training_data = training_set[:, 0:N_FEATURES]
testing_data = testing_set[:, 0:N_FEATURES]
training_labels = training_set[:, N_FEATURES:N_FEATURES + 1]
testing_labels = testing_set[:, N_FEATURES:N_FEATURES + 1]

  del sys.path[0]
  


## Scale the data...

In [5]:
# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted transform is then applied to the testing rows.
scaler = StandardScaler()

training_data, testing_data = (
    scaler.fit_transform(training_data),
    scaler.transform(testing_data),
)

# Re-attach the label column so each set is again [features | label].
training_set = np.hstack((training_data, training_labels))
testing_set = np.hstack((testing_data, testing_labels))

# Flatten the (n, 1) label column into a 1-D vector.
training_labels = np.ravel(training_set[:, 259:260])
testing_labels = np.ravel(testing_set[:, 259:260])

## Train three SVM models (linear, RBF, and sigmoid kernels)

In [6]:
# Support-vector classifier with a linear kernel, fitted on the training rows.
clfs = svm.SVC(kernel='linear')
clfs.fit(X=training_data, y=training_labels)

SVC(kernel='linear')

In [7]:
# Support-vector classifier with an RBF kernel, fitted on the training rows.
clfr = svm.SVC(kernel='rbf')
clfr.fit(X=training_data, y=training_labels)

SVC()

In [8]:
# Support-vector classifier with a sigmoid kernel, fitted on the training rows.
clfsig = svm.SVC(kernel='sigmoid')
clfsig.fit(X=training_data, y=training_labels)

SVC(kernel='sigmoid')

## Save the models and the train/test sets for evaluation

In [9]:
# Output file names for the three fitted classifiers.
svm_linear = "svm_model_linear.pkl"
svm_rbf =  "svm_model_rbf.pkl"
svm_sig =  "svm_model_sig.pkl"

# Output file names for the data sets and the list of test video files.
training_filename = "training_set.pkl"
testing_filename = "testing_set.pkl"
testing_associated_files = "testing_files.pkl"

# NOTE(review): the fitted `scaler` is not written out here; anything that
# wants to feed new, unscaled rows to these models would need it. Confirm
# whether that is intended.

# Pickle every artefact to its own file, in the order listed.
_artefacts = (
    (svm_linear, clfs),
    (svm_rbf, clfr),
    (svm_sig, clfsig),
    (training_filename, training_set),
    (testing_filename, testing_set),
    (testing_associated_files, x_test),
)

for _target_path, _payload in _artefacts:
    with open(_target_path, 'wb') as _target_file:
        pickle.dump(_payload, _target_file)
    

## Training data preparation and model training are done