In [None]:
import matplotlib.pyplot as plt 
import os 
from tqdm import tqdm_notebook as tqdm 
import numpy as np 
import cv2 
import dlib 
face_detector = dlib.get_frontal_face_detector() 
from keras.applications.xception import preprocess_input 
import joblib 
import warnings
warnings.filterwarnings('ignore')
from keras import models

In [None]:
# Dataset folders on the mounted Drive, keyed by sequence type. Every path is
# derived from one root so the dataset can be relocated by editing a single line.
_FF_ROOT = '/content/drive/MyDrive/Thesis/Faceforensics_2'
_FF_MANIPULATED = _FF_ROOT + '/manipulated_sequences'

DATASET_PATHS = dict(
    original=_FF_ROOT + '/original_sequences/youtube',
    Deepfakes=_FF_MANIPULATED + '/Deepfakes',
    Face2Face=_FF_MANIPULATED + '/Face2Face',
    FaceSwap=_FF_MANIPULATED + '/FaceSwap',
)

In [None]:
def get_boundingbox(face, width, height, scale=1.3, minsize=None):
  """Compute a square crop region around a detected face.

  Args:
    face: object exposing left(), top(), right() and bottom() pixel coordinates
      (the callers in this notebook pass a dlib detection).
    width: width of the image the face was found in, in pixels.
    height: height of that image, in pixels.
    scale: factor applied to the longer face edge to get the square's side.
    minsize: optional lower bound for the side length (ignored when falsy).

  Returns:
    (x, y, size): top-left corner and side length of the square, with the
    corner clamped to be non-negative and the side shrunk so the square does
    not extend past the right or bottom image border.
  """
  left, top = face.left(), face.top()
  right, bottom = face.right(), face.bottom()

  # Side of the square: the longer face edge, enlarged by `scale`.
  side = int(max(right - left, bottom - top) * scale)
  if minsize and side < minsize:
    side = minsize

  mid_x = (left + right) // 2
  mid_y = (top + bottom) // 2

  # Centre the square on the face, but never start left of / above the image.
  half = side // 2
  origin_x = max(int(mid_x - half), 0)
  origin_y = max(int(mid_y - half), 0)

  # Shrink the side if the square would run past the right or bottom border.
  side = min(side, width - origin_x, height - origin_y)

  return origin_x, origin_y, side


def get_predicition(image):
  """Classify the first detected face in a frame and draw the verdict on it.

  The face is located with the module-level `face_detector`, cropped with
  `get_boundingbox`, scored by `evaluate`, and the frame is annotated with a
  box around the detection plus a "<label>_<confidence>%" caption below it.

  Args:
    image: frame as an OpenCV image array (height x width [x channels]).

  Returns:
    The frame with the annotation drawn on it. When no face is detected (or
    the crop is empty) the frame is returned unchanged, so callers can still
    write it to the output video.
  """
  height, width = image.shape[:2]

  faces = face_detector(image, 1)
  if len(faces) == 0:
    # Previously the IndexError was swallowed and the code below then failed on
    # the unassigned x/y/size/face; a faceless frame is simply passed through.
    return image
  face = faces[0]

  x, y, size = get_boundingbox(face=face, width=width, height=height)
  cropped_face = image[y:y+size, x:x+size]
  if cropped_face.size == 0:
    # Nothing to resize/classify (detection lies outside the frame).
    return image

  output, label = evaluate(cropped_face)

  # Text settings for the caption.
  font_face = cv2.FONT_HERSHEY_SIMPLEX
  thickness = 2
  font_scale = 1
  # Colour tuples as passed to OpenCV drawing calls (BGR order for frames read
  # with OpenCV): green for 'Real', red otherwise.
  color = (0, 255, 0) if label == 'Real' else (0, 0, 255)

  # The box is drawn around the raw detection, not the enlarged crop.
  x = face.left()
  y = face.top()
  w = face.right() - x
  h = face.bottom() - y
  cv2.putText(image, label+'_'+str('%.2f'%output)+'%', (x, y+h+30), font_face, font_scale, color, thickness, 2)

  return cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)


def evaluate(cropped_face):
  """Classify one cropped face with the global `model_Xc` network.

  Args:
    cropped_face: face crop in OpenCV's BGR channel order (as cut from a frame
      read with cv2), any size.

  Returns:
    (confidence, label): the winning class score scaled to a percentage, and
    'Fake' when class index 1 wins, otherwise 'Real'.

  Note:
    `model_Xc` is not defined in this part of the notebook; it must exist in
    the kernel before this function is called.
  """
  # create_data converts the training crops with cv2.COLOR_BGR2RGB, so the
  # crop is converted the same way here; feeding BGR at inference would swap
  # the red/blue channels relative to the training data.
  # NOTE(review): assumes model_Xc was trained on the arrays produced by create_data.
  rgb_face = cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB)
  img = cv2.resize(rgb_face, (299, 299))
  batch = np.expand_dims(img, axis=0)  # the model expects a leading batch axis
  batch = preprocess_input(batch)
  scores = model_Xc.predict(batch)[0]
  best = int(np.argmax(scores))
  label = 'Fake' if best == 1 else 'Real'
  return scores[best]*100.0, label


def final_model(video_path,limit_frames):
  """Annotate a video frame by frame and save the result as an MJPG .avi file.

  Every frame is passed through `get_predicition` and written to
  '<video file name without extension>_output.avi' in the current working
  directory, at 10 frames per second and the source resolution.

  Args:
    video_path: path of the video to process.
    limit_frames: stop after this many frames have been written. A value that
      the frame counter never equals (e.g. None) processes the whole video.

  Returns:
    None. If the video cannot be opened a message is printed and nothing is
    written.
  """
  # splitext/basename also handle file names with no extension or extra dots.
  video_name = os.path.splitext(os.path.basename(video_path))[0]
  capture = cv2.VideoCapture(video_path)
  if not capture.isOpened():
    capture.release()
    print("Could not open video:{}".format(video_path))
    return

  frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
  frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
  out = cv2.VideoWriter(video_name+'_output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))

  frames_done = 0
  try:
    while True:
      ok, image = capture.read()
      if not ok:
        # End of stream or decode failure: `image` is None, so stop here
        # instead of handing it to get_predicition.
        break
      out.write(get_predicition(image))
      frames_done += 1
      if frames_done%10 == 0:
        print("Number of frames complted:{}".format(frames_done))
      if frames_done == limit_frames:
        break
  finally:
    # Release both handles even if a frame fails; the writer must be released
    # for the output file to be finalised.
    capture.release()
    out.release()

In [None]:
# Training videos: the first 20 original folders and the first 6 folders of each
# manipulation type. NOTE: the selection follows os.listdir order, which Python
# does not guarantee; the cv/test cells slice the same listings.
train_original = os.listdir(os.path.join(DATASET_PATHS['original'], 'raw', 'images'))[:20]
train_Deepfakes = os.listdir(os.path.join(DATASET_PATHS['Deepfakes'], 'raw', 'images'))[:6]
train_FaceSwap = os.listdir(os.path.join(DATASET_PATHS['FaceSwap'], 'raw', 'images'))[:6]
train_Face2Face = os.listdir(os.path.join(DATASET_PATHS['Face2Face'], 'raw', 'images'))[:6]

# `types` gives the sequence type of each sub-list, in the same position.
train = [train_original, train_Deepfakes, train_FaceSwap, train_Face2Face]
types = ['original', 'Deepfakes', 'FaceSwap', 'Face2Face']

In [None]:
# Cross-validation videos: the entries right after the slices used for training
# (2 original folders starting at index 20, 1 folder per manipulation type at
# index 6). Depends on os.listdir returning the same order as in the train cell.
cv_original = os.listdir(os.path.join(DATASET_PATHS['original'], 'raw', 'images'))[20:22]
cv_Deepfakes = os.listdir(os.path.join(DATASET_PATHS['Deepfakes'], 'raw', 'images'))[6:7]
cv_FaceSwap = os.listdir(os.path.join(DATASET_PATHS['FaceSwap'], 'raw', 'images'))[6:7]
cv_Face2Face = os.listdir(os.path.join(DATASET_PATHS['Face2Face'], 'raw', 'images'))[6:7]

# Same sub-list order as `types`: original, Deepfakes, FaceSwap, Face2Face.
cv = [cv_original, cv_Deepfakes, cv_FaceSwap, cv_Face2Face]

In [None]:
# Test videos come from the end of each listing. Every slice stops at -1, so the
# very last entry of each listing is not included.
# NOTE(review): with short listings these slices can overlap the train/cv slices.
test_original = os.listdir(os.path.join(DATASET_PATHS['original'], 'raw', 'images'))[-6:-1]
test_Deepfakes = os.listdir(os.path.join(DATASET_PATHS['Deepfakes'], 'raw', 'images'))[-3:-1]
test_FaceSwap = os.listdir(os.path.join(DATASET_PATHS['FaceSwap'], 'raw', 'images'))[-3:-1]
test_Face2Face = os.listdir(os.path.join(DATASET_PATHS['Face2Face'], 'raw', 'images'))[-4:-1]

# Same sub-list order as `types`: original, Deepfakes, FaceSwap, Face2Face.
test = [test_original, test_Deepfakes, test_FaceSwap, test_Face2Face]

In [None]:
def track_face(split_type,Split,output_mkdir=True):
  """Crop the detected face out of 30 frames per video and save the crops.

  For every video folder in `split_type`, the frame images are ordered by file
  modification time, frames 10..39 are taken, the first face found in each is
  cropped with `get_boundingbox`, and the crop is written under
  /content/drive/MyDrive/Data_2/<Split>/<type>/<video>/ with the frame's
  original file name.

  Args:
    split_type: list of lists of video folder names, one sub-list per entry of
      the global `types` list (same order).
    Split: name of the split sub-folder to write into (e.g. 'train').
    output_mkdir: create the output folder for each video when truthy.

  Frames that cannot be read, contain no detectable face, or yield an empty
  crop are skipped.
  """
  for videos, kind in zip(split_type, types):
    for video in tqdm(videos):
      input_path = DATASET_PATHS[kind]+'/raw/images/'+video
      output_path = '/content/drive/MyDrive/Data_2/'+Split+'/'+kind+'/'+video
      if output_mkdir:
        os.makedirs(output_path, exist_ok=True)

      images = os.listdir(input_path)
      images.sort(key=lambda name: os.path.getmtime(input_path+'/'+name))

      for img in images[10:40]: # 30 frames from each video, skipping the first 10
        image = cv2.imread(input_path+'/'+img)
        if image is None:
          # cv2.imread returns None for files it cannot decode (e.g. stray
          # non-image files); skip them instead of crashing the whole run.
          continue

        faces = face_detector(image, 1)
        if len(faces) == 0:
          continue # no face detected in this frame

        height, width = image.shape[:2]
        x, y, size = get_boundingbox(face=faces[0], width=width, height=height)

        cropped_face = image[y:y+size, x:x+size]
        if cropped_face.size == 0:
          continue # cv2.imwrite cannot encode an empty image
        cv2.imwrite(output_path+'/'+img,cropped_face)

In [None]:
# Crop and save the faces of the training videos (written under Data_2/train).
track_face(split_type=train,Split='train')

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Crop and save the faces of the test videos (written under Data_2/test).
track_face(split_type=test,Split='test')

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Crop and save the faces of the cross-validation videos (written under Data_2/cv).
track_face(split_type=cv,Split='cv')

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Flatten each split into one list of "<video folder>_<type>" strings, so the
# videos can be shuffled across types while each entry still carries the type
# that create_data needs to locate the folder and derive the class.
train_ = [video + '_' + types[k] for k, videos in enumerate(train) for video in videos]

test_ = [video + '_' + types[k] for k, videos in enumerate(test) for video in videos]

cv_ = [video + '_' + types[k] for k, videos in enumerate(cv) for video in videos]

In [None]:
# Folders holding the cropped faces written by track_face, one per split.
_CROPS_ROOT = '/content/drive/MyDrive/Data_2'
TRAIN_DATADIR = _CROPS_ROOT + '/train'
TEST_DATADIR = _CROPS_ROOT + '/test'
CV_DATADIR = _CROPS_ROOT + '/cv'

In [None]:
def create_data(DATADIR,shuffled_list):
  """Load the cropped faces of the listed videos into an image array and labels.

  Args:
    DATADIR: split folder containing <type>/<video folder>/<frame image> files.
    shuffled_list: entries of the form "<video folder>_<type>"; videos are
      loaded in the order given.

  Returns:
    (X, y): X is an array of shape (n_images, 299, 299, 3) holding the RGB
    images resized to 299x299; y is a list with one class per image, 0 for
    type 'original' and 1 for every other type.

  Images that cannot be read or converted are reported and skipped.
  """
  X = []
  y = []
  for name in shuffled_list:
    # The video folder name may itself contain underscores (manipulated videos
    # are named like "<a>_<b>"), so split on the LAST underscore only and take
    # the class from the type rather than from the number of underscores.
    folder_name, _, label = name.rpartition('_')
    class_num = 0 if label == 'original' else 1

    path = DATADIR+'/'+label+'/'+folder_name
    files = os.listdir(path)
    files.sort(key=lambda fname: os.path.getmtime(path+'/'+fname))
    for img in tqdm(files):
      img_path = os.path.join(path,img)
      img_array = cv2.imread(img_path)
      if img_array is None:
        # cv2.imread signals an unreadable file by returning None.
        print("Could not read image: "+img_path)
        continue
      try:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
        new_array = cv2.resize(img_array, (299, 299))
      except cv2.error as e:
        print(str(e))
        continue
      X.append(new_array)
      y.append(class_num)

  print("Data gathering completed......\n Separating features and class lables")
  # reshape also gives an empty input the expected 4-D shape (0, 299, 299, 3).
  X = np.array(X).reshape(-1, 299, 299, 3)
  print("Done")
  return X,y

In [None]:
# Shuffle the training videos reproducibly: sort first so the starting order
# does not depend on earlier cells or on re-running this one, then shuffle with
# a fixed seed (the original unseeded shuffle gave a different order every run).
train_.sort()
np.random.default_rng(42).shuffle(train_)
X_train,Y_train = create_data(DATADIR=TRAIN_DATADIR, shuffled_list=train_)

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Data gathering completed......
 Separating features and class lables


  0%|          | 0/1140 [00:00<?, ?it/s]

Done


In [None]:
# Shuffle the test videos reproducibly: sort first so the starting order does
# not depend on earlier cells or on re-running this one, then shuffle with a
# fixed seed (the original unseeded shuffle gave a different order every run).
test_.sort()
np.random.default_rng(42).shuffle(test_)
X_test,Y_test = create_data(DATADIR=TEST_DATADIR, shuffled_list=test_)

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Data gathering completed......
 Separating features and class lables


  0%|          | 0/360 [00:00<?, ?it/s]

Done


In [None]:
# Load the cross-validation crops. Unlike train_/test_, cv_ is not shuffled here.
X_cv,Y_cv = create_data(DATADIR=CV_DATADIR, shuffled_list=cv_)

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Data gathering completed......
 Separating features and class lables


  0%|          | 0/150 [00:00<?, ?it/s]

Done


In [None]:
# Apply the Xception input preprocessing to all three splits. The arrays are
# expected to still hold raw uint8 pixels from create_data, so only convert
# while the dtype is uint8: re-running this cell would otherwise rescale data
# that has already been preprocessed.
if X_train.dtype == np.uint8:
  X_train = preprocess_input(X_train)
if X_cv.dtype == np.uint8:
  X_cv = preprocess_input(X_cv)
if X_test.dtype == np.uint8:
  X_test = preprocess_input(X_test)

In [None]:
# Show the kernel's current working directory.
!pwd

/content


In [None]:
# The joblib.dump calls in the next cell use relative file names, so switch to
# the folder where the pickles should be stored first.
%cd /content/drive/MyDrive/T2

/content/drive/MyDrive/T2


In [None]:
# Persist every split to the current working directory, features and labels
# side by side. The X_* objects are arrays; the Y_* objects are plain lists.
joblib.dump(X_train, 'x_train.pkl')
joblib.dump(Y_train, 'y_train.pkl')

joblib.dump(X_test, 'x_test.pkl')
joblib.dump(Y_test, 'y_test.pkl')

joblib.dump(X_cv, 'x_cv.pkl')
joblib.dump(Y_cv, 'y_cv.pkl')

['y_cv.pkl']