In [2]:
import sys
sys.path.append('../../Facial-Landmarks-Detector/')
sys.path.append('../../Revelio-LipsMovement/')
sys.path.append('../../Dynamic-texture-analysis-for-detecting-fake-faces-in-video-sequences/')
sys.path.append('../../Modules/dft/lib/')
sys.path.append('../../SBI2/')
sys.path.append('../../FaceDetection/')

In [5]:
from revelio import *

In [None]:
import sys

# Make the sibling project directories importable. Each entry is added only
# once so that re-running this cell does not keep growing sys.path.
for _module_dir in (
    '../../Facial-Landmarks-Detector/',
    '../../Revelio-LipsMovement/',
    '../../Dynamic-texture-analysis-for-detecting-fake-faces-in-video-sequences/',
    '../../Modules/dft/lib/',
    '../../SBI2/',
    '../../FaceDetection/',
):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

# The original line was the truncated statement `from revel` (a SyntaxError);
# completed to match the import used by the other cells of this notebook.
from revelio import *

In [None]:
import cv2
from matplotlib import pyplot as plt
import dlib
from landmarks_detector import *
from LipMovementClassifier import *
from FDA_Model import *
from DynamicTexture import *
from sbi_inference import *
from detect_face import *
import numpy as np
import joblib

In [None]:
def read_video_frames(videoCapture):
    """Drain a capture object into parallel grayscale and colour frame lists.

    Args:
        videoCapture: object whose ``read()`` returns a ``(ret, frame)`` pair,
            e.g. a ``cv2.VideoCapture``. It is read until ``ret`` is falsy and
            is not released here.

    Returns:
        Tuple ``(grayFrames, coloredFrames)`` of two equally long lists. Entry
        ``i`` of the first list is the ``cv2.COLOR_BGR2GRAY`` conversion of
        entry ``i`` of the second, which is the frame exactly as read.
    """
    gray_list, color_list = [], []
    ok, image = videoCapture.read()
    while ok:
        # keep the untouched frame, then store its single-channel version
        color_list.append(image)
        gray_list.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
        ok, image = videoCapture.read()
    return gray_list, color_list

In [None]:
def initialize_face_detector():
    """Load the serialized face detector and tune it for this notebook.

    The detector is read with ``joblib.load`` from a fixed relative path
    (joblib files are pickle-based, so only load files you trust). Its first
    strong classifier is then cut down to its first 80 weak classifiers and
    given a new threshold, and the detector's ``stride`` and ``scale_dist``
    attributes are overwritten.

    Returns:
        The configured detector object.
    """
    detector = joblib.load('../../FaceDetection/hFeatures6/faceDetector2.joblib')

    first_stage = detector.classifier.strong_classifiers[0]
    first_stage.weak_classifiers = first_stage.weak_classifiers[:80]
    # NOTE(review): the threshold is half the sum of `alphas` as stored on the
    # classifier; `alphas` itself is not truncated here - confirm that is intended.
    first_stage.θ = np.sum(first_stage.alphas) / 2

    detector.stride = 10
    detector.scale_dist = 1.25
    return detector


In [None]:
def detect_face_region(face_detector, grayFrame):
    """Locate a face in a frame and return its box in original-frame pixels.

    A frame with either dimension above 320 is first resized so that its longer
    side becomes 320 (the shorter side is scaled proportionally and truncated
    to an int). The detector runs on that possibly smaller image, and the box
    it reports is scaled back to the input frame's coordinate system.

    Args:
        face_detector: object exposing ``find_face(image, min_size=...)`` that
            returns a 4-tuple whose second element is ``(x1, y1, x2, y2)``.
        grayFrame: image array indexed as ``shape[0]`` rows, ``shape[1]`` columns.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of ints in the coordinates of the input frame.
    """
    maxdim = 320
    full_h, full_w = grayFrame.shape[0], grayFrame.shape[1]

    if full_h > maxdim or full_w > maxdim:
        # cv2.resize expects the target size as (width, height)
        if full_h > full_w:
            target = (int(maxdim * full_w / full_h), maxdim)
        else:
            target = (maxdim, int(maxdim * full_h / full_w))
        grayFrame = cv2.resize(grayFrame, target)

    # only the region is needed from the detector's result
    _, region, _, _ = face_detector.find_face(grayFrame, min_size=50)
    x1, y1, x2, y2 = region

    # map the box from the (possibly downscaled) image back to the original
    small_h, small_w = grayFrame.shape[0], grayFrame.shape[1]
    return (
        int(x1 * full_w / small_w),
        int(y1 * full_h / small_h),
        int(x2 * full_w / small_w),
        int(y2 * full_h / small_h),
    )

In [None]:
def get_face_frames(face_detector, grayFrames, coloredFrames):
    """Crop the detected face out of every frame and resize crops to one size.

    For each grayscale frame the face box is obtained from
    ``detect_face_region`` and the same box is cut from the grayscale frame and
    from the colour frame at the same index. All crops are then resized to a
    square whose side is the smallest crop *height* found in the sequence.

    Args:
        face_detector: detector passed through to ``detect_face_region``.
        grayFrames: sequence of grayscale frames.
        coloredFrames: sequence of colour frames, index-aligned with
            ``grayFrames``.

    Returns:
        Tuple ``(faceFrames, coloredFaceFrames)`` of numpy arrays holding the
        resized grayscale and colour crops.

    Raises:
        ValueError: if ``grayFrames`` is empty (for example when the video
            could not be read), instead of failing later with an opaque error
            from ``min()`` on an empty list.
    """
    if len(grayFrames) == 0:
        raise ValueError('get_face_frames: no frames to process (empty or unreadable video?)')

    faceFrames = []
    coloredFaceFrames = []
    for i, frame in enumerate(grayFrames):
        x1, y1, x2, y2 = detect_face_region(face_detector, frame)
        faceFrames.append(frame[y1:y2, x1:x2])
        coloredFaceFrames.append(coloredFrames[i][y1:y2, x1:x2])

    # resize face frames to the minimum face size (smallest crop height)
    minSize = min([face.shape[0] for face in faceFrames])
    faceFrames = [cv2.resize(face, (minSize, minSize)) for face in faceFrames]
    coloredFaceFrames = [cv2.resize(face, (minSize, minSize)) for face in coloredFaceFrames]

    return np.array(faceFrames), np.array(coloredFaceFrames)

In [None]:
def sbi_analysis(sbiModel, seqLength, coloredFaceFrames):
    """Run the SBI model on consecutive, non-overlapping frame sequences.

    The frames are cut into back-to-back chunks of ``seqLength`` frames; any
    trailing frames that do not fill a whole chunk are ignored. Each chunk is
    passed to ``sbiModel.infer(chunk, num_frames=seqLength)``.

    Args:
        sbiModel: object exposing ``infer(sequence, num_frames=...)``.
        seqLength: number of frames per chunk.
        coloredFaceFrames: sliceable sequence of frames supporting ``len()``.

    Returns:
        List with one ``infer`` result per full chunk, in chunk order.
    """
    usable = (len(coloredFaceFrames) // seqLength) * seqLength
    chunk_starts = range(0, usable, seqLength)
    chunks = np.array([coloredFaceFrames[start:start + seqLength] for start in chunk_starts])
    return [sbiModel.infer(chunk, num_frames=seqLength) for chunk in chunks]

In [None]:
def initializeLipsMovementModel():
    """Build the lips-movement classifier in prediction mode.

    Four MS-TCN checkpoint paths are passed, in the order deepfakes,
    face2face, faceswap, neuraltextures, together with the feature-extractor
    checkpoint and the MS-TCN config file. All paths are relative to the
    notebook's working directory.

    Returns:
        The constructed ``LipMovementClassifier`` instance.
    """
    LipsModelsLocations = '../../Revelio-LipsMovement/trainedmodels/'
    checkpoint_suffixes = (
        'deepfakes/lips_movements_classifer.pth',
        'face2face/lips_movements_classifer.pth',
        'faceswap/lips_movements_classifer.pth',
        'neuraltextures/lips_movements_classifer.pth',
    )
    mstcn_paths = [LipsModelsLocations + suffix for suffix in checkpoint_suffixes]

    return LipMovementClassifier(
        isPredictor=True,
        predictionMSTCNModelPaths=mstcn_paths,
        featureExtractorModelPath='../../Revelio-LipsMovement/trainedmodels/resnet_feature_extractor.pth',
        mstcnConfigFilePath='../../Revelio-LipsMovement/models/configs/mstcn.json',
    )

In [None]:
def extract_analysis(allLipsPredictions, fdaPredictions, dynamicTexturePredictionsBinary,dynamicTexturePredictionsMulti, sbiPredictions):
    """Collect the raw outputs of every analysis into one flat result dict.

    Args:
        allLipsPredictions: indexable with 0..3; each entry is an array. The
            entries are reported under the DF, F2F, FS and NT keys in that
            order, each as ``1 - value``.
        fdaPredictions: mapping with the keys ``'deepfake'``, ``'face2face'``,
            ``'faceswap'`` and ``'neuraltextures'``; values are copied as-is.
        dynamicTexturePredictionsBinary: indexable with 0..3; each entry is an
            array, reported unchanged under the DF, F2F, FS and NT keys.
        dynamicTexturePredictionsMulti: array, reported flattened.
        sbiPredictions: sequence of SBI results, reported as-is and averaged.

    Returns:
        Dict holding, per analysis, the flattened per-item values as plain
        lists and their ``np.average`` under the matching ``...Avg`` key.
    """
    def as_flat_list(values):
        # arrays become plain (JSON-friendly) Python lists
        return values.flatten().tolist()

    lips_df, lips_f2f, lips_fs, lips_nt = (allLipsPredictions[k] for k in range(4))
    dt_df, dt_f2f, dt_fs, dt_nt = (dynamicTexturePredictionsBinary[k] for k in range(4))

    summary = {}

    # lips movement: values are inverted (1 - prediction) before reporting
    summary['LipDF'] = as_flat_list(1 - lips_df)
    summary['LipDFAvg'] = 1 - np.average(lips_df)
    summary['LipF2F'] = as_flat_list(1 - lips_f2f)
    summary['LipF2FAvg'] = 1 - np.average(lips_f2f)
    summary['LipFS'] = as_flat_list(1 - lips_fs)
    summary['LipFSAvg'] = 1 - np.average(lips_fs)
    summary['LipNT'] = as_flat_list(1 - lips_nt)
    summary['LipNTAvg'] = 1 - np.average(lips_nt)

    # frequency-domain analysis: passed through untouched
    summary['FDA_DF'] = fdaPredictions['deepfake']
    summary['FDA_F2F'] = fdaPredictions['face2face']
    summary['FDA_FS'] = fdaPredictions['faceswap']
    summary['FDA_NT'] = fdaPredictions['neuraltextures']

    # dynamic texture, binary models
    summary['DTBinaryDF'] = as_flat_list(dt_df)
    summary['DTBinaryDFAvg'] = np.average(dt_df)
    summary['DTBinaryF2F'] = as_flat_list(dt_f2f)
    summary['DTBinaryF2FAvg'] = np.average(dt_f2f)
    summary['DTBinaryFS'] = as_flat_list(dt_fs)
    summary['DTBinaryFSAvg'] = np.average(dt_fs)
    summary['DTBinaryNT'] = as_flat_list(dt_nt)
    summary['DTBinaryNTAvg'] = np.average(dt_nt)

    # dynamic texture, multi-class model
    summary['DTMulti'] = as_flat_list(dynamicTexturePredictionsMulti)

    # self-blended-images model
    summary['SBI'] = sbiPredictions
    summary['SBIAvg'] = np.average(sbiPredictions)

    return summary

In [None]:
def analyze_video(videoCapture):
    """Run the whole detection pipeline on one video and gather the scores.

    Stages, in order (a progress line is printed before each): read all
    frames, crop the face from every frame, predict landmarks on each face
    crop, then run the lips-movement, frequency-domain, dynamic-texture
    (binary and multi) and SBI analyses. Every model is constructed inside
    this call from fixed relative paths.

    Args:
        videoCapture: capture object accepted by ``read_video_frames``
            (e.g. a ``cv2.VideoCapture``). It is not released here.

    Returns:
        The result dict built by ``extract_analysis``.
    """
    print('Reading video frames...')
    gray_seq, color_seq = read_video_frames(videoCapture)

    print('Detecting face region...')
    # build the face detector, then detect and crop the face in every frame
    faces_gray, faces_color = get_face_frames(initialize_face_detector(), gray_seq, color_seq)

    print('Detecting landmarks...')
    landmark_model = LandmarksDetector(isPredictor=True, modelspath='../../Facial-Landmarks-Detector/landmarksmodels')
    # each crop already is the face, so the box handed over spans the whole crop
    landmarks = np.array([
        landmark_model.predict(face, (0, 0, face.shape[1], face.shape[0]))
        for face in faces_gray
    ])

    print('Lips Movement Analysis...')
    lips_scores = initializeLipsMovementModel().predict(faces_gray, landmarks)

    print('Frequency Domain Analysis...')
    fda_scores = FDA(model_path='../../Modules/dft/Models').predict(faces_gray)

    print('Dynamic Texture Analysis...')
    dt_models_dir = '../../Dynamic-texture-analysis-for-detecting-fake-faces-in-video-sequences/models/'
    dt_binary_model = dynamicTexture('cf23', 'binary', dt_models_dir)
    dt_multi_model = dynamicTexture('cf23', 'multi', dt_models_dir)
    dt_binary_scores = dt_binary_model.predict(faces_gray, 30)
    dt_multi_scores = dt_multi_model.predict(faces_gray, 30)

    print('SBI Analysis...')
    # SBI works on the colour crops, in sequences of 32 frames
    sbi_scores = sbi_analysis(SBI_inference('../../SBI2/36_0.9899_val.tar'), 32, faces_color)

    return extract_analysis(lips_scores, fda_scores, dt_binary_scores, dt_multi_scores, sbi_scores)

In [1]:
from revelio import *

  "class": algorithms.Blowfish,


In [2]:
revelio = Revelio()
revelio.analyze_video(cv2.VideoCapture('002_006.mp4'))

Reading video frames...
Detecting face region...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Detecting landmarks...
Lips Movement Analysis...
Frequency Domain Analysis...




Dynamic Texture Analysis...




SBI Analysis...
Loaded pretrained weights for efficientnet-b4


{'LipDF': [0.9024010300636292,
  0.8124421238899231,
  0.16847741603851318,
  1.0,
  0.999993622303009,
  1.0,
  0.9999570250511169,
  0.9999986886978149,
  1.0,
  1.0,
  1.0,
  0.9999997615814209],
 'LipDFAvg': 0.9069391414523125,
 'LipF2F': [0.05867832899093628,
  0.48605281114578247,
  0.0030385851860046387,
  0.025774776935577393,
  0.0006885528564453125,
  1.1920928955078125e-07,
  0.2463863492012024,
  0.8766398429870605,
  0.00032460689544677734,
  0.9371023178100586,
  0.7798152565956116,
  2.384185791015625e-07],
 'LipF2FAvg': 0.2845418453216553,
 'LipFS': [0.013775825500488281,
  0.9344635605812073,
  0.35719382762908936,
  0.018836677074432373,
  1.430511474609375e-06,
  0.45079827308654785,
  0.0013275146484375,
  0.0014240741729736328,
  0.011612355709075928,
  0.12604427337646484,
  0.6674356460571289,
  0.8218920230865479],
 'LipFSAvg': 0.2837337851524353,
 'LipNT': [0.9640939235687256,
  0.9168065190315247,
  0.9997347593307495,
  0.9965834617614746,
  0.114696443080902

In [None]:
analysisResult = analyze_video(cv2.VideoCapture('002_006.mp4'))

In [None]:
type(sbiPredictions)

In [None]:
resultMap = extract_analysis(allLipsPredictions, fdaPredictions, dynamicTexturePredictionsBinary,dynamicTexturePredictionsMulti, sbiPredictions)

In [None]:
resultMap

In [None]:
fdaPredictions['deepfake']

In [None]:
np.average(allLipsPredictions, axis=1)

In [None]:
#read video
cap = cv2.VideoCapture('002_006.mp4')

#read every frame into two parallel lists (grayscale and colour)
# This reuses the notebook's read_video_frames helper rather than repeating
# its loop, and releases the capture even if reading fails.
try:
    grayFrames, coloredFrames = read_video_frames(cap)
finally:
    cap.release()



In [None]:
grayFrames[0].shape[1]//4 

In [None]:
region

In [None]:
img =cv2.resize(grayFrames[0], (grayFrames[0].shape[1], grayFrames[0].shape[0]))

In [None]:
#plot rectangle on img
# NOTE(review): `region` is not assigned at notebook level by any visible cell
# (it is only a local inside detect_face_region), so this cell depends on
# kernel state from elsewhere - confirm where it comes from.
# cv2.rectangle is called without using its return value; the box corners are
# (region[0], region[1]) and (region[2], region[3]).
cv2.rectangle(img, (region[0], region[1]), (region[2], region[3]), (255, 255, 255), 2)

plt.imshow(img)

In [None]:
#crop face from frames
# Uses dlib's frontal face detector and keeps only the first detection of each
# frame; a frame with no detection raises IndexError at `[0]`.
detector = dlib.get_frontal_face_detector()
faceFrames, coloredFaceFrames = [], []
for i, frame in enumerate(grayFrames):
    coloredFrame = coloredFrames[i]
    face = detector(frame)[0]
    x1, y1, x2, y2 = face.left(), face.top(), face.right(), face.bottom()
    # the same box is cut from the grayscale frame and from its colour twin
    faceFrames.append(frame[y1:y2, x1:x2])
    coloredFaceFrames.append(coloredFrame[y1:y2, x1:x2])

In [None]:
np.savez_compressed('coloredFaceFrames006.npz', frames=coloredFaceFrames)

In [None]:
# Load previously saved face crops and landmarks from disk instead of
# converting the in-memory lists (the two commented lines below).
# NOTE(review): no visible cell writes 'faceFrames.npz' or
# 'framesLandmarks.npz' - presumably saved in an earlier session; confirm.
# faceFrames = np.array(faceFrames)
# framesLandmarks = np.array(framesLandmarks)
faceFrames = np.load('faceFrames.npz')['frames']
framesLandmarks = np.load('framesLandmarks.npz')['landmarks']


In [None]:
lipsMovementModel = initializeLipsMovementModel()

In [None]:
results = lipsMovementModel.predict(faceFrames, framesLandmarks)

In [None]:
results[0].shape

In [None]:
print(faceFrames.shape)
print(framesLandmarks.shape)

In [None]:
coloredFaceFrames = np.load('coloredFaceFrames006.npz')['frames']

In [None]:
coloredFaceFrames.shape

In [None]:
splitFrames.shape

In [None]:
sbi_results