### Vamos carregar algumas arquiteturas que foram treinadas e vamos testá-las no dataset de vídeos.

In [1]:
import cv2

import pandas as pd

from facenet_pytorch import MTCNN

from PIL import Image
import glob, os

import torch
import torch.nn as nn
import torchvision

Vamos carregar um modelo ResNet18 treinado

In [2]:
resnet18 = torchvision.models.resnet18(pretrained=False)
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, 2)

resnet18.load_state_dict(torch.load('./Saved Models/resnet18_balanced_unfreezed_F97_R97_A97.pt'))

<All keys matched successfully>

Definimos um device onde a validação será rodada.

In [3]:
# Definimos um device onde os tensors estarão sendo processados
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


Criamos uma lista de todos as pastas que contém os vídeos para testar.

In [4]:
# Cria uma lista de todas as pastas disponíveis para treinamento
folders = next(os.walk('./Kaggle Dataset/'))[1]
folders[:5]

['dfdc_train_part_0',
 'dfdc_train_part_1',
 'dfdc_train_part_10',
 'dfdc_train_part_11',
 'dfdc_train_part_12']

Definimos algumas classes e funções para nos auxiliar.

In [62]:
class Videos():
    def __init__(self, folder_path):
        # Guarda o folder_path
        self.folder_path = folder_path
        
        # Guarda a lista de todos os arquivos de videos dentro do folder_path
        self.video_files = glob.glob(folder_path + '/*.mp4')
        
        # Lê o arquivo JSON que contém as informações dos deepfakes naquela pasta
        self.metadata = pd.read_json(folder_path + '/metadata.json').transpose() # Essa transposiçao eh feita pois as colunas e as linhas estao trocadas
        
    def getRandomVideo(self):
        video_path = random.choice(self.video_files)
        video_name = os.path.basename(video_path)
        label = self.metadata.loc[video_name].label
        
        return video_path, video_name, label
        
    def getRealVideo(self, video_name):
        real_video_name = self.metadata.loc[video_name].original
        # Verifica se é NaN, pois caso seja o nome original é o próprio video real
        if pd.isna(real_video_name):
            real_video_name = video_name
        real_video_path = folder_path + '/' + real_video_name
        return real_video_path, real_video_name, "REAL"
    
    def getAllVideosPath(self):
        for video_name, columns in self.metadata.iterrows():
            yield self.folder_path + '/' + video_name, video_name, columns[0] # Label

def showVideo(video_path, label):
    cap = cv2.VideoCapture(video_path)

    # Configura a cor a ser colocada na LABEL
    if label == 'REAL':
        color = (0, 255, 0) # Verde
    else:
        color = (0, 0, 255) # Vermelho    

    while(cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(Image.fromarray(frame))
            if boxes is not None:
                for box in boxes:
                    cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), color=[0, 255, 0], thickness=5)
                    # Escreve a label no vídeo sob a cabeça do indivíduo
                    cv2.putText(img=frame, text=label, org=(box[0], box[1]), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=color, thickness=2)

            frame = cv2.resize(frame, (1280, 720))
            cv2.imshow('frame', frame)
            #cv2.waitKey(10)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            cv2.destroyAllWindows()
            break

    cap.release()
    cv2.destroyAllWindows()
    
def predictVideo(video_path, padding=0, size=224, check_every_frame=15, model=resnet18):
    model = model.to(device)
    model.eval()
    
    cap = cv2.VideoCapture(video_path)
    transform = torchvision.transforms.ToTensor()
    face = None
    video_frame = 1
    predictions = [0, 0]

    while(cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            if video_frame == 1 or video_frame % check_every_frame == 0:
                boxes = None
                boxes, _ = mtcnn.detect(Image.fromarray(frame))
                if boxes is not None:
                    for box in boxes: 
                        face = frame[
                            int(max(box[1] - padding, 0)):int(max(box[3] + padding, 0)), 
                            int(max(box[0] - padding, 0)):int(max(box[2] + padding, 0))
                        ]
                        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), color=[0, 255, 0], thickness=5)
                        
                    # Predição
                    prediction = model(getYUVTensor(face, size, transform).to(device)).max(dim=1)[1]
                    predictions[prediction.item()] += 1
            frame = cv2.resize(frame, (1280, 720))
            #cv2.imshow('frame', frame)
            video_frame += 1
        else:
            break

    cap.release()
    cv2.destroyAllWindows()
    return predictions

def getYUVTensor(face, size, transform):
    face = cv2.cvtColor(face, cv2.COLOR_BGR2YCrCb)
    face = cv2.resize(face, (size, size))[:, :, 0]
    face_tensor = transform(face)
    face_tensor = torch.cat((face_tensor, face_tensor, face_tensor)).unsqueeze(0)
    return face_tensor

Defini-se uma função getVeredict que recebe o vetor de predições 'Fake' e 'Real' e devolve o veredito.

Nesse caso, o veredito é apenas a predição que está em maior quantidade no vídeo.

In [6]:
def getVeredict(predictions):
    prediction_dict = {0: 'FAKE',
                       1: 'REAL'}
    veredict = prediction_dict[predictions.index(max(predictions))]
    
    #if  max(predictions) / (min(predictions) + max(predictions)) > 0.65:
    #    return veredict
    #else:
    #    return "Uncertain... Most to {}, i guess?".format(veredict)
    return veredict

Coletamos uma pasta e vídeo aleatórios para teste.

In [98]:
# Definimos a coleta de uma pasta aleatória
import random
random_folder = random.choice(folders) + '/'

folder_path = './Kaggle Dataset/' + random_folder

# Instaciamos uma classe de Videos
videos = Videos(folder_path)

mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.68, 0.78, 0.78], min_face_size=150)

Mostra o vídeo para checagem visual.

In [99]:
video_path, video_name, label = videos.getRandomVideo()
showVideo(video_path, label)

Realiza as predições dele para cada 5 frames, devolve o vetor de predições e joga na função getVeredict para avaliação.

In [100]:
predictions = predictVideo(video_path, padding=10, size=224, check_every_frame=5, model=resnet18)
predictions, getVeredict(predictions)

([61, 0], 'FAKE')

Agora mostra o vídeo real do anterior.

In [101]:
real_video_path, real_video_name, label = videos.getRealVideo(video_name)
showVideo(real_video_path, label)

E novamente realiza as predições em cima dele.

In [102]:
predictions = predictVideo(real_video_path, padding=10, size=224, check_every_frame=5, model=resnet18)
predictions, getVeredict(predictions)

([8, 53], 'REAL')

Loop que valida todos os vídeos de todas as pastas do dataset do Kaggle e constrói uma matriz de confusão acumulativa das pastas.

In [23]:
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.68, 0.78, 0.78], min_face_size=150)

total_fake_videos = 0
total_real_videos = 0
correct_fake = 0
correct_real = 0

for folder in folders:
    folder_path = './Kaggle Dataset/' + folder
    videos = Videos(folder_path)
    videos_generator = videos.getAllVideosPath()
    print("-------------- Início do folder {} --------------".format(folder))
    videos_quantity = len(videos.video_files)
    percentage = 10
    print_every = int(videos_quantity / (100/percentage))

    for n_video, video_data in enumerate(videos_generator):
        if n_video % print_every == 0:
            print("{}: {:.2f}%...".format(folder, round(n_video / videos_quantity * 100)))
        #print("Video: {}".format(video_data[1]))
        predictions = predictVideo(video_data[0], padding=10, size=224, check_every_frame=15, model=resnet18)
        
        if video_data[2] == 'FAKE':
            total_fake_videos += 1
            if getVeredict(predictions) == 'FAKE':
                correct_fake += 1
        else:
            total_real_videos += 1
            if getVeredict(predictions) == 'REAL':
                correct_real += 1
                
    data = {"Predicted_Real": [correct_real, total_fake_videos - correct_fake], "Predicted_Fake": [total_real_videos - correct_real, correct_fake], "Total": [total_real_videos, total_fake_videos]}
    accuracy = (correct_real + correct_fake) / (total_real_videos + total_fake_videos)
    print('Accuracy: {:.2f}%'.format(accuracy*100))
    data = pd.DataFrame(data, index=['REAL', 'FAKE'])
    print(data)
    data.to_csv('./Saved .csv/videos_confusion_matrix.csv', index=False)

-------------- Início do folder dfdc_train_part_0 --------------
dfdc_train_part_0: 0.00%...
dfdc_train_part_0: 10.00%...
dfdc_train_part_0: 20.00%...
dfdc_train_part_0: 30.00%...
dfdc_train_part_0: 40.00%...
dfdc_train_part_0: 50.00%...
dfdc_train_part_0: 60.00%...
dfdc_train_part_0: 70.00%...
dfdc_train_part_0: 80.00%...
dfdc_train_part_0: 90.00%...
dfdc_train_part_0: 100.00%...
Accuracy: 97.60%
      Predicted_Real  Predicted_Fake  Total
REAL              79              25     86
FAKE               7            1223   1248
-------------- Início do folder dfdc_train_part_1 --------------
dfdc_train_part_1: 0.00%...
dfdc_train_part_1: 10.00%...
dfdc_train_part_1: 20.00%...
dfdc_train_part_1: 30.00%...
dfdc_train_part_1: 40.00%...
dfdc_train_part_1: 50.00%...
dfdc_train_part_1: 60.00%...
dfdc_train_part_1: 70.00%...
dfdc_train_part_1: 80.00%...
dfdc_train_part_1: 90.00%...
dfdc_train_part_1: 99.00%...
Accuracy: 98.78%
      Predicted_Real  Predicted_Fake  Total
REAL             185   

KeyboardInterrupt: 

In [26]:
data = {"Predicted_Real": [correct_real, total_fake_videos - correct_fake], "Predicted_Fake": [total_real_videos - correct_real, correct_fake], "Total": [total_real_videos, total_fake_videos]}
accuracy1 = "{:.2f}%".format(correct_real / total_real_videos*100)
accuracy2 = "{:.2f}%".format(correct_fake / total_fake_videos*100)
data['Accuracy'] = [accuracy1, accuracy2]
accuracy = (correct_real + correct_fake) / (total_real_videos + total_fake_videos)
print('Accuracy: {:.2f}%'.format(accuracy*100))
data = pd.DataFrame(data, index=['REAL', 'FAKE'])
data
#data.to_csv('./Saved .csv/videos_confusion_matrix.csv', index=False)

Accuracy: 97.67%


Unnamed: 0,Predicted_Real,Predicted_Fake,Total,Accuracy
REAL,490,84,574,85.37%
FAKE,61,5596,5657,98.92%
