Vamos carregar algumas arquiteturas que foram treinadas e vamos testá-las no dataset de vídeos.

In [3]:
import cv2

import pandas as pd

from facenet_pytorch import MTCNN

from PIL import Image
import glob, os

import torch
import torch.nn as nn
import torchvision

Vamos carregar um modelo resnet18 testado

In [4]:
resnet18 = torchvision.models.resnet18(pretrained=False)
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, 2)

resnet18.load_state_dict(torch.load('./Saved Models/resnet18_balanced_unfreezed_F97_R97_A97.pt'))

<All keys matched successfully>

In [5]:
# Definimos um device onde os tensors estarão sendo processados
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [7]:
# Cria uma lista de todas as pastas disponíveis para treinamento
folders = next(os.walk('./Kaggle Dataset/'))[1]
folders[:5]

['dfdc_train_part_0',
 'dfdc_train_part_1',
 'dfdc_train_part_10',
 'dfdc_train_part_11',
 'dfdc_train_part_12']

In [121]:
class Videos():
    def __init__(self, folder_path):
        # Guarda o folder_path
        self.folder_path = folder_path
        
        # Guarda a lista de todos os arquivos de videos dentro do folder_path
        self.video_files = glob.glob(folder_path + '/*.mp4')
        
        # Lê o arquivo JSON que contém as informações dos deepfakes naquela pasta
        self.metadata = pd.read_json(folder_path + '/metadata.json').transpose() # Essa transposiçao eh feita pois as colunas e as linhas estao trocadas
        
    def getRandomVideo(self):
        video_path = random.choice(self.video_files)
        video_name = os.path.basename(video_path)
        label = self.metadata.loc[video_name].label
        
        return video_path, video_name, label
        
    def getRealVideo(self, video_name):
        real_video_name = self.metadata.loc[video_name].original
        # Verifica se é NaN, pois caso seja o nome original é o próprio video real
        if pd.isna(real_video_name):
            real_video_name = video_name
        real_video_path = folder_path + '/' + real_video_name
        return real_video_path, real_video_name
    
    def getAllVideosPath(self):
        for video_name, columns in self.metadata.iterrows():
            yield self.folder_path + '/' + video_name, video_name, columns[0] # Label

def showVideo(video_path, label):
    cap = cv2.VideoCapture(video_path)

    # Configura a cor a ser colocada na LABEL
    if label == 'REAL':
        color = (0, 255, 0) # Verde
    else:
        color = (0, 0, 255) # Vermelho    

    while(cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(Image.fromarray(frame))
            if boxes is not None:
                for box in boxes:
                    cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), color=[0, 255, 0], thickness=5)
                    # Escreve a label no vídeo sob a cabeça do indivíduo
                    cv2.putText(img=frame, text=label, org=(box[0], box[1]), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=color, thickness=2)

            frame = cv2.resize(frame, (1280, 720))
            cv2.imshow('frame', frame)
            #cv2.waitKey(10)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            cv2.destroyAllWindows()
            break

    cap.release()
    cv2.destroyAllWindows()
    
def showAndPredictVideo(video_path, label, padding=0, size=224, check_every_frame=15):
    cap = cv2.VideoCapture(video_path)
    transform = torchvision.transforms.ToTensor()
    face = None
    video_frame = 1
    
    # Configura a cor a ser colocada na LABEL
    if label == 'REAL':
        color = (0, 255, 0) # Verde
    else:
        color = (0, 0, 255) # Vermelho    

    while(cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            if video_frame == 1 or video_frame % check_every_frame == 0:
                boxes = None
                boxes, _ = mtcnn.detect(Image.fromarray(frame))
                if boxes is not None:
                    for box in boxes: 
                        face = frame[
                            int(max(box[1] - padding, 0)):int(max(box[3] + padding, 0)), 
                            int(max(box[0] - padding, 0)):int(max(box[2] + padding, 0))
                        ]
                        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), color=[0, 255, 0], thickness=5)
                    face_tensor = getYUVTensor(face, size, transform)
                    cv2.putText(img=frame, text=label, org=(box[0], box[1]), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=color, thickness=2)
            frame = cv2.resize(frame, (1280, 720))
            cv2.imshow('frame', frame)
            video_frame += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    cap.release()
    cv2.destroyAllWindows()
    return face_tensor

In [96]:
test = torch.tensor([[[0.5, 0.1, 0.4],
                      [0.6, 0.1, 0.9],
                      [0.8, 0.3, 0.7]]])
torch.cat((test,test))

tensor([[[0.5000, 0.1000, 0.4000],
         [0.6000, 0.1000, 0.9000],
         [0.8000, 0.3000, 0.7000]],

        [[0.5000, 0.1000, 0.4000],
         [0.6000, 0.1000, 0.9000],
         [0.8000, 0.3000, 0.7000]]])

In [127]:
def getYUVTensor(face, size, transform):
    face = cv2.cvtColor(face, cv2.COLOR_BGR2YCrCb)
    face = cv2.resize(face, (size, size))[:, :, 0]
    face_tensor = transform(face)
    face_tensor = torch.cat((face_tensor, face_tensor, face_tensor))
    return face_tensor

In [64]:
# Definimos a coleta de uma pasta aleatória
import random
random_folder = random.choice(folders) + '/'

folder_path = './Kaggle Dataset/' + random_folder

# Instaciamos uma classe de Videos
videos = Videos(folder_path)

In [114]:
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.68, 0.78, 0.78], min_face_size=150)

In [65]:
videos_generator = videos.getAllVideosPath()

In [128]:
video_path, video_name, label = next(videos_generator)
face_tensor = showAndPredictVideo(video_path, label, padding=10, size=224, check_every_frame=1)
face_tensor, face_tensor.shape

(tensor([[[0.1765, 0.1765, 0.1765,  ..., 0.1843, 0.1882, 0.1922],
          [0.1765, 0.1765, 0.1765,  ..., 0.1882, 0.1922, 0.1922],
          [0.1804, 0.1765, 0.1765,  ..., 0.1922, 0.1922, 0.1922],
          ...,
          [0.1882, 0.1882, 0.1882,  ..., 0.0196, 0.0196, 0.0196],
          [0.1843, 0.1843, 0.1882,  ..., 0.0196, 0.0196, 0.0196],
          [0.1843, 0.1843, 0.1882,  ..., 0.0196, 0.0196, 0.0196]],
 
         [[0.1765, 0.1765, 0.1765,  ..., 0.1843, 0.1882, 0.1922],
          [0.1765, 0.1765, 0.1765,  ..., 0.1882, 0.1922, 0.1922],
          [0.1804, 0.1765, 0.1765,  ..., 0.1922, 0.1922, 0.1922],
          ...,
          [0.1882, 0.1882, 0.1882,  ..., 0.0196, 0.0196, 0.0196],
          [0.1843, 0.1843, 0.1882,  ..., 0.0196, 0.0196, 0.0196],
          [0.1843, 0.1843, 0.1882,  ..., 0.0196, 0.0196, 0.0196]],
 
         [[0.1765, 0.1765, 0.1765,  ..., 0.1843, 0.1882, 0.1922],
          [0.1765, 0.1765, 0.1765,  ..., 0.1882, 0.1922, 0.1922],
          [0.1804, 0.1765, 0.1765,  ...,

In [116]:
video_path, video_name, label = next(videos_generator)
showVideo(video_path, label)