In [1]:
import argparse
import os
import re
import time

import torch
import pandas as pd
from kernel_utils import *
from training.zoo.classifiers import DeepFakeClassifier

import cv2
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm
from albumentations.augmentations.functional import image_compression
from facenet_pytorch.models.mtcnn import MTCNN
from concurrent.futures import ThreadPoolExecutor
from torchvision.transforms import Normalize

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize_transform = Normalize(mean, std)

In [2]:
def _predict_on_video(face_extractor, video_path, batch_size, input_size, models, strategy=np.mean, apply_compression=False):
    
    batch_size *= 4
    try:
        faces = face_extractor.process_video(video_path)
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3), dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face, quality=90, image_type=".jpg")
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
                    else:
                        pass
            if n > 0:
                x = torch.tensor(x, device="cuda").float()
                # Preprocess the images.
                x = x.permute((0, 3, 1, 2))
                for i in range(len(x)):
                    x[i] = normalize_transform(x[i] / 255.)
                # Make a prediction, then take the average.
                with torch.no_grad():
                    preds = []
                    for model in models:
                        y_pred = model(x[:n].half())
                        y_pred = torch.sigmoid(y_pred.squeeze())
                        bpred = y_pred[:n].cpu().numpy()
                        preds.append(strategy(bpred))
                    return np.mean(preds)
    except Exception as e:
        print("Prediction error on video %s: %s" % (video_path, str(e)))
        
    return 0.5

def _predict_on_videos(face_extractor, videos, input_size, num_workers, test_dir, frames_per_video, models,
                         strategy=np.mean,
                         apply_compression=False):
    def process_file(i):
        filename = videos[i]
        y_pred = _predict_on_video(face_extractor=face_extractor, video_path=os.path.join(test_dir, filename),
                                  input_size=input_size,
                                  batch_size=frames_per_video,
                                  models=models, strategy=strategy, apply_compression=apply_compression)
        return y_pred
    
    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        with tqdm(total=len(videos)) as progress:
            futures = []
            for video in range(len(videos)):
                future = pool.submit(process_file, video)
                future.add_done_callback(lambda x : progress.update())
                futures.append(future)
            
            results = []
            for future in futures:
                result = future.result()
                results.append(result)
        
    return results

In [3]:
# Python Version : 3.7.0

test_list = [36,37,38,39,40,41,42,43,44]
done_list = []

MODEL="b7_888_DeepFakeClassifier_resnest269e_0_37"
ENCODER="resnest269e"
TESTDIR="/workspace/dataset/test/"

def predict(models, test_dir, encoder, output="submission.csv", weights_dir="weights"):
    models = []
    model_paths = [os.path.join(weights_dir, model) for model in models]
    for path in model_paths:
        model = DeepFakeClassifier(encoder=encoder).to("cuda")
        print("loading state dict {}".format(path))
        checkpoint = torch.load(path, map_location="cpu")
        state_dict = checkpoint.get("state_dict", checkpoint)
        model.load_state_dict({re.sub("^module.", "", k): v for k, v in state_dict.items()}, strict=False)
        model.eval()
        del checkpoint
        models.append(model.half())

    frames_per_video = 32
    video_reader = VideoReader()
    video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn)
    input_size = 380
    strategy = confident_strategy
    stime = time.time()

    test_videos = sorted([x for x in os.listdir(test_dir) if x[-4:] == ".mp4"])
    print("Predicting {} videos".format(len(test_videos)))
    predictions = _predict_on_videos(face_extractor=face_extractor, input_size=input_size, models=models,
                                       strategy=strategy, frames_per_video=frames_per_video, videos=test_videos,
                                       num_workers=6, test_dir=test_dir)
    submission_df = pd.DataFrame({"filename": test_videos, "label": predictions})
    submission_df.to_csv(output, index=False)
    print("Elapsed:", time.time() - stime)
    
def inference_on_folder(number):
    test_dir = f"{TESTDIR}dfdc_train_part_{number}" # Input path of test datas
    output = f"{number}.csv"
    print(f"Start inference {test_dir}")
    predict(models=MODEL, test_dir=test_dir, encoder=ENCODER, output=output)

def main():
    for case in test_list:
        if case in done_list: continue
        inference_on_folder(case)

In [None]:
main()

Start inference /workspace/dataset/test/dfdc_train_part_37


  0%|          | 0/2655 [00:00<?, ?it/s]

Predicting 2655 videos


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  7%|▋         | 192/2655 [06:36<58:39,  1.43s/it]  