In [1]:
!pip install -q timm

[0m

In [2]:
# with open('../input/zalo2022-livenessdetection-darknet53-fold-0/Fold0/log.txt', 'r') as f:
#     content = f.read().split('\n')
    
# content

In [3]:
import os
import sys
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader as DataLoader
from torch.utils.data import Dataset as Dataset
import cv2
import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import math
import gc
import time

In [4]:
ckpt_path = '../input/zalo2022-livenessdetection-darknet53-fold-0/Fold0/best.pt'
# Inspect the checkpoint on the CPU: without map_location, torch.load restores
# every tensor to the device it was saved from, which is unnecessary just to
# list the stored keys.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
ckpt = torch.load(ckpt_path, map_location='cpu')
ckpt.keys()

dict_keys(['epoch', 'state_dict', 'train_loss', 'valid_loss', 'auc_score', 'eer', 'opti', 'scheduler'])

In [5]:
# Name of the timm architecture; it is passed to LivenessModel when the model is built.
backbone = 'darknet53'
# Earlier dataset locations, kept commented out for reference.
# ROOT_DIR = '../input/yakiniku'
# VIDEO_DIR = os.path.join(ROOT_DIR, 'train_new/train/videos')
# CSV_PATH = os.path.join(ROOT_DIR, 'label_5folds.csv')
# PUBLIC_TEST_PATH = os.path.join(ROOT_DIR, 'public_test/public/videos')

# ROOT_DIR = '../input/zalo2022-pbl2'
# VIDEO_DIR = os.path.join(ROOT_DIR, 'public_test_2/videos')
# CSV_PATH = os.path.join(ROOT_DIR, 'label_5folds.csv')
# PUBLIC_TEST_PATH = os.path.join(ROOT_DIR, 'public_test_2/videos/')
# Directory whose .mp4 files are listed and scored by the cells below.
PUBLIC_TEST_PATH = '../input/zalo2022-pbl2/public_test_2/public_test_2/videos'

In [6]:
# create sample submission
submission = pd.DataFrame()
fname_lst = []
public_test_files = os.listdir(PUBLIC_TEST_PATH)
for file in public_test_files:
    if file.endswith('.mp4'):
        fname_lst.append(file)
        
submission['fname'] = fname_lst

In [7]:
# config
DEVICE = torch.device('cuda:0')
EPOCHS = 50
FOLD_LST = [0,1,2,3,4]
# DIM = (320, 320)
DIM = (384, 384)
# DIM = (480, 480)
# DIM = (224, 224)
TRAIN_BATCH_SIZE = 16
# VALID_BATCH_SIZE = 2 * TRAIN_BATCH_SIZE
VALID_BATCH_SIZE = 1
LR = 1e-4
SAMPLE = None

valid_transform = A.Compose(
    [
        A.Resize(DIM[0], DIM[1], always_apply=True),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ]
)

In [8]:
class LivenessDataset(Dataset):
    """Dataset that yields a fixed number of evenly spaced frames per video.

    Args:
        df: DataFrame with an ``fname`` column holding the video file names.
        video_dir: Directory that contains the video files.
        take_frame: Number of frames returned for every video (must be >= 1).
        transform: Optional callable invoked as ``transform(image=frame)``; the
            ``"image"`` entry of its result replaces the frame. The frames are
            combined with ``torch.stack``, so in practice the transform has to
            return tensors.

    Raises:
        ValueError: If ``take_frame`` is smaller than 1.
    """

    def __init__(self, df, video_dir, take_frame = 5, transform=None):
        if take_frame < 1:
            raise ValueError(f"take_frame must be >= 1, got {take_frame}")
        self.df = df.reset_index(drop = True)
        self.take_frame = take_frame
        self.video_dir = video_dir
        self.transform = transform

    def __len__(self):
        """Return the number of videos (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Decode video ``idx`` and return ``take_frame`` stacked frames.

        Every ``frame_step``-th frame is kept, where ``frame_step`` is the
        reported frame count divided by ``take_frame`` (at least 1). When fewer
        than ``take_frame`` frames can be collected, the last collected frame is
        repeated so that the stack always has ``take_frame`` entries.

        Raises:
            RuntimeError: If no frame could be read from the video.
        """
        fname = self.df.iloc[idx]['fname']
        video_path = os.path.join(self.video_dir, fname)
        video = cv2.VideoCapture(video_path)
        image_lst = []
        try:
            length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            # A video shorter than take_frame would give a step of 0 and a
            # ZeroDivisionError in the modulo below, so clamp the step to 1.
            frame_step = max(1, length // self.take_frame)
            frame_number = 0
            # Stop decoding as soon as enough frames have been collected.
            while len(image_lst) < self.take_frame and video.isOpened():
                ret, frame = video.read()
                if not ret:
                    break
                frame_number += 1
                if frame is None or frame_number % frame_step != 0:
                    continue
                if self.transform is not None:
                    frame = self.transform(image=frame)["image"]
                image_lst.append(frame)
        finally:
            # Release the capture even when decoding or the transform fails.
            video.release()

        if not image_lst:
            raise RuntimeError(f"could not read any frame from {video_path}")
        # Pad short videos with their last frame to keep the output length fixed.
        image_lst.extend([image_lst[-1]] * (self.take_frame - len(image_lst)))
        return torch.stack(image_lst, dim=0)

In [9]:
# class LivenessDataset(Dataset):
#     def __init__(self, df, video_dir, take_frame = 5, transform=None):
#         self.df = df.reset_index(drop = True)
#         self.take_frame = take_frame
#         self.video_dir = video_dir
#         self.transform = transform

#     def __len__(self):
#         return len(self.df)

#     def __getitem__(self, idx):
#         fname = self.df.iloc[idx]['fname']
#         video_path = os.path.join(self.video_dir, fname)
#         video = cv2.VideoCapture(video_path)
#         image_lst = []
#         frame_number = 0
#         length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
#         frame_step = length // self.take_frame
#         while video.isOpened():
#             ret, frame = video.read()
#             if ret:
#                 frame_number += 1
# #                 if frame is not None and frame_number % frame_step == 0:
#                 if self.transform is not None:
#                     frame = self.transform(image=frame)["image"]
#                 image_lst.append(frame)
#                 if frame_number == 5:
#                     break
#             else:
#                 break
#         video.release()
#         image_lst = image_lst[:self.take_frame]
#         return torch.stack(image_lst, axis =0)

In [10]:
class LivenessModel(torch.nn.Module):
    """Frame-level CNN backbone followed by a bidirectional LSTM and a linear head.

    Args:
        pretrained_name: timm architecture name; ``'resnet50'`` and
            ``'darknet53'`` are supported.

    Raises:
        ValueError: If ``pretrained_name`` is not a supported backbone.
    """

    _SUPPORTED_BACKBONES = ('resnet50', 'darknet53')

    def __init__(self, pretrained_name = 'resnet50'):
        super(LivenessModel, self).__init__()
        # Fail early with a clear message; otherwise in_feats would never be set
        # and the LSTM construction below would fail with an AttributeError.
        if pretrained_name not in self._SUPPORTED_BACKBONES:
            raise ValueError(
                f"unsupported backbone {pretrained_name!r}; "
                f"expected one of {self._SUPPORTED_BACKBONES}"
            )
        # No pretrained weights are downloaded here; in this notebook the weights
        # are filled in afterwards with load_state_dict.
        self.backbone = timm.create_model(pretrained_name, pretrained=False)
        # Replace the classification layer with Identity so that the backbone
        # returns in_feats features per frame.
        if pretrained_name == 'resnet50':
            self.in_feats = self.backbone.fc.in_features
            self.backbone.fc = torch.nn.Identity()
        else:  # 'darknet53'
            self.in_feats = self.backbone.head.fc.in_features
            self.backbone.head.fc = torch.nn.Identity()

        self.lstm = torch.nn.LSTM(self.in_feats, self.in_feats, 2,
                                  bidirectional = True, dropout = 0.3, batch_first = True)
        self.linear = torch.nn.Linear(self.in_feats * 2, 1)

    def forward(self, x):
        """Return one raw score per video.

        Args:
            x: Tensor of shape ``(batch, frames, channels, height, width)``.

        Returns:
            Tensor of shape ``(batch, 1)``; no sigmoid is applied here.
        """
        b, f, c, h, w = x.shape
        # Fold the frame axis into the batch axis so the backbone sees plain images.
        x = torch.reshape(x, (b * f, c, h, w))
        x = self.backbone(x)
        x = torch.reshape(x, (b, f, self.in_feats))
        output, _ = self.lstm(x)
        # Use the LSTM output at the last time step as the video representation.
        x = output[:,-1,:]
        x = self.linear(x)
        return x

In [11]:
# Dataset over the public test videos: 5 frames per video, preprocessed with valid_transform.
pbl_test_dataset = LivenessDataset(
    df=submission,
    video_dir=PUBLIC_TEST_PATH,
    take_frame=5,
    transform=valid_transform,
)

In [12]:
def infer_fn(model, dataloader, device):
    """Run ``model`` over ``dataloader`` in eval mode and collect its raw outputs.

    Args:
        model: Module that is called on each batch.
        dataloader: Iterable that yields input tensors.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A tuple ``(pred, time_lst)``: ``pred`` is the NumPy array of all batch
        outputs concatenated along axis 0, and ``time_lst`` holds the elapsed
        seconds for each batch.
    """
    model.eval()
    pred_lst, time_lst = [], []
    with torch.no_grad():
        # Iterate the dataloader directly (no enumerate wrapper) so that tqdm can
        # read its length and show a total and an ETA.
        for batch in tqdm(dataloader):
            # perf_counter is monotonic, which makes it the right clock for intervals.
            # The timing covers the device transfer, the forward pass and the
            # cleanup below; loading the batch happens in the iterator and is
            # not included.
            start = time.perf_counter()
            inp = batch.to(device)
            output = model(inp)
            pred_lst.append(output.detach().cpu().numpy())

            del inp, output
            torch.cuda.empty_cache()
            gc.collect()
            time_lst.append(time.perf_counter() - start)
    pred = np.concatenate(pred_lst, axis = 0)

    return pred, time_lst

In [13]:
# shuffle=False keeps the predictions in the same order as the rows of `submission`.
test_dataloader = DataLoader(pbl_test_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False)
# Load the checkpoint on the CPU; the weights are copied into the model and the
# model is then moved to DEVICE, so the checkpoint tensors do not need to be
# restored to the device they were saved from.
# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
ckpt = torch.load(ckpt_path, map_location='cpu')
state_dict = ckpt['state_dict']
best_eer = ckpt['eer']
print(best_eer)
model = LivenessModel(backbone)
model.load_state_dict(state_dict)
model = model.to(DEVICE)

0.0


In [14]:
# NOTE(review): `sample` is only an alias of the dataloader and is not used by
# any cell visible in this notebook.
sample = test_dataloader

In [15]:
# Score every test video. `pred` holds the raw model outputs (the sigmoid is
# applied in a later cell) and `time_lst` the elapsed seconds per batch.
pred, time_lst = infer_fn(model, test_dataloader, DEVICE)
pred.shape

486it [03:10,  2.55it/s]


(486, 1)

In [16]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp``, so large negative inputs do not overflow inside ``exp``
    (the naive formula triggers an overflow warning for them).

    Args:
        x: Scalar or NumPy array of raw scores.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

In [17]:
# Convert the raw model outputs into scores between 0 and 1.
# NOTE(review): this overwrites `pred`, so re-running the cell applies the
# sigmoid a second time.
pred = sigmoid(pred)

In [18]:
# Attach one score and one timing to every row and write the submission file.
# time_lst has one entry per batch, so it lines up with the rows only because
# the dataloader was built with VALID_BATCH_SIZE = 1.
submission['liveness_score'] = pred
submission['predict_time'] = time_lst
submission.to_csv('./submission.csv' ,index = False)

In [19]:
# Preview the first five rows of the submission.
submission.head(5)

Unnamed: 0,fname,liveness_score,predict_time
0,123.mp4,2.1e-05,5.417344
1,479.mp4,0.842261,0.222521
2,28.mp4,0.995015,0.216825
3,410.mp4,0.998569,0.21768
4,408.mp4,0.002326,0.215135


In [20]:
# Display the whole submission table.
submission

Unnamed: 0,fname,liveness_score,predict_time
0,123.mp4,0.000021,5.417344
1,479.mp4,0.842261,0.222521
2,28.mp4,0.995015,0.216825
3,410.mp4,0.998569,0.217680
4,408.mp4,0.002326,0.215135
...,...,...,...
481,217.mp4,0.943948,0.210051
482,151.mp4,0.000016,0.214522
483,135.mp4,0.004413,0.215135
484,25.mp4,0.000033,0.215487
