In [None]:
!pip install transformers --quiet

In [None]:
import os
import glob
import random
from tqdm.notebook import tqdm
import cv2
import sklearn.metrics
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from transformers import get_linear_schedule_with_warmup

import torch
import torch.nn as nn
import torch.nn.functional as F

import warnings
warnings.filterwarnings("ignore")

# Setup Data

In [None]:
# Collect the metadata CSVs of every "deepfake-detection-faces-*" dataset,
# ordered by the digits embedded in each path.
meta = glob.glob('../input/deepfake-detection-faces-*/*.csv')
meta.sort(key=lambda f: int(re.sub(r'\D', '', f)))  # raw string: '\D' is not a valid str escape

dfs = []
for path in meta:
    df = pd.read_csv(path)
    # Folder that contains this CSV, with a trailing slash.
    folder = os.path.dirname(path) + '/'
    # Frame directory of a video = dataset folder + file name without its
    # 4-character extension. Assigned column-wise: the previous
    # `df.loc[i]['path'] = ...` was chained assignment, which writes to a
    # temporary row object and is not guaranteed to update `df`.
    df['path'] = folder + df['filename'].str[:-4]
    dfs.append(df)

train_df = pd.concat(dfs)
train_df = train_df.reset_index(drop=True)
len(train_df)

In [None]:
# Append DFDC parts 16..39 to `train_df`.
# A video is kept only if its frame directory exists and holds at least 10
# entries; the index labels of rejected rows are printed per part.
# NOTE: this cell extends `train_df` in place, so re-running it without
# re-running the previous cell appends the same parts again.
part = 16
part_frames = [train_df]
for j in range((39-16)+1):
    part_no = part + j
    images_dir = f'../input/dfdc-part-{part_no}/images'
    if part_no != 17:
        meta = pd.read_csv(f'{images_dir}/metadata{part_no}.csv')
    else:
        # NOTE(review): part 17's metadata carries a .json name but is parsed
        # with read_csv; presumably it is CSV content. Confirm.
        meta = pd.read_csv(f'{images_dir}/metadata{part_no}.json', index_col=0)
    print(part_no)

    # Candidate frame directory per row: file name minus its 4-char extension.
    frame_dirs = [f'{images_dir}/{name[:-4]}' for name in meta['filename']]
    keep = np.array(
        [os.path.isdir(d) and len(os.listdir(d)) >= 10 for d in frame_dirs],
        dtype=bool,
    )
    del_idxs = meta.index[~keep].tolist()
    print(del_idxs)

    # Column-wise assignment; the former `meta.loc[i]['path'] = ...` was
    # chained assignment and is not guaranteed to write back into `meta`.
    meta['path'] = frame_dirs
    part_frames.append(meta.loc[keep])

# Concatenate once instead of growing train_df inside the loop.
train_df = pd.concat(part_frames).reset_index(drop=True)
len(train_df)

In [None]:
# Build the validation frame from DFDC parts 40..49.
# A video is kept only if its frame directory exists and holds at least 10
# entries; the index labels of rejected rows are printed per part.
dfs = []
part = 40
for j in range(10):
    part_no = part + j
    images_dir = f'../input/dfdc-part-{part_no}/images'
    # The special case for part 17 was unreachable here (parts are 40..49).
    meta = pd.read_csv(f'{images_dir}/metadata{part_no}.csv')
    print(part_no)

    # Candidate frame directory per row: file name minus its 4-char extension.
    frame_dirs = [f'{images_dir}/{name[:-4]}' for name in meta['filename']]
    keep = np.array(
        [os.path.isdir(d) and len(os.listdir(d)) >= 10 for d in frame_dirs],
        dtype=bool,
    )
    del_idxs = meta.index[~keep].tolist()
    print(del_idxs)

    # Column-wise assignment; the former `meta.loc[i]['path'] = ...` was
    # chained assignment and is not guaranteed to write back into `meta`.
    meta['path'] = frame_dirs
    dfs.append(meta.loc[keep])
val_df = pd.concat(dfs)
val_df = val_df.reset_index(drop=True)
print(len(val_df))

In [None]:
# Short aliases used by the rest of the notebook (no copy is made).
tr_df, te_df = train_df, val_df

In [None]:
# Split the training rows by class label.
train_f = tr_df[tr_df['label'].eq('FAKE')]
train_r = tr_df[tr_df['label'].eq('REAL')]

# Dataset
Single image

In [None]:
from torch.utils.data import Dataset, DataLoader
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

class ImageDataset(Dataset):
    """Dataset yielding one randomly chosen face frame per video.

    Args:
        df: DataFrame with one row per video; must provide a ``path`` column
            (directory holding the video's frames) and a ``label`` column
            (``'FAKE'`` or anything else, treated as real).
        transform: optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry
            (the albumentations calling convention).
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``[image, label]`` for the video at position ``idx``.

        ``image`` is the frame resized to 224x224 with the channel axis moved
        first; ``label`` is a 0-d float32 array (1.0 for FAKE, 0.0 otherwise).

        Raises:
            ValueError: if the video directory has no usable frame or the
                chosen frame cannot be decoded.
        """
        video = self.df.iloc[idx]

        # Frames whose *file name* contains '_' are skipped. The check is done
        # on the base name so an underscore in a parent directory does not
        # discard every frame of the video.
        imgs = [
            p for p in glob.glob(f'{video["path"]}/*')
            if '_' not in os.path.basename(p)
        ]
        if not imgs:
            raise ValueError(f'No usable frames found in {video["path"]!r}')

        img_path = random.choice(imgs)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f'Could not read image {img_path!r}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))

        if self.transform is not None:
            img = self.transform(image=img)['image']

        # HWC -> CHW
        img = np.rollaxis(img, -1, 0)

        labels = np.array(video['label'] == 'FAKE', dtype=np.float32)
        return [img, labels]

# Model

In [None]:
!pip install pytorchcv --quiet
from pytorchcv.model_provider import get_model
# Backbone selection: exactly one get_model(...) call is active; the commented
# lines are alternative pretrained backbones.
# model = get_model("seresnext50_32x4d", pretrained=True)
# model = get_model("xception", pretrained=True)
# model = get_model("inceptionv3", pretrained=True)
# model = get_model("inceptionresnetv2", pretrained=True)
# model = get_model("mobilenet_w1", pretrained=True)
model = get_model("efficientnet_b1", pretrained=True)
# Keep every child module except the last one and wrap them in a Sequential.
model = nn.Sequential(*list(model.children())[:-1]) # Remove original output layer

# Replace the backbone's final pooling with a global adaptive average pool.
# NOTE(review): presumably done so the feature map is reduced to 1x1 for the
# 224x224 inputs used here; the attribute name differs per backbone. Confirm
# against pytorchcv.
# model[0].final_block.pool = nn.Sequential(nn.AdaptiveAvgPool2d(1)) # xcep
model[0].final_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1)) #effi, incep

class Head(torch.nn.Module):
    """Classification head: flatten -> dropout -> linear(512) -> ReLU -> dropout -> linear.

    Args:
        in_f: number of features after flattening the backbone output.
        out_f: number of output logits.
    """

    def __init__(self, in_f, out_f):
        super().__init__()
        hidden = 512

        self.f = nn.Flatten()
        self.l = nn.Linear(in_f, hidden)
        self.d = nn.Dropout(0.75)
        self.o = nn.Linear(hidden, out_f)
        # The two batch-norm layers are registered (and therefore part of the
        # state_dict) but forward() does not call them.
        self.b1 = nn.BatchNorm1d(in_f)
        self.b2 = nn.BatchNorm1d(hidden)
        self.r = nn.ReLU()

    def forward(self, x):
        """Return raw logits of shape (batch, out_f)."""
        flat = self.d(self.f(x))
        hidden = self.d(self.r(self.l(flat)))
        return self.o(hidden)

class FCN(torch.nn.Module):
    """Backbone followed by a single-logit `Head`.

    Args:
        base: feature extractor module applied to the input batch.
        in_f: number of features the head receives after flattening.
    """

    def __init__(self, base, in_f):
        super().__init__()
        self.base = base
        self.h1 = Head(in_f, 1)

    def forward(self, x):
        """Run the backbone, then the head; returns the head's logits."""
        features = self.base(x)
        return self.h1(features)

# Attach the classification head to the truncated backbone. The second
# argument is the head's input width and has to match the active backbone;
# the commented lines hold the values used for the alternative backbones.
model = FCN(model, 1280) # effi
# model = FCN(model, 1536) # incep-res
# model = FCN(model, 2048) # xcep

# LRCN Dataset and Model (unused)

In [None]:
# from torch.utils.data import Dataset, DataLoader
# mean = [0.485, 0.456, 0.406]
# std = [0.229, 0.224, 0.225]

# class ImageDataset(Dataset):
#     def __init__(self, df, transform=None):
#         self.df = df
#         self.transform = transform

#     def __len__(self):
#         return len(self.df)
    
#     def __getitem__(self, idx):
#         video = self.df.iloc[idx]
        
#         imgs = glob.glob(f'{video["path"]}/*')
#         if len(imgs) < 1:
#             print(video["path"])
#         imgs.sort(key=lambda f: int(re.sub('\D', '', f)))
        
#         bad = []
#         for im in imgs:
#             if len(im.split('_')) > 1:
#                bad.append(im)
#         for im in bad:
#             imgs.remove(im)
        
#         img_paths = imgs[:10]
    
#         faces = []
#         for img_path in img_paths:
#             img = cv2.cvtColor(cv2.imread(img_path),cv2.COLOR_BGR2RGB)
#             img = cv2.resize(img, (150, 150))

#             if self.transform is not None:
#               res = self.transform(image=img)
#               img = res['image']
        
#             img = np.rollaxis(img, -1, 0)
            
#             faces.append(img)
#         faces = np.array(faces)
        
#         label = video['label']
#         labels = 1
#         if label == 'FAKE':
#             labels = 1
#         else:
#             labels = 0
#         labels = np.array([labels]).astype(np.float32)
#         return [faces, labels]

In [None]:
# !pip install pytorchcv --quiet
# from pytorchcv.model_provider import get_model
# # model = get_model("xception", pretrained=True)
# model = get_model("efficientnet_b1", pretrained=True)
# model = nn.Sequential(*list(model.children())[:-1]) # Remove original output layer
# model[0].final_pool = nn.AdaptiveAvgPool2d(1)
# # model[0].final_block.pool = nn.Sequential(nn.AdaptiveAvgPool2d(1))

# class LRCN(nn.Module):
#     def __init__(self, base, in_f, out_f):
#         super(LRCN, self).__init__()
#         self.cnn = base
        
#         self.LSTM = nn.LSTM(
#             input_size=in_f,
#             hidden_size=256,
#             num_layers=1,
#             batch_first=True
#         )

#         self.f1 = nn.Linear(256, 128)
#         self.f2 = nn.Linear(128, out_f)
#         self.r = nn.ReLU()
#         self.d = nn.Dropout(0.5)
        
#     def forward(self, x):
#         batch_size, timesteps, C, H, W = x.size()
#         x = x.view(batch_size * timesteps, C, H, W)
#         x = self.cnn(x)
#         x = x.view(batch_size, timesteps, -1)
#         self.LSTM.flatten_parameters()
#         x, (hn,hc) = self.LSTM(x)
#         x = self.d(self.r(self.f1(x[:,-1,:])))
#         x = self.f2(x)
#         return x

# model = LRCN(model, 1280, 1)

# Train functions

In [None]:
def criterion1(pred1, targets):
    """Mean binary cross-entropy between raw logits and 0/1 targets.

    Args:
        pred1: tensor of logits (pre-sigmoid), e.g. shape (B, 1) or (B,).
        targets: float tensor with the same number of elements as ``pred1``.

    Returns:
        0-d tensor holding the mean loss.
    """
    # Both tensors are flattened so a (B, 1) prediction can be compared with a
    # (B,) target. The logits form applies the sigmoid internally and is
    # numerically more stable than sigmoid followed by binary_cross_entropy.
    return F.binary_cross_entropy_with_logits(pred1.reshape(-1), targets.reshape(-1))

def train_model(epoch, optimizer, scheduler=None, history=None):
    """Train the global ``model`` for one pass over the global ``train_loader``.

    Each batch is randomly routed: CutMix when the draw is below 0.4, MixUp
    when it is below 0.8, otherwise the plain loss.

    Args:
        epoch: zero-based epoch index (used for the progress text and as the
            integer part of the ``history`` index).
        optimizer: optimizer updating ``model``'s parameters.
        scheduler: optional scheduler stepped once per batch via
            ``scheduler.step()`` with no arguments. A metric-driven scheduler
            such as ReduceLROnPlateau does not fit here; hand that one to
            ``evaluate_model`` instead.
        history: optional DataFrame receiving 'train_loss' and 'lr' per batch.
    """
    model.train()
    # Plain float: summing the loss tensors themselves would keep every
    # batch's autograd graph alive for the whole epoch.
    total_loss = 0.0
    n_batches = len(train_loader)

    t = tqdm(train_loader)
    for i, (img_batch, y_batch) in enumerate(t):
        img_batch = img_batch.cuda().float()
        y_batch = y_batch.cuda().float()

        optimizer.zero_grad()

        rand = np.random.rand()
        if rand < 0.4:
            images, targets = cutmix(img_batch, y_batch, 0.4)
            # The criterion returns a one-element list.
            loss = cutmix_criterion(model(images), targets)[0]
        elif rand < 0.8:
            images, targets = mixup(img_batch, y_batch, 0.4)
            loss = mixup_criterion(model(images), targets)[0]
        else:
            loss = criterion1(model(img_batch), y_batch)

        loss_value = loss.item()
        lr = optimizer.param_groups[0]['lr']

        total_loss += loss_value
        t.set_description(f'Epoch {epoch+1}/{n_epochs}, LR: %6f, Loss: %.4f'%(lr,total_loss/(i+1)))

        if history is not None:
            # Fractional index: epoch + progress within the epoch.
            history.loc[epoch + i / n_batches, 'train_loss'] = loss_value
            history.loc[epoch + i / n_batches, 'lr'] = lr

        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

def evaluate_model(epoch, scheduler=None, history=None):
    """Evaluate the global ``model`` on the global ``val_loader``.

    Prints the mean validation loss, the macro-averaged recall of the rounded
    predictions (labelled "Acc" in the output) and the log loss of the
    probabilities clipped to [0.01, 0.99] (labelled "Kaggle").

    Args:
        epoch: index under which 'dev_loss' is stored in ``history``.
        scheduler: optional scheduler stepped once with the validation loss
            (``scheduler.step(loss)``), e.g. ReduceLROnPlateau.
        history: optional DataFrame receiving 'dev_loss'.

    Returns:
        0-d tensor with the mean validation loss over all batches.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    model.eval()
    n_batches = len(val_loader)
    if n_batches == 0:
        raise ValueError('val_loader is empty; nothing to evaluate')

    loss = 0
    batch_probs = []
    batch_labels = []
    with torch.no_grad():
        for img_batch, y_batch in tqdm(val_loader):
            img_batch = img_batch.cuda().float()
            y_batch = y_batch.cuda().float()

            o1 = model(img_batch)
            loss += criterion1(o1, y_batch)

            # Collect whole batches instead of appending sample by sample.
            batch_probs.append(torch.sigmoid(o1).reshape(-1).cpu())
            batch_labels.append(y_batch.reshape(-1).cpu())

    probs = torch.cat(batch_probs).numpy()
    real = torch.cat(batch_labels).numpy().astype(int)

    pred = np.round(probs).astype(int)
    acc = sklearn.metrics.recall_score(real, pred, average='macro')

    # Explicit labels keep log_loss defined even if only one class is present.
    kaggle = sklearn.metrics.log_loss(real, probs.clip(0.01, 0.99), labels=[0, 1])

    loss /= n_batches

    if history is not None:
        history.loc[epoch, 'dev_loss'] = loss.item()

    if scheduler is not None:
        scheduler.step(loss.item())

    print(f'Dev loss: %.4f, Acc: %.6f, Kaggle: %.6f'%(loss,acc,kaggle))

    return loss

# Mixup/Cutmix

In [None]:
def ohem_loss( rate, cls_pred, cls_target ):
    """Mean binary cross-entropy between raw logits and 0/1 targets.

    Despite the name, no hard-example mining is performed: ``rate`` is accepted
    for signature compatibility and ignored.

    Args:
        rate: unused.
        cls_pred: tensor of logits (pre-sigmoid), e.g. shape (B, 1) or (B,).
        cls_target: float tensor with the same number of elements.

    Returns:
        0-d tensor holding the mean loss.
    """
    # Flatten both sides so (B, 1) logits match (B,) targets; the logits form
    # is numerically more stable than sigmoid + binary_cross_entropy.
    return F.binary_cross_entropy_with_logits(cls_pred.reshape(-1), cls_target.reshape(-1))

def rand_bbox(size, lam):
    """Sample a random box for CutMix.

    Args:
        size: 4-element shape; ``size[2]`` and ``size[3]`` are the extents the
            box is drawn in (called W and H here, in that order).
        lam: mixing ratio; each box side is ``sqrt(1 - lam)`` times the
            corresponding extent before clipping.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)`` with the x pair clipped to ``[0, W]`` and
        the y pair clipped to ``[0, H]``.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # Built-in int: the `np.int` alias was removed in NumPy 1.24.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

def cutmix(data, targets1, alpha):
    """Apply CutMix to a batch in place.

    A random box (see ``rand_bbox``) of every sample is overwritten with the
    same box taken from a randomly permuted sample of the batch.

    Args:
        data: 4-D batch tensor; modified in place and also returned.
        targets1: targets aligned with ``data``.
        alpha: both parameters of the Beta distribution the mixing ratio is
            drawn from.

    Returns:
        ``(data, [targets1, shuffled_targets, lam])`` where ``lam`` is the
        fraction of each sample that was left untouched.
    """
    indices = torch.randperm(data.size(0))
    shuffled_targets1 = targets1[indices]

    lam = np.random.beta(alpha, alpha)
    bbx1, bby1, bbx2, bby2 = rand_bbox(data.size(), lam)
    # The right-hand side is materialised (advanced indexing copies) before the
    # assignment, so no separate shuffled copy of the whole batch is needed.
    data[:, :, bbx1:bbx2, bby1:bby2] = data[indices, :, bbx1:bbx2, bby1:bby2]
    # adjust lambda to exactly match pixel ratio; plain float rather than a
    # NumPy scalar so it combines cleanly with tensors
    lam = float(1 - ((bbx2 - bbx1) * (bby2 - bby1) / (data.size()[-1] * data.size()[-2])))

    targets = [targets1, shuffled_targets1, lam]
    return data, targets

# loss 
def cutmix_criterion(preds1, targets, rate=1.0):
    """Loss for a CutMix batch.

    ``targets`` is the ``[targets_a, targets_b, lam]`` list produced by
    ``cutmix``. The result is a one-element list holding
    ``lam * loss(preds1, targets_a) + (1 - lam) * loss(preds1, targets_b)``.
    """
    first, second, lam = targets[0], targets[1], targets[2]
    loss_first = ohem_loss(rate, preds1, first)
    loss_second = ohem_loss(rate, preds1, second)
    return [lam * loss_first + (1 - lam) * loss_second]


def mixup(data, targets1, alpha):
    """Blend every sample with a randomly permuted partner from the same batch.

    Args:
        data: batch tensor (not modified; a new tensor is returned).
        targets1: targets aligned with ``data``.
        alpha: both parameters of the Beta distribution ``lam`` is drawn from.

    Returns:
        ``(blended, [targets1, partner_targets, lam])`` with
        ``blended = data * lam + partner * (1 - lam)``.
    """
    perm = torch.randperm(data.size(0))
    partner_data, partner_targets = data[perm], targets1[perm]

    lam = np.random.beta(alpha, alpha)
    blended = data * lam + partner_data * (1 - lam)

    return blended, [targets1, partner_targets, lam]


def mixup_criterion(preds1, targets, rate=1.0):
    """Loss for a MixUp batch.

    ``targets`` is the ``[targets_a, targets_b, lam]`` list produced by
    ``mixup``. The result is a one-element list holding
    ``lam * loss(preds1, targets_a) + (1 - lam) * loss(preds1, targets_b)``.
    """
    first, second, lam = targets[0], targets[1], targets[2]
    loss_first = ohem_loss(rate, preds1, first)
    loss_second = ohem_loss(rate, preds1, second)
    return [lam * loss_first + (1 - lam) * loss_second]

# Augmentation

In [None]:
import albumentations as A

# Training-time augmentation pipeline. Only the horizontal flip (p=0.2) is
# active; the commented entries are augmentations that are currently disabled,
# including Normalize, so training images are not normalised by this pipeline.
train_transform = A.Compose([
    # A.ShiftScaleRotate(p=0.3, scale_limit=0.25, border_mode=1, rotate_limit=10),
    A.HorizontalFlip(p=0.2),
#     A.RandomBrightnessContrast(p=0.3, brightness_limit=0.5, contrast_limit=0.5),
#     A.OneOf([
#       A.JpegCompression(quality_lower=8, quality_upper=30, p=1.0),
#       A.Downscale(scale_min=0.5, scale_max=0.75, p=1.0)
#     ], p=0.2),
#     A.RandomCrop(110, 110, p=0.2),
#     A.Normalize(always_apply=True)
])

# NOTE(review): val_transform is defined but not used below; the validation
# dataset is built with transform=None. That matches the training pipeline,
# which has Normalize disabled. Enable both together or neither.
val_transform = A.Compose([
    A.Normalize(always_apply=True)
])
val_dataset = ImageDataset(te_df, transform=None)

In [None]:
# Balance the classes: draw as many FAKE rows as there are REAL rows, then
# shuffle the combined frame.
fake_batch = train_f.sample(len(train_r)).reset_index(drop=True)
train_ = (
    pd.concat([fake_batch, train_r])
    .sample(frac=1)
    .reset_index(drop=True)
)
print(train_['label'].value_counts())

train_dataset = ImageDataset(train_, transform=train_transform)

# Visual sanity check: show the first nrow * ncol samples with their labels.
nrow, ncol = 3, 5
fig, axes = plt.subplots(nrow, ncol, figsize=(20, 8))
axes = axes.flatten()
for i in range(len(axes)):
    ax = axes[i]
    images, label = train_dataset[i]
    # CHW -> HWC for matplotlib
    image = np.transpose(images, (1, 2, 0))
    ax.imshow(image)
    ax.set_title(f'label: {label}')
plt.tight_layout()

# Train

In [None]:
import gc

# Per-batch training history (train_loss, lr) and per-epoch validation history.
history = pd.DataFrame()
history2 = pd.DataFrame()

torch.cuda.empty_cache()
gc.collect()

best = 1e10
n_epochs = 20

batch_size = 64
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

model = model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, mode='min', factor=0.7, verbose=True, min_lr=1e-5)

for epoch in range(n_epochs):
    torch.cuda.empty_cache()
    gc.collect()

    # Re-draw a class-balanced training set every epoch: a fresh sample of
    # FAKE rows the size of the REAL set, shuffled together.
    fake_batch = train_f.sample(len(train_r)).reset_index(drop=True)
    train_ = pd.concat([fake_batch, train_r])
    train_ = train_.sample(frac=1).reset_index(drop=True)

    train_dataset = ImageDataset(train_, transform=train_transform)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # ReduceLROnPlateau needs a metric, so it is stepped by evaluate_model with
    # the validation loss. train_model calls scheduler.step() without
    # arguments once per batch, which this scheduler does not support.
    train_model(epoch, optimizer, scheduler=None, history=history)

    loss = evaluate_model(epoch, scheduler=scheduler, history=history2)

    if loss < best:
        best = loss
        print('Saving best model...')
        torch.save(model.state_dict(), f'model_{epoch+1}.pth')

In [None]:
# Plot the per-epoch validation loss that evaluate_model stored in history2
# (column 'dev_loss').
history2.plot()