In [None]:
# ! pip install kornia
# ! pip install -U albumentations[imgaug]
# ! pip install neptune-client

Experiment training dataset build by using 
 2000 Synthetic images from [Impaintin](https://www.grip.unina.it/download/vipcup2022/gated_convolution_inpainting.zip) dataset | places in the folder names "Generated" \
 and 5000 real images from [COCO2017](http://images.cocodataset.org/zips/val2017.zip) dataset | placed in the folder name "Real"

data set folder structure
```
|SampleData |- label.csv
            |- Generated /
            |- Real /
```

__Download & save the backborne model before any experiment run ; \
xception model : http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth \
and save in the 'C:\Users\deela\.cache\torch\hub\checkpoints' directory__

In [12]:
import os
import time
import json 
import logging
import warnings
import numpy as np 
import pandas as pd 
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.multiprocessing as mp
import torch.nn.functional as F
from torch.cuda.amp import autocast
from torch.utils.data import DataLoader
import torch.distributed as dist
from models.MAT import MAT
# from datasets.dataset import DeepfakeDataset
from AGDA import AGDA
import cv2
from utils import dist_average,ACC
from config import train_config
import neptune.new as neptune

from torch.utils.data import Dataset
from datasets.augmentations import augmentations
from albumentations import CenterCrop,Compose,Resize,RandomCrop
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import train_test_split

In [13]:
#from torch.utils.tensorboard import SummaryWriter
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)
# GPU settings
assert torch.cuda.is_available()
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#torch.autograd.set_detect_anomaly(True)

In [14]:
run = neptune.init(
    project="Botz/VIPCup-logs",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJkNWJjMDdhNC05NWY5LTQwNWQtYTQyNi0zNjNmYmYwZDg3M2YifQ==",
)  # your credentials

https://app.neptune.ai/Botz/VIPCup-logs/e/VIP-4
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [23]:
class DeepfakeDataset(Dataset):
    def __init__(self, df, resize=(320,320), augment='augment0', normalize=dict(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5]), phase='train'):
        self.df = df 
        self.aug=augmentations[augment]
        self.file_paths = self.df['file_path'].values 
        self.labels = self.df['label'].values 
        self.trans=Compose([RandomCrop(*resize),ToTensorV2()])
        self.phase = phase

    def __getitem__(self, idx):
        image_path = self.file_paths[idx]
        image_label = self.labels[idx]
        image = cv2.imread(image_path, 1)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # orig_image = torch.from_numpy(np.transpose(image, (2,0,1)))
        # resized_image = Resize(size=self.image_size)(orig_image)
#         return {'image': resized_image, 'label': torch.from_numpy(image_label)}
        image=self.aug(image=image)['image']
        final_image = self.trans(image=image)['image']
        return final_image, torch.from_numpy(np.array(image_label))

    def __len__(self):
        return self.df.shape[0]

    def next_epoch(self):
        self.epoch+=1

In [24]:
ann_path = './data/SampleData/labels.csv'
ann_df = pd.read_csv(ann_path)
X, y = ann_df.iloc[:, :-1], ann_df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)

In [25]:
train_df = X_train.copy()
train_df['label'] = y_train

eval_df = X_test.copy()
eval_df['label'] = y_test

train_df.head()

Unnamed: 0,imageId,file_path,label
5516,407518,./data/SampleData\Real\000000407518.jpg,0
3266,149568,./data/SampleData\Real\000000149568.jpg,0
6468,517832,./data/SampleData\Real\000000517832.jpg,0
4752,319100,./data/SampleData\Real\000000319100.jpg,0
2958,113403,./data/SampleData\Real\000000113403.jpg,0


In [26]:
def load_state(net,ckpt):
    sd=net.state_dict()
    nd={}
    goodmatch=True
    for i in ckpt:
        if i in sd and sd[i].shape==ckpt[i].shape:
            nd[i]=ckpt[i]
            #print(i)
        else:
            print('fail to load %s'%i)
            goodmatch=False
    net.load_state_dict(nd,strict=False)
    return goodmatch

In [27]:
def train_loss(loss_pack,config):
    if 'loss' in loss_pack:
        return loss_pack['loss']
    loss=config.ensemble_loss_weight*loss_pack['ensemble_loss']+config.aux_loss_weight*loss_pack['aux_loss']
    if config.AGDA_loss_weight!=0:
        loss+=config.AGDA_loss_weight*loss_pack['AGDA_ensemble_loss']+config.match_loss_weight*loss_pack['match_loss']
    return loss

In [28]:
def run_step(logs,data_loader,net,optimizer,device,config,AG=None,phase='train'):
    if config.AGDA_loss_weight==0:
        AG=None
    recorder={}
    if config.feature_layer=='logits':
        record_list=['loss','acc']
    else:
        record_list=['ensemble_loss','aux_loss','ensemble_acc']
        if AG is not None:
            record_list+=['AGDA_ensemble_loss','match_loss']
    for i in record_list:
        recorder[i]=dist_average(device)
    # begin training
    start_time = time.time()
    if phase=='train':
        net.train()
    else: net.eval()
    with tqdm(data_loader, unit="batch") as tepoch:
        for X, y in tepoch:
            X = X.float().to(device)
            y = y.to(device)
            with torch.set_grad_enabled(phase=='train'):
                with autocast():
                    loss_pack=net(X,y,train_batch=True,AG=AG)
            if phase=='train':
                batch_loss = train_loss(loss_pack,config)
                batch_loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            with torch.no_grad():
                if config.feature_layer=='logits':
                    loss_pack['acc']=ACC(loss_pack['logits'],y)
                else:
                    loss_pack['ensemble_acc']=ACC(loss_pack['ensemble_logit'],y)
            for i in record_list:
                recorder[i].step(loss_pack[i])
            tepoch.set_postfix({r : recorder[r].get() for r in record_list})
                

    # end of this epoch
    batch_info=[]
    for i in record_list:
        mesg=recorder[i].get()
        logs[i]=mesg
        batch_info.append('{}:{:.4f}'.format(i,mesg))
    end_time = time.time()

    # write log for this epoch
    if phase == 'train':
        for i in record_list:
            run[f'train/{i}'].log(recorder[i].get())
    else:
        for i in record_list:
            run[f'eval/{i}'].log(recorder[i].get())
    logging.info('{}: {}, Time {:3.2f}'.format(phase,'  '.join(batch_info), end_time - start_time))


In [29]:
def main_worker(config):
    # rank=local_rank+rank_offset
    logging.basicConfig(
    filename=os.path.join('runs', config.name,'train.log'),
    filemode='a',
    format='%(asctime)s: %(levelname)s: [%(filename)s:%(lineno)d]: %(message)s',
    level=logging.INFO)
    warnings.filterwarnings("ignore")
    # dist.init_process_group(backend='nccl', init_method=config.url,world_size=world_size, rank=rank)
    # if rank==0:
    #     try:
    #         os.remove('/tmp/.pytorch_distribute')
    #     except:
    #         pass
    np.random.seed(1234567)
    torch.manual_seed(1234567)
    torch.cuda.manual_seed(1234567)
    # torch.cuda.set_device(local_rank)

    print("Start Data preparation ...")
    train_dataset = DeepfakeDataset(phase='train',**config.train_dataset)
    validate_dataset=DeepfakeDataset(phase='test',**config.val_dataset)
    # train_sampler=torch.utils.data.distributed.DistributedSampler(train_dataset)
    # validate_sampler=torch.utils.data.distributed.DistributedSampler(validate_dataset)
    train_loader=torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, pin_memory=True)
    validate_loader=torch.utils.data.DataLoader(validate_dataset, batch_size=config.batch_size, pin_memory=True)
    print("Successfully complete Data preparation ...")
    logs = {}
    start_epoch = 0
    net = MAT(**config.net_config)
    for i in config.freeze:
        if 'backbone' in i:
            net.net.requires_grad_(False)
        elif 'attention' in i:
            net.attentions.requires_grad_(False)
        elif 'feature_center' in i:
            net.auxiliary_loss.alpha=0
        elif 'texture_enhance' in i:
            net.texture_enhance.requires_grad_(False)
        elif 'fcs' in i:
            net.projection_local.requires_grad_(False)
            net.project_final.requires_grad_(False)
            net.ensemble_classifier_fc.requires_grad_(False)
        else:
            if 'xception' in str(type(net.net)):
                for j in net.net.seq:
                    if j[0]==i:
                        for t in j[1]:
                            t.requires_grad_(False)
            
            if 'EfficientNet' in str(type(net.net)):
                if i=='b0':
                    net.net._conv_stem.requires_grad_(False)
                stage_map=net.net.stage_map
                for c in range(len(stage_map)-2,-1,-1):
                    if not stage_map[c]:
                        stage_map[c]=stage_map[c+1]
                for c1,c2 in zip(stage_map,net.net._blocks):
                    if c1==i:
                        c2.requires_grad_(False)
    print("Model Initialzation ...")
    # net=nn.SyncBatchNorm.convert_sync_batchnorm(net)#.to(local_rank)
    net.to(DEVICE)
    AG=AGDA(**config.AGDA_config)#.to(local_rank)
    optimizer = torch.optim.AdamW(net.parameters(), lr=config.learning_rate, betas=config.adam_betas, weight_decay=config.weight_decay)
    scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=config.scheduler_step, gamma=config.scheduler_gamma)
    if config.ckpt:
        loc = DEVICE
        checkpoint = torch.load(config.ckpt, map_location=loc)
        logs = checkpoint['logs']
        start_epoch = int(logs['epoch'])+1
        if load_state(net.module,checkpoint['state_dict']) and config.resume_optim:
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            try:
                scheduler.load_state_dict(checkpoint['scheduler_state'])
            except:
                pass
        else:
            net.module.auxiliary_loss.alpha=torch.tensor(config.alpha)
        del checkpoint
    torch.cuda.empty_cache()
    print("Start Model Training ...")
    for epoch in range(start_epoch, config.epochs):
        print(f'[EPOCH] - {epoch}')
        logs['epoch'] = epoch
        # train_sampler.set_epoch(epoch)
        # train_sampler.dataset.next_epoch()
        run_step(logs=logs, data_loader=train_loader, net=net, optimizer=optimizer, device=DEVICE, config=config, AG=AG, phase='train')
        run_step(logs=logs, data_loader=validate_loader, net=net, optimizer=optimizer, device=DEVICE, config=config, phase='valid')
        net.auxiliary_loss.alpha*=config.alpha_decay # because of 'module' not found error.
        scheduler.step()
    else :
        torch.save({
                'logs': logs,
                'state_dict': net.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'scheduler_state':scheduler.state_dict()}, 'checkpoints/'+config.name+'/ckpt_%s.pth'%epoch)
        # dist.barrier()

In [30]:
def distributed_train(config,world_size=0,num_gpus=0,rank_offset=0):
    if not num_gpus:
        num_gpus = torch.cuda.device_count()
    if not world_size:
        world_size=num_gpus
    mp.spawn(main_worker, nprocs=num_gpus, args=(world_size,rank_offset,config))
    torch.cuda.empty_cache()

In [31]:
params = {
    "Model-Name": 'multi-attention model',
    'Dataset-Name': 'Custome-testingV2',
    'note': 'experiment with RGB color format input images',
    "attention_layer": "b5",
    "batch_size": 16,
    "Augmentations": "horiz. flip, random crop",
    "learning_rate": 0.001, 
    "optimizer": "AdamW",
}
run["parameters"] = params

In [32]:
name='rgb_test'
url='tcp://127.0.0.1:27015'
Config=train_config(name,['custom'],url=url, df=train_df, eval_df=eval_df, attention_layer='b5',feature_layer='logits',epochs=5,batch_size=16,AGDA_loss_weight=0, augment='augment0')
Config.mkdirs()
main_worker(Config) 
run.stop()

Start Data preparation ...
Successfully complete Data preparation ...
Model Initialzation ...
Start Model Training ...
[EPOCH] - 0


100%|██████████| 88/88 [02:06<00:00,  1.43s/batch, loss=0.433, acc=0.8]  
100%|██████████| 88/88 [00:25<00:00,  3.42batch/s, loss=0.182, acc=0.95] 


[EPOCH] - 1


100%|██████████| 88/88 [01:07<00:00,  1.29batch/s, loss=0.154, acc=0.952]
100%|██████████| 88/88 [00:24<00:00,  3.59batch/s, loss=0.0626, acc=0.99] 


[EPOCH] - 2


100%|██████████| 88/88 [01:07<00:00,  1.31batch/s, loss=0.06, acc=0.989]  
100%|██████████| 88/88 [00:23<00:00,  3.68batch/s, loss=0.0253, acc=0.997]


[EPOCH] - 3


100%|██████████| 88/88 [01:07<00:00,  1.30batch/s, loss=0.0402, acc=0.989]
100%|██████████| 88/88 [00:24<00:00,  3.66batch/s, loss=0.0188, acc=0.995]


[EPOCH] - 4


100%|██████████| 88/88 [01:07<00:00,  1.31batch/s, loss=0.0292, acc=0.991]
100%|██████████| 88/88 [00:23<00:00,  3.67batch/s, loss=0.0171, acc=0.997]


Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 12 operations to synchronize with Neptune. Do not kill this process.
All 12 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/Botz/VIPCup-logs/e/VIP-4


---
Model loading and inferencing 

In [33]:
Config.net_config

{'net': 'xception',
 'feature_layer': 'logits',
 'attention_layer': 'b5',
 'num_classes': 2,
 'M': 4,
 'mid_dims': 256,
 'dropout_rate': 0.25,
 'drop_final_rate': 0.5,
 'pretrained': '',
 'alpha': 0.05,
 'size': (200, 200),
 'margin': 0.5,
 'inner_margin': [0.1, -2]}

In [34]:

with open('./tmp/model_config.json', 'w') as pf:
    json.dump(Config.net_config, pf)


In [35]:
def load_model(chk_path, config_path):
    with open(config_path, 'r') as pf:
        model_config = json.load(pf)

    model = MAT(**model_config)
    model_checkpoint = torch.load(chk_path)
    model.load_state_dict(model_checkpoint['state_dict'])
    model.eval()

    return model 

In [4]:
test_model = load_model('./checkpoints/Efb4/ckpt_4.pth', './tmp/model_config.json')

In [5]:
test_model

MAT(
  (net): xception(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (block1): Block(
      (skip): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (skipbn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (rep): Sequential(
        (0): SeparableConv2d(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
          (pointwise): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
 

In [6]:
def load_sample(image_path):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    print(image.shape)
    image = cv2.resize(image, (200, 200))
    print(image.shape)
    image = np.transpose(image, (2,0,1))[np.newaxis, ...]
    print(image.shape)
    image_tensor = torch.from_numpy(image).float()
    print(image_tensor.shape)
    return image_tensor

In [7]:
sample_tensor = load_sample('./data/SampleData/1.jpg')

(1250, 1000, 3)
(200, 200, 3)
(1, 3, 200, 200)
torch.Size([1, 3, 200, 200])


In [10]:
sample_output = torch.softmax(test_model(sample_tensor), dim=-1)


In [11]:
sample_output

tensor([[9.9989e-01, 1.1377e-04]], grad_fn=<SoftmaxBackward0>)