In [1]:
import os
import shutil
import argparse
import easydict
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torchvision
import torchvision.transforms as transforms
import torch.utils.data.dataset
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt 
import matplotlib.image as image 
import torchvision.models as models
import math
import time
import random
import skimage.io as io
import concurrent.futures
from efficientnet_pytorch import EfficientNet
from PIL import Image
import cv2

#from dataset import DFDCDatatset

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark=True

In [2]:
### Arguments
args = easydict.EasyDict({
    #"arch": 'resnet18',
    #"arch": 'resnet152',
    "arch": 'efficientnet-b6',
    #"arch": 'resnext50_32x4d',
    "root": "/storage0/bxc/data/train",
    "train_list": "/storage0/bxc/data/train/train_list.txt",
    "test_list": "/storage0/bxc/data/train/test_list.txt",
    "epochs": 10,
    "batch_size": 8,
    "lr": 0.0001,
    "momentum": 0.9, #0.99
    "weight_decay": 5e-4,
    
    "print_freq": 50,
    "eval_freq": 10,
    
    "workers": 1,
    "resume": False,
    "pretrained": False,
    "evaluate": False,
    "start_epoch": 0,
    "gpu": 0,
})

In [3]:
### Data load

class ImageRecord(object):
    def __init__(self, row):
        self._data = row

    @property
    def path(self):
        return self._data[0]

    @property
    def label(self):
        return int(self._data[1])


class DFDCDatatset(data.Dataset):
    def __init__(self, root_path, list_file, transform=None):
        self.root_path = root_path
        self.list_file = list_file
        self.transform = transform

        self._parse_list()

    def _load_image(self, image_path):
        return Image.open(image_path).convert('RGB')
#         img = cv2.imread(image_path)
#         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#         return img

    def _parse_list(self):
        self.image_list = [ImageRecord(x.strip().split(' ')) for x in open(self.list_file)]

    def __getitem__(self, index):
        record = self.image_list[index]

        image = self._load_image(record.path)
        
        if self.transform is not None:
            image = self.transform(image)

        return image, record.label

    def __len__(self):
        return len(self.image_list)


In [4]:
### train function
def train(train_loader, model, criterion, optimizer, epoch):
    model.train()

    print('-' * 50)
    print('Epoch {}/{}'.format(epoch + 1, args.epochs))

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    time1=0
    time1=time.time()
    for idx, (images, target) in enumerate(train_loader):
        #print(idx)
        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
        
        outputs = model(images)
        #print(outputs.data)
        _, preds = torch.max(outputs.data, 1)

        loss = criterion(outputs, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if idx % args.print_freq == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, idx * len(images), len(train_loader.dataset),
                100. * idx / len(train_loader), loss.item()))
            print("train {} image:".format(args.print_freq * len(images)),time.time()-time1)
            time1=time.time()
        # statistics
        running_loss += loss.item() * images.size(0)
        running_corrects += torch.sum(preds == target.data)

    epoch_loss = running_loss / float(len(train_loader.dataset))
    epoch_acc = running_corrects / float(len(train_loader.dataset))

    print('Training Loss: {:.4f} Acc: {:.4f}'.format(
        epoch_loss,epoch_acc))

In [5]:
### test function

def validate(test_loader, model, criterion):
    model.eval()

    test_loss = 0
    correct = 0

    with torch.no_grad():
        for idx, (images, target) in enumerate(test_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            output = model(images)
            test_loss += criterion(output, target).item()

            # get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

        test_loss /= float(len(test_loader.dataset))
        test_acc = 100. * correct / float(len(test_loader.dataset))

        print('\nTest set: Average loss: {:.4f}, '
              'Accuracy: {}/{} ({:.3f}%)\n'.format(
                  test_loss,correct, len(test_loader.dataset), test_acc))

        return test_acc

In [6]:
### utils

class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

def adjust_learning_rate(optimizer, epoch, args):
    """Sets the learning rate to the initial LR decayed by 10 every 5 epochs"""
    lr = args.lr * (0.1 ** (epoch // 3))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [7]:
def train_test_split(image_path,epoch):
    image_path = image_path
    length = int(len(image_path)/args.epochs)
    #length = int(len(image_path))
    #random.shuffle(image_path)
    test_path,train_path=[],[]
    test_len = int(length * 0.2)
    train_len = int(length * 0.8)
    
    train_start = epoch*length
    train_end = train_start+train_len
    test_end = train_end+test_len
    train_path = image_path[train_start:train_end]
    test_path = image_path[train_end:test_end]
    print(epoch,"train_start:",train_start,"\ttrain_end:",train_end,"\ttest_end",test_end)
    print(test_path[0])
    print(train_path[0])
    #train_path = image_path[0:train_len]
    #test_path = image_path[train_len:length]
    #print(len(image_path),train_len,test_len)


    fileObject_train = open(r'/storage0/bxc/data/train/train_list.txt', 'w')
    fileObject_test = open(r'/storage0/bxc/data/train/test_list.txt', 'w')
    for path in test_path:
        fileObject_test.write(path)
        fileObject_test.write('\n')
    for path in train_path:
        fileObject_train.write(path)
        fileObject_train.write('\n')

In [8]:
### model training and test
num_class = 2
best_acc1 = 0

#load model
# if args.pretrained:
#     print("=> using pre-trained model '{}'".format(args.arch))
#     model = models.__dict__[args.arch](pretrained=True)
# else:
#     print("=> creating model '{}'".format(args.arch))
#     model = models.__dict__[args.arch]()

model = EfficientNet.from_pretrained(args. arch,num_classes=2)
print("=> using pre-trained model '{}'".format(args.arch))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')
# model.fc = nn.ReLU(True)
# model.fc = nn.Dropout(0.5)
# model.fc = nn.Linear(1000, 512)
# model.fc = nn.ReLU(True)
# model.fc = nn.Dropout(0.2)
#model = F.softmax(model.fc, dim=1)
#model.fc = nn.Linear(1000, num_class)

# modify the number of output nodes
model = model.cuda(args.gpu)

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().cuda()
#criterion = nn.BCELoss().cuda()
#criterion = nn.SmoothL1Loss().cuda()
#criterion = nn.BCEWithLogitsLoss().cuda()
#criterion = nn.TripletMarginLoss().cuda()

# optimizer = torch.optim.SGD(model.parameters(), args.lr,
#                             momentum=args.momentum,
#                             weight_decay=args.weight_decay)

optimizer = torch.optim.AdamW(model.parameters())  

criterion = criterion.to(device)
print(model.state_dict)
# optionally resume from a checkpoint
if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        if args.gpu is None:
            checkpoint = torch.load(args.resume)
        else:
            # Map model to be loaded to specified single gpu.
            loc = 'cuda:{}'.format(args.gpu)
            checkpoint = torch.load(args.resume, map_location=loc)
        args.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        if args.gpu is not None:
            # best_acc1 may be from a checkpoint from a different GPU
            best_acc1 = best_acc1.to(args.gpu)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

cudnn.benchmark = True

# Data loading code
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
# normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
#                                  std=[0.5, 0.5, 0.5])

if args.evaluate:
    validate(test_loader, model, criterion, args)

image_path = []
count = 0
directory = os.walk(r'/storage0/bxc/data/train/real')
for path ,dir_,filelist in directory:
    for filename in filelist:
        if filename.endswith('jpg'):
            image_path.append(path+'/'+filename+' 0')
            #count+=1
            #if count >3:
                #count = 0
            break
count = 0
directory = os.walk(r'/storage0/bxc/data/train/fake')
for path ,dir_,filelist in directory:
    for filename in filelist:
        if filename.endswith('jpg'):
            image_path.append(path+'/'+filename+' 1')
#             count+=1
#             if count >3:
#                 count = 0
            break
random.shuffle(image_path)
print(len(image_path))

Loaded pretrained weights for efficientnet-b6
=> using pre-trained model 'efficientnet-b6'
<bound method Module.state_dict of EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 56, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        56, 56, kernel_size=(3, 3), stride=[1, 1], groups=56, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        56, 14, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        14, 56, kernel_size=(1, 1), stride=

99514


In [None]:

##################################################################################################
for epoch in range(args.start_epoch, args.epochs):
    
    train_test_split(image_path, epoch)
        
    time1,time2=0,0
    time1 = time.time()
    train_dataset = DFDCDatatset(args.root,
                                 args.train_list,
                                 transforms.Compose([
#                                      transforms.ColorJitter(brightness=0.5, contrast=0.4, saturation=0.3, hue=0.5),
#                                      transforms.RandomRotation(10),
#                                      transforms.RandomResizedCrop(256),
#                                      transforms.RandomHorizontalFlip(p=0.5),
                                     transforms.ToTensor(),
                                     normalize,
                                 ]))
    print("time1: ",time.time()-time1)
    time2 = time.time()
    test_dataset = DFDCDatatset(args.root,
                                args.test_list,
                                transforms.Compose([
                                    transforms.ToTensor(),
                                    normalize,
                                ]))
    print("time2: ",time.time()-time2)


    time3,time4=0,0
    time3 = time.time()
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    print("time3: ",time.time()-time3)
    time4 = time.time()
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=False)
    print("time4: ",time.time()-time4)
      
    start = time.time()
    
    adjust_learning_rate(optimizer, epoch, args)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(test_loader, model, criterion)
    
    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)

    save_checkpoint({
        'epoch': epoch + 1,
        'arch': args.arch,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer': optimizer.state_dict(),
    }, is_best)
    end = time.time()
    print("The time of each epoch is %f",end-start)

0 train_start: 0 	train_end: 7960 	test_end 9950
/storage0/bxc/data/train/fake/CW/20200901/cw-eunseoh/dfl/output_cw-eunseoh_010/image_00003.jpg 1
/storage0/bxc/data/train/fake/CW/20200927/cw-dohun/dffs/output_cw-dohun_077/image_00003.jpg 1
time1:  0.008974790573120117
time2:  0.0020952224731445312
time3:  0.0005202293395996094
time4:  3.4809112548828125e-05
--------------------------------------------------
Epoch 1/10
train 400 image: 0.9942264556884766
train 400 image: 36.46618032455444
train 400 image: 36.898930311203
train 400 image: 37.07547640800476
train 400 image: 37.13593411445618
train 400 image: 37.13909721374512
train 400 image: 37.119672536849976
train 400 image: 37.134079933166504
train 400 image: 37.12502717971802
train 400 image: 37.127883434295654
train 400 image: 37.11466932296753
train 400 image: 37.10167670249939
train 400 image: 37.08930492401123
train 400 image: 37.09045648574829
train 400 image: 37.11014986038208
train 400 image: 37.119332790374756
train 400 image

train 400 image: 37.11325550079346
train 400 image: 37.10469388961792
train 400 image: 37.11990475654602
train 400 image: 37.11593174934387
train 400 image: 37.24954962730408
train 400 image: 37.34861159324646
Training Loss: 0.0046 Acc: 0.9987

Test set: Average loss: 0.0001, Accuracy: 1990/1990 (100.000%)

The time of each epoch is %f 790.3768672943115
4 train_start: 39804 	train_end: 47764 	test_end 49754
/storage0/bxc/data/train/fake/CW/20200904/cw-suuyeon/dfl/output_cw-suuyeon_074/image_00003.jpg 1
/storage0/bxc/data/train/fake/CW/20200915/cw-minkyung/fsgan/output_cw-minkyung_048/image_00003.jpg 1
time1:  0.009284734725952148
time2:  0.002893686294555664
time3:  0.003005504608154297
time4:  0.0006229877471923828
--------------------------------------------------
Epoch 5/10
train 400 image: 0.957683801651001
