In [61]:
import os

DATA_PATH = 'data/'

In [62]:
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
from torchvision.datasets import ImageFolder
from imgaug import augmenters as iaa
import imgaug as ia
from torch.utils.data import ConcatDataset, DataLoader


train_path = os.path.join(DATA_PATH, 'train')
val_path = os.path.join(DATA_PATH, 'test')

In [63]:
NUM_EPOCHS = 100
IMAGE_SHAPE = 224
BATCH_SIZE = 12

use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

In [64]:
class TrainAugTransform:
    def __init__(self, rotation):
        self.aug = iaa.Sequential([
            iaa.Scale((IMAGE_SHAPE, IMAGE_SHAPE)),
            iaa.Sometimes(0.25, iaa.GaussianBlur(sigma=(0, 3.0))),
            iaa.Fliplr(0.5),
            iaa.Affine(rotate=rotation, mode='symmetric'),
            iaa.Sometimes(0.2, iaa.Dropout(p=(0, 0.1))),
            iaa.AddToHueAndSaturation(value=(-10, 10), per_channel=True)
        ])
      
    def __call__(self, img):
        img = np.array(img)
        img = self.aug.augment_image(img)
        img = np.transpose(img, [2, 0, 1]) / np.max(img)
        return img

In [65]:
class TestAugTransform:
    def __init__(self):
        self.aug = iaa.Sequential([
            iaa.Scale((IMAGE_SHAPE, IMAGE_SHAPE)),
        ])
      
    def __call__(self, img):
        img = np.array(img)
        img = self.aug.augment_image(img)
        img = np.transpose(img, [2, 0, 1]) / np.max(img)
        return img

In [66]:

human_dataset1 = ImageFolder(train_path, transform=TrainAugTransform(-12))
human_dataset2 = ImageFolder(train_path, transform=TrainAugTransform(0))
human_dataset3 = ImageFolder(train_path, transform=TrainAugTransform(12))
human_dataset = ConcatDataset([human_dataset1, human_dataset2, human_dataset3])

data_loader = DataLoader(human_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(ImageFolder(val_path, transform=TestAugTransform()), batch_size=BATCH_SIZE, shuffle=True)


In [67]:
import torch.nn.functional as F

class SimpleVGGClassifier(nn.Module):
    def __init__(self, image_shape):
        super().__init__()
        self.kernel_size = 3
        self.conv1 = self.conv_block(3, 64, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = self.conv_block(64, 128, 3)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = self.conv_block(128, 256, 4)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = self.conv_block(256, 512, 4)
        self.bn4 = nn.BatchNorm2d(512)
        # n x 512 x 14 x 14

        self.linear1 = nn.Linear(512 * 14 * 14, 1000)
        self.lrelu1 = nn.LeakyReLU()
        self.drop1 = nn.Dropout(p=0.3)
        self.linear2 = nn.Linear(1000, 1000)
        self.lrelu2 = nn.LeakyReLU()
        self.drop2 = nn.Dropout(p=0.3)
        self.linear3 = nn.Linear(1000, 2)
        
    def conv_block(self, start_volume, end_volume, num_conv=1) -> nn.Module:
        block = nn.Sequential()
        for i in range(num_conv):
            name_conv, name_relu = 'conv_{}_{}'.format(i, start_volume), 'relu_{}'.format(i)
            block.add_module(name_conv, nn.Conv2d(start_volume if i == 0 else end_volume, 
                                                  end_volume, 
                                                  self.kernel_size,
                                                  padding=1))
            block.add_module(name_relu, nn.ReLU())
        name_pool = 'pool_{}'.format(start_volume)
        block.add_module(name_pool, nn.MaxPool2d(kernel_size=2))
        return block

    def forward(self, input):
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.linear1(x.view(input.shape[0], -1))
        x = self.lrelu1(x)
        x = self.drop1(x)
        x = self.linear2(x)
        x = self.lrelu2(x)
        x = self.drop2(x)
        x = self.linear3(x)
        return F.softmax(x, dim=1)


In [68]:
from torch.optim import Adagrad

model = SimpleVGGClassifier(IMAGE_SHAPE).cuda() if use_cuda else SimpleVGGClassifier(IMAGE_SHAPE)
opt = Adagrad(model.parameters(), lr=0.001)


In [59]:
from tqdm import tqdm
from sklearn.metrics import classification_report

train_loss, val_loss = [], []

for i in range(NUM_EPOCHS):
    tmp_train_loss, tmp_val_loss = [], []

    model = model.train()
    for batch, labels in tqdm(data_loader, file=sys.stderr):
        opt.zero_grad()
        cuda_batch = batch.type(dtype)
        cuda_labels = labels.type(dtype)
        result = model(cuda_batch)
        loss = F.binary_cross_entropy(result[:, 1], cuda_labels)
        tmp_train_loss.append(loss.item())
        loss.backward()
        opt.step()
    
    val_model_result = np.zeros((len(val_loader) * BATCH_SIZE, 2))

    model = model.eval()
    for i, (batch, labels) in enumerate(tqdm(val_loader, file=sys.stderr)):
        with torch.no_grad():
            cuda_batch = batch.type(dtype)
            cuda_labels = labels.type(dtype)
            result = model(cuda_batch)
            val_model_result[i * BATCH_SIZE:i * BATCH_SIZE + len(result)] = result.clone().detach().cpu()
            loss = F.binary_cross_entropy(result[:, 1], cuda_labels)
            tmp_val_loss.append(loss.item())

    train_loss.append(np.mean(tmp_train_loss))
    val_loss.append(np.mean(tmp_val_loss))

    print(f'[INFO] Train loss: {train_loss[-1]}, Validation loss: {val_loss[-1]}')


100%|██████████| 73/73 [25:38<00:00, 21.07s/it]
  0%|          | 0/9 [00:06<?, ?it/s]


ValueError: could not broadcast input array from shape (12,2) into shape (12)

In [None]:
from sklearn.metrics import classification_report

val_model_result = np.zeros(len(val_loader) * BATCH_SIZE)
real_targets = np.zeros(len(val_loader) * BATCH_SIZE)

model = model.eval()
for i, (batch, labels) in enumerate(val_loader):
    with torch.no_grad():
        cuda_batch = batch.type(dtype)
        cuda_labels = labels.type(dtype)
        result = model(cuda_batch)
        print(labels, result)
        _res = result.clone().detach().cpu().numpy().argmax(axis=1)
        val_model_result[(i * BATCH_SIZE):(i * BATCH_SIZE + len(result))] = _res
        real_targets[(i * BATCH_SIZE):(i * BATCH_SIZE + len(result))] = labels
        loss = F.binary_cross_entropy(result[:, 1], cuda_labels)
        tmp_val_loss.append(loss.item())

In [None]:
val_model_result, real_targets

In [None]:
print(classification_report(real_targets, val_model_result))

In [None]:
torch.save(model.state_dict(), os.path.join(os.getcwd(), 'model.pt'))

tensor([1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0]) tensor([[0.8032, 0.1968],
        [0.6820, 0.3180],
        [0.8772, 0.1228],
        [0.4276, 0.5724],
        [0.6961, 0.3039],
        [0.5297, 0.4703],
        [0.5848, 0.4152],
        [0.9096, 0.0904],
        [0.5692, 0.4308],
        [0.4871, 0.5129],
        [0.7938, 0.2062],
        [0.6524, 0.3476]])
tensor([1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1]) tensor([[0.8155, 0.1845],
        [0.5058, 0.4942],
        [0.8879, 0.1121],
        [0.8689, 0.1311],
        [0.5196, 0.4804],
        [0.7906, 0.2094],
        [0.3823, 0.6177],
        [0.8359, 0.1641],
        [0.4563, 0.5437],
        [0.8875, 0.1125],
        [0.2658, 0.7342],
        [0.4357, 0.5643]])
tensor([1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1]) tensor([[0.4539, 0.5461],
        [0.3347, 0.6653],
        [0.6576, 0.3424],
        [0.3798, 0.6202],
        [0.4152, 0.5848],
        [0.2745, 0.7255],
        [0.5442, 0.4558],
        [0.6612, 0.3388],
        [0.2548, 0.7452],
     

In [83]:
val_model_result, real_targets

(array([0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0.,
        0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1.,
        0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.]),
 array([1., 0., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 1.,
        0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1.,
        1., 1., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1.,
        0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1.,
        0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0.]))

In [84]:
print(classification_report(real_targets, val_model_result))

              precision    recall  f1-score   support

         0.0       0.57      0.76      0.65        58
         1.0       0.55      0.34      0.42        50

    accuracy                           0.56       108
   macro avg       0.56      0.55      0.54       108
weighted avg       0.56      0.56      0.54       108



In [None]:
torch.save(model.state_dict(), os.path.join(os.getcwd(), 'model.pt'))