In [24]:
import os

# Root directory of the dataset; the split sub-folders are resolved relative to it.
DATA_PATH = 'data/'

In [3]:
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
from torchvision.datasets import ImageFolder
from imgaug import augmenters as iaa
import imgaug as ia
from torch.utils.data import ConcatDataset, DataLoader


# Image folders for the two splits. Note that the validation split is read
# from the 'test' sub-folder of DATA_PATH.
train_path, val_path = (
    os.path.join(DATA_PATH, split) for split in ('train', 'test')
)

In [4]:
# --- Training hyper-parameters ---------------------------------------------
NUM_EPOCHS = 100    # number of passes over the training loader
IMAGE_SHAPE = 224   # images are resized to IMAGE_SHAPE x IMAGE_SHAPE
BATCH_SIZE = 12     # mini-batch size used by both data loaders

# --- Reproducibility ---------------------------------------------------------
# Seed torch (weight initialisation, DataLoader shuffling) and numpy's global RNG.
# NOTE(review): imgaug augmenters may draw from their own RNG - seed it as well
# if exact replay of the augmentations is required (TODO confirm).
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# --- Device selection --------------------------------------------------------
# FORCE_CPU makes the former hard-coded `use_cuda = False` override explicit.
# Set it to False to run on the GPU whenever CUDA is available.
FORCE_CPU = True
use_cuda = (not FORCE_CPU) and torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

In [5]:
# Display the selected device flag and tensor type as the cell output.
use_cuda, dtype

(False, torch.FloatTensor)

In [6]:
class TrainAugTransform:
    """Training-time image transform: resize, augment and rescale.

    The imgaug pipeline resizes the image to ``IMAGE_SHAPE x IMAGE_SHAPE`` and
    then applies, in order: Gaussian blur (wrapped in ``Sometimes(0.25)``),
    horizontal flip (``Fliplr(0.5)``), an affine rotation by ``rotation``,
    pixel dropout (wrapped in ``Sometimes(0.2)``) and a hue/saturation shift.

    Args:
        rotation: value forwarded to ``iaa.Affine(rotate=...)``.
    """

    def __init__(self, rotation):
        self.aug = iaa.Sequential([
            # iaa.Resize replaces iaa.Scale, which imgaug reports as deprecated.
            iaa.Resize((IMAGE_SHAPE, IMAGE_SHAPE)),
            iaa.Sometimes(0.25, iaa.GaussianBlur(sigma=(0, 3.0))),
            iaa.Fliplr(0.5),
            iaa.Affine(rotate=rotation, mode='symmetric'),
            iaa.Sometimes(0.2, iaa.Dropout(p=(0, 0.1))),
            iaa.AddToHueAndSaturation(value=(-10, 10), per_channel=True)
        ])

    def __call__(self, img):
        """Augment ``img`` and return it channel-first, divided by its own maximum.

        Args:
            img: image convertible with ``np.array``; assumed to become an
                (H, W, C) array - TODO confirm for non-RGB inputs.

        Returns:
            Array with axes reordered to (C, H, W) and values divided by the
            maximum of the augmented image. An all-zero image is returned as
            zeros instead of NaN.
        """
        img = np.array(img)
        img = self.aug.augment_image(img)
        peak = np.max(img)
        if peak == 0:
            # Avoid 0/0 -> NaN for completely black images.
            peak = 1
        return np.transpose(img, [2, 0, 1]) / peak

In [7]:
class TestAugTransform:
    """Evaluation-time image transform: resize and rescale, no random augmentation.

    The imgaug pipeline only resizes the image to ``IMAGE_SHAPE x IMAGE_SHAPE``.
    """

    def __init__(self):
        self.aug = iaa.Sequential([
            # iaa.Resize replaces iaa.Scale, which imgaug reports as deprecated.
            iaa.Resize((IMAGE_SHAPE, IMAGE_SHAPE)),
        ])

    def __call__(self, img):
        """Resize ``img`` and return it channel-first, divided by its own maximum.

        Args:
            img: image convertible with ``np.array``; assumed to become an
                (H, W, C) array - TODO confirm for non-RGB inputs.

        Returns:
            Array with axes reordered to (C, H, W) and values divided by the
            maximum of the resized image. An all-zero image is returned as
            zeros instead of NaN.
        """
        img = np.array(img)
        img = self.aug.augment_image(img)
        peak = np.max(img)
        if peak == 0:
            # Avoid 0/0 -> NaN for completely black images.
            peak = 1
        return np.transpose(img, [2, 0, 1]) / peak

In [8]:

# The training folder is loaded three times, once per fixed rotation angle,
# and the three views are concatenated into a single training set.
human_dataset1, human_dataset2, human_dataset3 = (
    ImageFolder(train_path, transform=TrainAugTransform(angle))
    for angle in (-12, 0, 12)
)
human_dataset = ConcatDataset([human_dataset1, human_dataset2, human_dataset3])

# Shuffled mini-batch loaders for training and validation.
data_loader = DataLoader(human_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = ImageFolder(val_path, transform=TestAugTransform())
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)


  warn_deprecated(msg, stacklevel=3)


In [9]:
import torch.nn.functional as F

class SimpleVGGClassifier(nn.Module):
    """VGG-style two-class image classifier.

    Four convolutional stages (2, 3, 4 and 4 conv layers with 64, 128, 256 and
    512 output channels) each end in a 2x2 max-pool and are followed by a
    batch-norm layer. The flattened features go through three fully connected
    layers (1000 -> 1000 -> 2) with LeakyReLU and dropout in between.
    ``forward`` returns softmax probabilities over the 2 classes.

    Args:
        image_shape: side length of the square input images. The size of the
            first fully connected layer is derived from it; 224 gives the
            512 * 14 * 14 features of the original hard-coded layout.

    Raises:
        ValueError: if ``image_shape`` is smaller than 16, because the four
            2x2 poolings would leave no spatial features.
    """

    def __init__(self, image_shape):
        super().__init__()
        if image_shape < 16:
            raise ValueError(
                'image_shape must be at least 16, got {}'.format(image_shape))

        self.kernel_size = 3
        self.conv1 = self.conv_block(3, 64, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = self.conv_block(64, 128, 3)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = self.conv_block(128, 256, 4)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = self.conv_block(256, 512, 4)
        self.bn4 = nn.BatchNorm2d(512)

        # The padded 3x3 convs keep the spatial size; each of the four
        # max-pools halves it (rounding down), i.e. an overall factor of 16.
        # For image_shape == 224 the feature map is n x 512 x 14 x 14.
        feature_side = image_shape // 16

        self.linear1 = nn.Linear(512 * feature_side * feature_side, 1000)
        self.lrelu1 = nn.LeakyReLU()
        self.drop1 = nn.Dropout(p=0.3)
        self.linear2 = nn.Linear(1000, 1000)
        self.lrelu2 = nn.LeakyReLU()
        self.drop2 = nn.Dropout(p=0.3)
        self.linear3 = nn.Linear(1000, 2)

    def conv_block(self, start_volume, end_volume, num_conv=1) -> nn.Module:
        """Build ``num_conv`` conv+ReLU pairs followed by one 2x2 max-pool.

        Args:
            start_volume: input channels of the first convolution.
            end_volume: output channels of every convolution in the block.
            num_conv: number of conv+ReLU pairs.

        Returns:
            An ``nn.Sequential`` holding the layers. The sub-module names are
            part of the ``state_dict`` keys and must stay stable.
        """
        block = nn.Sequential()
        for i in range(num_conv):
            name_conv, name_relu = 'conv_{}_{}'.format(i, start_volume), 'relu_{}'.format(i)
            # Only the first conv changes the channel count.
            in_channels = start_volume if i == 0 else end_volume
            block.add_module(name_conv, nn.Conv2d(in_channels,
                                                  end_volume,
                                                  self.kernel_size,
                                                  padding=1))
            block.add_module(name_relu, nn.ReLU())
        name_pool = 'pool_{}'.format(start_volume)
        block.add_module(name_pool, nn.MaxPool2d(kernel_size=2))
        return block

    def forward(self, input):
        """Return class probabilities of shape (batch, 2) for a batch of images.

        Args:
            input: tensor of shape (batch, 3, H, W); H and W are expected to
                match the ``image_shape`` the model was built with.
        """
        x = input
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, bn in stages:
            x = bn(conv(x))

        # Flatten everything except the batch dimension.
        x = self.linear1(x.view(input.shape[0], -1))
        x = self.lrelu1(x)
        x = self.drop1(x)
        x = self.linear2(x)
        x = self.lrelu2(x)
        x = self.drop2(x)
        x = self.linear3(x)
        return F.softmax(x, dim=1)


In [10]:
from torch.optim import Adagrad

# Build the classifier, move it to the GPU only when CUDA use is enabled,
# then attach an Adagrad optimiser over all of its parameters.
model = SimpleVGGClassifier(IMAGE_SHAPE)
if use_cuda:
    model = model.cuda()

opt = Adagrad(model.parameters(), lr=0.001)


In [22]:
# `sys` is imported here so the cell also runs on a fresh kernel
# (it was previously only imported by a later cell).
import sys

from tqdm import tqdm

# Sanity check: fetch a single training batch and print its shape, the first
# image tensor and the labels, then stop.
for batch, labels in tqdm(data_loader, file=sys.stderr):
    print(batch.shape, batch[0], "kek", labels)
    break

  0%|          | 0/397 [00:00<?, ?it/s]

torch.Size([12, 3, 224, 224]) tensor([[[0.4941, 0.5098, 0.5137,  ..., 0.1137, 0.1216, 0.1333],
         [0.4980, 0.5098, 0.5098,  ..., 0.1098, 0.1412, 0.1529],
         [0.4980, 0.5059, 0.5176,  ..., 0.1765, 0.1333, 0.0863],
         ...,
         [0.0196, 0.0196, 0.0353,  ..., 0.1098, 0.1137, 0.1176],
         [0.0196, 0.0196, 0.0667,  ..., 0.1255, 0.1137, 0.1137],
         [0.0235, 0.0235, 0.1020,  ..., 0.1255, 0.1098, 0.1059]],

        [[0.5843, 0.6000, 0.6039,  ..., 0.2078, 0.2157, 0.2196],
         [0.5882, 0.6000, 0.6000,  ..., 0.2000, 0.2314, 0.2314],
         [0.5882, 0.5961, 0.6078,  ..., 0.2627, 0.2118, 0.1412],
         ...,
         [0.0431, 0.0431, 0.0471,  ..., 0.1529, 0.1569, 0.1490],
         [0.0353, 0.0392, 0.0667,  ..., 0.1608, 0.1490, 0.1569],
         [0.0353, 0.0275, 0.0941,  ..., 0.1608, 0.1490, 0.1490]],

        [[0.6549, 0.6706, 0.6745,  ..., 0.3020, 0.3137, 0.3216],
         [0.6588, 0.6706, 0.6706,  ..., 0.2941, 0.3216, 0.3098],
         [0.6588, 0.6667, 0.




In [21]:
import sys
from tqdm import tqdm
from sklearn.metrics import classification_report

# Mean loss per epoch, one entry appended after every epoch.
train_loss, val_loss = [], []

for epoch in range(NUM_EPOCHS):
    # Per-batch losses of the current epoch.
    tmp_train_loss, tmp_val_loss = [], []

    # ---- training pass ----
    model = model.train()
    for batch, labels in tqdm(data_loader, file=sys.stderr):
        opt.zero_grad()
        cuda_batch = batch.type(dtype)
        cuda_labels = labels.type(dtype)
        result = model(cuda_batch)
        # Column 1 of the model output is scored against the 0/1 labels.
        loss = F.binary_cross_entropy(result[:, 1], cuda_labels)
        tmp_train_loss.append(loss.item())
        loss.backward()
        opt.step()

    # ---- validation pass ----
    # One row of class probabilities per validation sample.
    val_model_result = np.zeros((len(val_loader.dataset), 2))
    offset = 0

    model = model.eval()
    with torch.no_grad():
        # A dedicated name for the batch counter is not needed any more; the
        # running offset replaces the index that used to shadow the epoch counter.
        for batch, labels in tqdm(val_loader, file=sys.stderr):
            cuda_batch = batch.type(dtype)
            cuda_labels = labels.type(dtype)
            result = model(cuda_batch)
            stop = offset + len(result)
            val_model_result[offset:stop] = result.cpu().numpy()
            offset = stop
            loss = F.binary_cross_entropy(result[:, 1], cuda_labels)
            tmp_val_loss.append(loss.item())

    train_loss.append(np.mean(tmp_train_loss))
    val_loss.append(np.mean(tmp_val_loss))

    print(f'[INFO] Train loss: {train_loss[-1]}, Validation loss: {val_loss[-1]}')


  1%|          | 4/397 [01:27<2:23:08, 21.85s/it]


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import classification_report

# Collect predicted and true class indices for the whole validation set.
# The buffers are sized by the real number of samples: sizing them as
# len(val_loader) * BATCH_SIZE leaves zero-filled fake entries whenever the
# last batch is shorter than BATCH_SIZE, which distorts the metrics.
num_val_samples = len(val_loader.dataset)
val_model_result = np.zeros(num_val_samples)
real_targets = np.zeros(num_val_samples)

# Per-batch losses of this evaluation run; started fresh so the cell does not
# depend on a list left behind by the training cell.
tmp_val_loss = []

model = model.eval()
offset = 0
with torch.no_grad():
    for batch, labels in val_loader:
        cuda_batch = batch.type(dtype)
        cuda_labels = labels.type(dtype)
        result = model(cuda_batch)

        stop = offset + len(result)
        # Predicted class = index of the larger of the two probabilities.
        val_model_result[offset:stop] = result.cpu().numpy().argmax(axis=1)
        real_targets[offset:stop] = labels.numpy()
        offset = stop

        loss = F.binary_cross_entropy(result[:, 1], cuda_labels)
        tmp_val_loss.append(loss.item())

In [None]:
# Display the predicted class indices next to the true labels (full arrays).
val_model_result, real_targets

In [None]:
# Per-class metrics of the validation predictions against the true labels.
report = classification_report(real_targets, val_model_result)
print(report)

In [None]:
# Persist the model weights as 'model.pt' in the current working directory.
checkpoint_path = os.path.join(os.getcwd(), 'model.pt')
torch.save(model.state_dict(), checkpoint_path)

In [None]:
# NOTE(review): this cell repeats an identical earlier cell.
# Display the predicted class indices next to the true labels (full arrays).
val_model_result, real_targets

In [None]:
# NOTE(review): this cell repeats an identical earlier cell.
# Per-class metrics of the validation predictions against the true labels.
report = classification_report(real_targets, val_model_result)
print(report)

In [None]:
# NOTE(review): this cell repeats an identical earlier cell.
# Persist the model weights as 'model.pt' in the current working directory.
checkpoint_path = os.path.join(os.getcwd(), 'model.pt')
torch.save(model.state_dict(), checkpoint_path)