In [11]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
import time
import os

In [12]:
# Root directory of the captcha data; KapchaDataset joins its image and
# label sub-directories onto this path.
data_dir = '/home/symbios/work/deep_learning/kapcha/data/store'

# Class index -> character lookup table used by name_classes().
classes = list('WSAD-')

def vis_layers(folder, inputs, layers=None):
    """Push ``inputs`` through a chain of layers and save every activation as a PNG.

    For each layer, the activation of the FIRST sample of the batch is tiled
    channel after channel along the vertical axis and written as a grayscale
    image to ``./<folder>/<position>_<layer name>.png``. The layer name and
    the tensor sizes are printed along the way.

    Args:
        folder (string): Output directory, relative to the current working
            directory. It is created when missing.
        inputs: Batch fed to the first layer; every layer's output must be
            indexable as ``output[0]`` -> (channels, height, width).
        layers (iterable, optional): Modules applied in order. When omitted
            the global ``modulelist`` is used with its first entry skipped
            (in this notebook ``modulelist`` comes from
            ``model.features.modules()``, whose first entry is the container
            itself). The global must then exist before the call.
    """
    if layers is None:
        layers = modulelist[1:]

    out_dir = './' + folder
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Run the whole chain first, remembering (layer name, activation) pairs.
    activations = []
    for layer in layers:
        inputs = layer(inputs)
        activations.append((str(layer).partition('(')[0], inputs))

    for i, (name, image) in enumerate(activations):
        # Stack the channels of the first sample vertically: (C, H, W) -> (C*H, W).
        # torch.cat expects a sequence of tensors, hence the explicit tuple.
        final = torch.cat(tuple(image[0].data), 0)
        print(name, '  ', image[0].size(), '   ', final.size())
        # matplotlib needs a host-side numpy array; a CUDA tensor cannot be
        # converted implicitly.
        plt.imsave(fname='./{0}/{1}_{2}.png'.format(folder, i, name), arr=final.cpu().numpy(), cmap='gray')
        
        
def accuracy(inputs, labels):
    """Fraction of samples in a batch whose every character is predicted correctly.

    Args:
        inputs: Sequence with one score tensor per character position, each
            of shape (batch, num_classes) -- the ``'final'`` list returned by
            ``Parser.forward``.
        labels: Tensor / Variable of shape (batch, positions) holding the
            target class index of every position.

    Returns:
        float: Exact-match rate in [0, 1]. ``0.0`` when there are no
        positions or no samples.
    """
    if len(inputs) == 0:
        return 0.0

    # Arg-max over the class axis of every head, pulled back to plain Python
    # ints so the comparison works for CPU and GPU tensors alike and for any
    # batch size (the batch size is taken from the data, not assumed).
    per_position = [scores.data.max(1)[1].view(-1).tolist() for scores in inputs]
    targets = labels.data.tolist()
    if len(targets) == 0:
        return 0.0

    # zip(*...) transposes the position-major predictions into one row per sample.
    success = sum(
        1 for predicted, target in zip(zip(*per_position), targets)
        if list(predicted) == target
    )
    # Normalise by the number of samples (not by the number of heads).
    return success / len(targets)


def test_accuracy():
    model.eval()
    total_acc = 0
    for i, data in enumerate(test_dataset):
        inputs = Variable(data['image'].cuda())
        labels = Variable(data['labels'].cuda())
        layers = model(inputs)
        final = layers['final']

        total_acc += accuracy(final,labels)
    model.train()
    return total_acc / len(test_dataset)


def name_classes(value):
    """Translate a sequence of class indices into its character string.

    Every element of ``value`` is cast to ``int`` and looked up in the
    module-level ``classes`` list; the characters are joined in order.
    """
    return ''.join(classes[int(value[pos])] for pos in range(len(value)))

In [13]:
# Mini-batch size handed to both DataLoaders; the misclassification-dump and
# layer-visualisation cells also read it.
batch_size = 10

In [4]:
class KapchaDataset(Dataset):
    """Kapcha dataset backed by numbered image / label files.

    Sample ``i`` is read from ``<root_dir>/<img_dir>/<first_idx + i>.jpg`` and
    ``<root_dir>/<csv_dir>/<first_idx + i>.csv``.
    """

    def __init__(self, root_dir, img_dir, csv_dir, length, first_idx=0):
        """
        Args:
            root_dir (string): Root directory.
            img_dir  (string): Directory with all images.
            csv_dir  (string): Directory with labels.
            length      (int): Total length of dataset.
            first_idx   (int): First data index.
        """
        self.root_dir   = root_dir
        self.img_dir    = os.path.join(root_dir, img_dir)
        self.csv_dir    = os.path.join(root_dir, csv_dir)
        self.first_idx  = first_idx
        self.length     = length

        # Normalisation constants; currently unused because the normalisation
        # step in __getitem__ is commented out.
        # NOTE(review): presumably pixel statistics of the images -- confirm.
        self.mean       = 0.079210128784179684
        self.std        = 0.083445704142252608

    def __len__(self):
        """Return the number of samples this dataset exposes."""
        return self.length

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx (int): Position inside this dataset, ``0 <= idx < len(self)``.

        Returns:
            dict: ``'image'`` -- float32 tensor of shape (1, 41, 265) scaled
            by 1/255; ``'labels'`` -- int64 tensor parsed from the
            comma-separated label file; ``'idx'`` -- the absolute file index
            (``first_idx + idx``).

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``. Without
                this, plain iteration over the dataset would never stop and
                a negative index would silently read files that belong to
                another split.
        """
        if not 0 <= idx < self.length:
            raise IndexError('index {0} is out of range for a dataset of length {1}'.format(idx, self.length))

        idx = self.first_idx + idx

        img_name    = os.path.join(self.img_dir, '{0}.jpg'.format(idx))
        # Image.open is lazy and keeps the file open; the context manager
        # closes the handle once the pixels have been copied into the array.
        with Image.open(img_name) as img:
            image   = np.array(img).astype('float32') / 255
#         image       -= self.mean
#         image       /= self.std

        # Keep index 0 of the last axis (assumes an H x W x C array -- TODO
        # confirm) and add a leading channel axis; the fixed (1, 41, 265)
        # shape doubles as a size check on the image.
        image       = image[:, :, 0].reshape(1, 41, 265)
        image       = torch.from_numpy(image)

        label_name  = os.path.join(self.csv_dir, '{0}.csv'.format(idx))
        labels      = np.genfromtxt(label_name, delimiter=',').astype('int64')
        labels      = torch.from_numpy(labels)

        sample = {'image': image, 'labels': labels, 'idx': idx}

        return sample

# NOTE: despite their names, both variables end up bound to DataLoaders that
# yield batches, not to the underlying Dataset objects.

# Training split: files 0 .. 11999, reshuffled on every pass.
train_dataset = DataLoader(
    KapchaDataset(root_dir=data_dir, img_dir='resized1', csv_dir='order',
                  length=12000, first_idx=0),
    batch_size=batch_size,
    shuffle=True,
    num_workers=0)

# Test split: the 2360 files that follow, read in a fixed order.
test_dataset = DataLoader(
    KapchaDataset(root_dir=data_dir, img_dir='resized1', csv_dir='order',
                  length=2360, first_idx=12000),
    batch_size=batch_size,
    shuffle=False,
    num_workers=0)

In [5]:
class Parser(nn.Module):
    """CNN that reads a fixed-length captcha.

    A shared convolutional trunk (``features``) and a shared fully connected
    block (``middle``) feed one independent linear head per character
    position (``classification``).

    NOTE(review): the default ``flat_features`` of ``64 * 1 * 21`` corresponds
    to a 1 x 21 feature map. A 1 x 41 x 265 image -- the shape KapchaDataset
    produces -- yields 64 x 3 x 31 = 5952 values; use
    ``Parser(flat_features=Parser.feature_size(41, 265))`` for such inputs.
    """

    def __init__(self, flat_features=64 * 1 * 21, num_positions=10, num_classes=5):
        """
        Args:
            flat_features (int): Number of values per sample leaving
                ``features`` (see ``feature_size``).
            num_positions (int): Number of character positions, i.e. heads.
            num_classes   (int): Number of classes every head scores.
        """
        super(Parser, self).__init__()

        self.flat_features = flat_features

        self.features = torch.nn.Sequential(
            torch.nn.Conv2d(1, 16, 3),
            torch.nn.BatchNorm2d(16),
            torch.nn.LeakyReLU(),
            torch.nn.MaxPool2d(2, stride=2),

            torch.nn.Conv2d(16, 32, 3),
            torch.nn.BatchNorm2d(32),
            torch.nn.LeakyReLU(),
            torch.nn.MaxPool2d(2, stride=2),

            torch.nn.Conv2d(32, 64, 3),
            torch.nn.BatchNorm2d(64),
            torch.nn.LeakyReLU(),
            torch.nn.MaxPool2d(2, stride=2),
        )

        self.middle = torch.nn.Sequential(
            torch.nn.Linear(flat_features, 1000),
            torch.nn.BatchNorm1d(1000),
            torch.nn.LeakyReLU(),
            torch.nn.Dropout(p=0.5),

            torch.nn.Linear(1000, 100),
            torch.nn.BatchNorm1d(100),
            torch.nn.LeakyReLU(),
        )

        # The heads are applied side by side, never chained, so they live in
        # a ModuleList rather than a Sequential. The parameter names
        # (``classification.<i>.weight`` / ``.bias``) are the same for both
        # containers, so existing checkpoints keep loading.
        self.classification = torch.nn.ModuleList(
            [torch.nn.Linear(100, num_classes) for _ in range(num_positions)]
        )

    @staticmethod
    def feature_size(height, width):
        """Return the per-sample size of the ``features`` output for a 1 x height x width image.

        Must be kept in sync with ``self.features``: three stages of an
        unpadded 3x3 convolution followed by a 2x2 max-pool with stride 2,
        ending with 64 channels.
        """
        for _ in range(3):
            height = (height - 2) // 2
            width = (width - 2) // 2
        return 64 * height * width

    def forward(self, inputs):
        """Score every character position.

        Args:
            inputs: Batch of images, shape (batch, 1, height, width).

        Returns:
            dict: ``{'final': [...]}`` with one (batch, num_classes) score
            tensor per character position.

        Raises:
            ValueError: If the image size does not produce ``flat_features``
                values per sample.
        """
        features = self.features(inputs)
        # Flatten per sample. Keeping the batch axis fixed means a wrong
        # image size cannot silently be folded into the batch dimension.
        flat = features.view(features.size(0), -1)
        if flat.size(1) != self.flat_features:
            raise ValueError(
                'features produce {0} values per sample but the model was built for {1}; '
                'pass flat_features=Parser.feature_size(height, width)'.format(flat.size(1), self.flat_features))

        hidden = self.middle(flat)

        return {'final': [head(hidden) for head in self.classification]}


In [6]:
# Build the network and move its parameters to the GPU (requires CUDA).
model = Parser().cuda()
# Bare expression: the notebook shows the module summary as the cell output.
model

Parser(
  (features): Sequential(
    (0): Conv2d (1, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU(0.01)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (4): Conv2d (16, 32, kernel_size=(3, 3), stride=(1, 1))
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (6): LeakyReLU(0.01)
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (8): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (10): LeakyReLU(0.01)
    (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (middle): Sequential(
    (0): Linear(in_features=1344, out_features=1000)
    (1): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU(0.01)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=1000, out_features=100)
    (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=Tru

In [7]:
# Cross-entropy loss; the training loop applies it to each character head
# separately and sums the results.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, with learning rate 1e-5 and weight decay 1e-5.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)

In [10]:
model.train()

# Number of batches per epoch; used to average the running statistics
# instead of a hard-coded count.
num_batches = len(train_dataset)

for epoch in range(100):
    run_loss = 0.0
    run_acc = 0.0
    for data in train_dataset:
        inputs = Variable(data['image'].cuda())
        labels = Variable(data['labels'].cuda())

        # Clear gradients BEFORE the backward pass so that gradients left
        # over from an interrupted run are never applied.
        optimizer.zero_grad()

        final = model(inputs)['final']

        # One cross-entropy term per character position, summed.
        total_loss = sum(criterion(scores, labels[:, char_num])
                         for char_num, scores in enumerate(final))

        total_loss.backward()
        optimizer.step()

        # Accumulate a plain Python number. Adding the Variable itself would
        # keep every iteration's autograd graph alive for the whole epoch.
        # (`.data[0]` matches the tensor API used throughout this notebook;
        # on PyTorch >= 0.4 the equivalent is `.item()`.)
        run_loss += total_loss.data[0]
        run_acc += accuracy(final, labels)

    print('Epoch: {0},  Loss:{1}, Acc:{2}, TestAcc: {3}'.format(epoch, run_loss / num_batches, run_acc / num_batches, test_accuracy()))

Epoch: 0,  Loss:1.4684585571289062, Acc:0.8076666666666624, TestAcc: 0.8953389830508492
Epoch: 1,  Loss:1.4164515177408854, Acc:0.8138333333333276, TestAcc: 0.8822033898305106
Epoch: 2,  Loss:1.3474184163411458, Acc:0.825666666666661, TestAcc: 0.8995762711864428
Epoch: 3,  Loss:1.2920751953125, Acc:0.8362499999999928, TestAcc: 0.9199152542372901
Epoch: 4,  Loss:1.2492757161458334, Acc:0.8351666666666605, TestAcc: 0.9076271186440698
Epoch: 5,  Loss:1.199661865234375, Acc:0.8440833333333267, TestAcc: 0.925000000000002
Epoch: 6,  Loss:1.154696553548177, Acc:0.8507499999999927, TestAcc: 0.9250000000000018
Epoch: 7,  Loss:1.0965638224283853, Acc:0.8606666666666595, TestAcc: 0.924152542372883
Epoch: 8,  Loss:1.0661079915364584, Acc:0.8636666666666616, TestAcc: 0.9364406779661034
Epoch: 9,  Loss:1.0312753295898438, Acc:0.8676666666666599, TestAcc: 0.9411016949152559
Epoch: 10,  Loss:0.9915709431966145, Acc:0.8758333333333271, TestAcc: 0.9423728813559338
Epoch: 11,  Loss:0.9455376180013021, Ac

KeyboardInterrupt: 

In [34]:
# Save the weights twice: once while the model is on the GPU and once after
# moving it to the CPU.
# NOTE(review): nothing in this cell creates ./models -- it has to exist already.
torch.save(model.state_dict(), './models/{0}.pt'.format('7_gpu'))
model.cpu()
torch.save(model.state_dict(), './models/{0}.pt'.format('7_cpu'))
# Move the model back to the GPU; as the last expression of the cell this
# also displays the module summary.
model.cuda()

Parser(
  (features): Sequential(
    (0): Conv2d (1, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU(0.01)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (4): Conv2d (16, 32, kernel_size=(3, 3), stride=(1, 1))
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (6): LeakyReLU(0.01)
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (8): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (10): LeakyReLU(0.01)
    (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (middle): Sequential(
    (0): Linear(in_features=14080, out_features=1000)
    (1): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU(0.01)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=1000, out_features=100)
    (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=Tr

In [45]:
# Restore the weights stored in the '7_cpu' checkpoint into the existing model.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load('./models/{0}.pt'.format('7_cpu')))

In [13]:
# Pull a single batch from the test loader. The builtin next() works with
# every iterator; the `.next()` method is only a legacy alias.
it = iter(test_dataset)
data = next(it)

inputs = Variable(data['image'].cuda())
labels = Variable(data['labels'].cuda())

layers = model(inputs)

final = layers['final']

# One cross-entropy term per character position, summed.
losses = [criterion(final[char_num], labels[:, char_num]) for char_num in range(10)]

total_loss = sum(losses)
# NOTE(review): the next three lines back-propagate and apply an optimizer
# step on a TEST batch, i.e. the model is updated with test data. Confirm
# that this is intended; for a pure loss read-out they should be removed.
total_loss.backward()
optimizer.step()
optimizer.zero_grad()

print('Total loss: {0}'.format(total_loss.data[0]))

Total loss: 16.26456069946289


In [28]:
""" Сохранение некорректно распознанных капч"""

# Save every test captcha the model gets wrong as
# ./errors/<file index>_<predicted string>.png

model.eval()

# imsave does not create missing directories.
if not os.path.exists('./errors'):
    os.makedirs('./errors')

for data in test_dataset:
    idx = data['idx']
    inputs = Variable(data['image'].cuda())
    labels = Variable(data['labels'].cuda())

    final = model(inputs)['final']

    # (batch, positions) tensor of predicted class indices: arg-max over the
    # class axis of every head, stacked position by position.
    predicted = torch.stack([scores.data.max(1)[1].view(-1) for scores in final], 1)

    # Iterate over the samples actually present, so a short final batch
    # cannot index out of range.
    for number in range(predicted.size(0)):
        f = predicted[number]
        if not torch.equal(labels.data[number], f):
            # matplotlib needs a host-side numpy array.
            plt.imsave(fname='./errors/{0}_{1}.png'.format(int(idx[number]), name_classes(f)),
                       arr=inputs.data[number][0].cpu().numpy(), cmap='gray')

In [26]:
""" Визуализация слоев"""

# Visualise the activations of the convolutional trunk for one training batch.

it = iter(train_dataset)

# The builtin next() works with every iterator; `.next()` is only a legacy alias.
value = next(it)
value = Variable(value['image'].cuda())

# NOTE(review): resize_ only re-interprets (and, if needed, grows) the
# underlying storage -- it does NOT rescale the images, and any newly added
# elements are uninitialised. If the loader does not already deliver
# 58 x 372 images, the pictures visualised below are scrambled.
value.data.resize_(batch_size,1, 58, 372)
print('input size',value.size())

# vis_layers reads this global; the first entry of modules() is the
# Sequential container itself, which vis_layers skips.
modulelist = list(model.features.modules())

vis_layers('visual/layers/5', value)


input size torch.Size([10, 1, 58, 372])
Conv2d     torch.Size([16, 56, 370])     torch.Size([896, 370])
BatchNorm2d    torch.Size([16, 56, 370])     torch.Size([896, 370])
LeakyReLU    torch.Size([16, 56, 370])     torch.Size([896, 370])
MaxPool2d    torch.Size([16, 28, 185])     torch.Size([448, 185])
Conv2d     torch.Size([32, 26, 183])     torch.Size([832, 183])
BatchNorm2d    torch.Size([32, 26, 183])     torch.Size([832, 183])
LeakyReLU    torch.Size([32, 26, 183])     torch.Size([832, 183])
MaxPool2d    torch.Size([32, 13, 91])     torch.Size([416, 91])
Conv2d     torch.Size([64, 11, 89])     torch.Size([704, 89])
BatchNorm2d    torch.Size([64, 11, 89])     torch.Size([704, 89])
LeakyReLU    torch.Size([64, 11, 89])     torch.Size([704, 89])
MaxPool2d    torch.Size([64, 5, 44])     torch.Size([320, 44])


In [11]:
# Print the size of the feature map that the convolutional trunk produces
# for one training batch.
it = iter(train_dataset)
data = next(it)

inputs = Variable(data['image'].cuda())
labels = Variable(data['labels'].cuda())

# Parser.forward returns a dict, which has no .size(); query the trunk
# directly to get the tensor whose size is of interest here.
layers = model.features(inputs)
print(layers.size())

torch.Size([10, 64, 1, 21])
