In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
import time
import os

In [2]:
data_dir = '/home/symbios/work/deep_learning/kapcha/data/store'

classes = ['W','S','A','D','-']

def save_im(conv, name):
    final = torch.cat((conv.data), 0)
    print(name, '  ', conv.size(), '   ', final.size())
    plt.imsave(fname=name, arr=final, cmap='gray')

In [3]:
batch_size = 10

In [4]:
class KapchaDataset(Dataset):
    """Kapcha dataset."""

    def __init__(self, root_dir, img_dir, csv_dir, length, first_idx=0):
        """
        Args:
            root_dir (string): Root directory.
            img_dir  (string): Directory with all images.
            csv_dir  (string): Directory with labels.
            length      (int): Total length of dataset.
            first_idx   (int): First data index.
        """
        self.root_dir   = root_dir
        self.img_dir    = os.path.join(root_dir, img_dir)
        self.csv_dir    = os.path.join(root_dir, csv_dir)
        self.first_idx  = first_idx
        self.length     = length
#         self.mean       = (0.079210128784179684, 0.079210128784179684, 0.079210128784179684)
#         self.std        = (0.083445704142252608, 0.083445704142252608, 0.083445704142252608)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        idx = self.first_idx + idx

        img_name    = os.path.join(self.img_dir, '{0}.jpg'.format(idx))
        image       = Image.open(img_name)
        image       = np.array(image).astype('float32') / 255
        
        image       = image[:, :, 0].reshape(1, 58, 372)
        image       = torch.from_numpy(image)

        label_name  = os.path.join(self.csv_dir, '{0}.csv'.format(idx))
        labels      = np.genfromtxt(label_name, delimiter=',').astype('int64')
        labels      = torch.from_numpy(labels)

        sample = {'image': image, 'labels': labels, 'idx': idx}

        return sample

train_dataset   = KapchaDataset(root_dir=data_dir,
                                img_dir='kapcha',
                                csv_dir='order',
                                length=12000,
                                first_idx=0)

train_dataset   = DataLoader(train_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=0)

test_dataset    = KapchaDataset(root_dir=data_dir,
                                img_dir='kapcha',
                                csv_dir='order',
                                length=2369,
                                first_idx=12000)

test_dataset    = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=0)

In [5]:
class Parser(nn.Module):

    def __init__(self):
        super(Parser, self).__init__()
        self.pool = torch.nn.MaxPool2d(2, stride=2)
        self.relu = nn.LeakyReLU()
        self.sigmoid = nn.Sigmoid()
        
        self.conv1 = torch.nn.Conv2d(1, 16, 3)
        self.conv2 = torch.nn.Conv2d(16, 32, 3)
        self.conv3 = torch.nn.Conv2d(32, 64, 3)
        
        self.linear1 = torch.nn.Linear(64 * 5 * 44, 1000)
        self.linear2 = torch.nn.Linear(1000, 100)
        
        self.linear01 = torch.nn.Linear(100, 5)
        self.linear02 = torch.nn.Linear(100, 5)
        self.linear03 = torch.nn.Linear(100, 5)
        self.linear04 = torch.nn.Linear(100, 5)
        self.linear05 = torch.nn.Linear(100, 5)
        self.linear06 = torch.nn.Linear(100, 5)
        self.linear07 = torch.nn.Linear(100, 5)
        self.linear08 = torch.nn.Linear(100, 5)
        self.linear09 = torch.nn.Linear(100, 5)
        self.linear10 = torch.nn.Linear(100, 5)
        
    def forward(self, inputs):
        conv1 = self.conv1(inputs)
        relu1 = self.relu(conv1)
        pool1 = self.pool(relu1)
        
        conv2 = self.conv2(pool1)
        relu2 = self.relu(conv2)
        pool2 = self.pool(relu2)
        
        conv3 = self.conv3(pool2)
        relu3 = self.relu(conv3)
        pool3 = self.pool(relu3)
        
        re_pool3 = pool3.view(-1 , 64 * 5 * 44)
        
        linear1 = self.linear1(re_pool3)
        linear2 = self.linear2(linear1)

        final = []
        final.append(self.linear01(linear2))
        final.append(self.linear02(linear2))
        final.append(self.linear03(linear2))
        final.append(self.linear04(linear2))
        final.append(self.linear05(linear2))
        final.append(self.linear06(linear2))
        final.append(self.linear07(linear2))
        final.append(self.linear08(linear2))
        final.append(self.linear09(linear2))
        final.append(self.linear10(linear2))
        
        
        return {'1_conv1': conv1, '2_relu1': relu1, '3_pool1': pool1,
                '4_conv2': conv2, '5_relu2': relu2, '6_pool2': pool2,
                '7_conv3': conv3, '8_relu3': relu3, '9_pool3': pool3, 'final': final}


In [6]:
model = Parser().cuda()
model

Parser(
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (relu): LeakyReLU(0.01)
  (sigmoid): Sigmoid()
  (conv1): Conv2d (1, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d (16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1))
  (linear1): Linear(in_features=14080, out_features=1000)
  (linear2): Linear(in_features=1000, out_features=100)
  (linear01): Linear(in_features=100, out_features=5)
  (linear02): Linear(in_features=100, out_features=5)
  (linear03): Linear(in_features=100, out_features=5)
  (linear04): Linear(in_features=100, out_features=5)
  (linear05): Linear(in_features=100, out_features=5)
  (linear06): Linear(in_features=100, out_features=5)
  (linear07): Linear(in_features=100, out_features=5)
  (linear08): Linear(in_features=100, out_features=5)
  (linear09): Linear(in_features=100, out_features=5)
  (linear10): Linear(in_features=100, out_features=5)
)

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [10]:
for epoch in range(10):
    run_loss = 0
    for i, data in enumerate(train_dataset):
        inputs = Variable(data['image'].cuda())
        labels = Variable(data['labels'].cuda())
        
        layers = model(inputs)
        final = layers['final']
        
        losses = []
        for char_num in range(10):
            loss = criterion(final[char_num], labels[:,char_num])
            losses.append(loss)

        total_loss = sum(losses)
        run_loss += total_loss
        
        if(i % 100 == 0):
            print('Loss= ', total_loss.data[0])
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad() 
    
    print('Epoch: {0} - Loss:{1}'.format(epoch, run_loss.data[0]/len(train_dataset)))

Loss=  1.6903685331344604
Loss=  0.741338849067688
Loss=  0.9532225131988525
Loss=  0.8610985279083252
Loss=  1.3867994546890259
Loss=  1.8140556812286377
Loss=  1.198352336883545
Loss=  1.8545012474060059
Loss=  1.5406296253204346
Loss=  2.4570934772491455
Loss=  1.6961561441421509
Loss=  1.5229811668395996
Epoch: 0 - Loss:1.5223494466145833
Loss=  0.6402724981307983
Loss=  1.6739414930343628
Loss=  1.0715916156768799
Loss=  0.922124981880188
Loss=  0.9988433122634888
Loss=  1.49813711643219
Loss=  0.7800291776657104
Loss=  1.0127769708633423
Loss=  1.592955470085144
Loss=  2.766164541244507
Loss=  0.9748193621635437
Loss=  1.860259771347046
Epoch: 1 - Loss:1.3452581787109374
Loss=  2.2971129417419434
Loss=  1.127724051475525
Loss=  0.5303621888160706
Loss=  1.8541507720947266
Loss=  1.4033986330032349
Loss=  0.7157924771308899
Loss=  0.9216243028640747
Loss=  0.9056689739227295
Loss=  1.0791168212890625
Loss=  0.7970865368843079
Loss=  0.8470706939697266
Loss=  0.7485674023628235
Epo

In [9]:
it = iter(test_dataset)
data = it.next()
          
inputs = Variable(data['image'].cuda())
labels = Variable(data['labels'].cuda())

layers = model(inputs)

final = layers['final']

losses = []
for char_num in range(10):
    loss = criterion(final[char_num], labels[:,char_num])
    losses.append(loss)

total_loss = sum(losses)
total_loss.backward()
optimizer.step()
optimizer.zero_grad() 

print('Total loss: {0}'.format(total_loss.data[0]))

Total loss: 16.16602897644043


In [10]:
n = 3

it = iter(test_dataset)
data = it.next()

idx = data['idx']
inputs = Variable(data['image'].cuda())
labels = Variable(data['labels'].cuda())

layers = model(inputs)

final = layers['final']

# print(final)
rez = []
for i in range(10):
    rez.append(final[i][n].max(0)[1].data[0])
    
f = torch.FloatTensor(rez)

print(labels[n].view(1,10))

f.view(1, 10)

Variable containing:
    2     0     2     3     2     4     4     4     4     4
[torch.cuda.LongTensor of size 1x10 (GPU 0)]




    4     3     3     4     3     3     4     3     2     4
[torch.FloatTensor of size 1x10]

In [178]:
it = iter(train_dataset)

idn = 0

value = it.next()
value = Variable(value['image'].cuda())

value.data.resize_(batch_size,1, 58, 372)

pred = model(value)

for i, key in enumerate(pred):
    save_im(pred[key][idn], './parse_layers/{1}.png'.format(i,key))

./parse_layers/2_relu1.png    torch.Size([16, 56, 370])     torch.Size([896, 370])
./parse_layers/4_conv2.png    torch.Size([32, 26, 183])     torch.Size([832, 183])
./parse_layers/5_relu2.png    torch.Size([32, 26, 183])     torch.Size([832, 183])
./parse_layers/6_pool2.png    torch.Size([32, 13, 91])     torch.Size([416, 91])
./parse_layers/1_conv1.png    torch.Size([16, 56, 370])     torch.Size([896, 370])
./parse_layers/3_pool1.png    torch.Size([16, 28, 185])     torch.Size([448, 185])
./parse_layers/9_pool3.png    torch.Size([64, 5, 44])     torch.Size([320, 44])
./parse_layers/8_relu3.png    torch.Size([64, 11, 89])     torch.Size([704, 89])
./parse_layers/7_conv3.png    torch.Size([64, 11, 89])     torch.Size([704, 89])
