In [52]:
import torch
import pandas as pd
import torchvision
from torchvision.datasets import MNIST
from torchvision import transforms
import torchvision.transforms as tfs
from sklearn.metrics import accuracy_score
from torch import FloatTensor
from torch.utils.data import DataLoader
import numpy as np
from torch.nn.functional import cross_entropy
import matplotlib.pyplot as plt
import torch.nn as nn
from torchsummary import summary
from sklearn.metrics import classification_report
from tqdm import tqdm_notebook
import torch.nn as nn
import torch.nn.functional as F

%matplotlib inline
device = torch.device('cpu')
torch.set_num_threads(10)

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Resize((312, 440)),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.ImageFolder(root='./data_emo/train', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.ImageFolder(root='./data_emo/test', transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)

classes = ('af', 'an', 'di', 'ha',
           'ne', 'sa', 'su')


In [53]:
next(iter(trainloader))[0].shape

torch.Size([128, 3, 312, 440])

In [None]:
# случайный индекс от 0 до размера тренировочной выборки
i = np.random.randint(low=0, high=2000)

#plt.imshow(trainloader.dataset.imgs[i], cmap='gray');

plt.imshow(plt.imread(trainloader.dataset.imgs[i][0]))

In [55]:
class MyConvNet(nn.Module):
    def __init__(self):
        # вызов конструктора предка
        super().__init__()
        # необходмо заранее знать, сколько каналов у картинки (сейчас = 1),
        # которую будем подавать в сеть, больше ничего
        # про входящие картинки знать не нужно
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.fc1 = nn.Linear(76 * 108 * 128, 256)  # !!!
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 7)

    def forward(self, x):
        #(32,32)
        x = self.pool(F.relu(self.conv1(x)))
        #(14,14)
        x = self.pool(F.relu(self.conv2(x)))
        #(6,6)
        #print(x.shape)
        x = x.view(-1, 76 * 108 * 128)  # !!!
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [56]:
net = MyConvNet()

summary(net, (3, 312, 440))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 308, 436]           4,864
         MaxPool2d-2         [-1, 64, 154, 218]               0
            Conv2d-3        [-1, 128, 152, 216]          73,856
         MaxPool2d-4         [-1, 128, 76, 108]               0
            Linear-5                  [-1, 256]     268,960,000
            Linear-6                  [-1, 128]          32,896
            Linear-7                    [-1, 7]             903
Total params: 269,072,519
Trainable params: 269,072,519
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.57
Forward/backward pass size (MB): 122.04
Params size (MB): 1026.43
Estimated Total Size (MB): 1150.05
----------------------------------------------------------------


In [57]:
net = MyConvNet().to(device)

loss_fn = torch.nn.CrossEntropyLoss()

learning_rate = 1e-4
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# итерируемся
for epoch in tqdm_notebook(range(10)):

    running_loss = 0.0
    for i, batch in enumerate(tqdm_notebook(trainloader)):
        # так получаем текущий батч
        X_batch, y_batch = batch

        # обнуляем веса
        optimizer.zero_grad()

        # forward + backward + optimize
        y_pred = net(X_batch.to(device))
        loss = loss_fn(y_pred, y_batch.to(device))
        loss.backward()
        optimizer.step()

        # выведем текущий loss
        running_loss += loss.item()
        # выводем качество каждые 2000 батчей
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Обучение закончено')

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))


Обучение закончено


In [58]:
class_correct = list(0. for i in range(7))
class_total = list(0. for i in range(7))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        y_pred = net(images.to(device))#.view(4, -1))
        _, predicted = torch.max(y_pred, 1)
        c = (predicted.cpu().detach() == labels).squeeze()
        for i in range(c.shape[0]):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(7):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

sum(class_correct)/sum(class_total)

Accuracy of    af : 85 %
Accuracy of    an : 65 %
Accuracy of    di : 85 %
Accuracy of    ha : 90 %
Accuracy of    ne : 85 %
Accuracy of    sa : 60 %
Accuracy of    su : 85 %


0.7928571428571428