In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F

from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

#python img lib
from PIL import Image

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


## Подготовка данных

In [2]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform
        
    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength
    
    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_transformed = self.transform(img)
        
        label = img_path.split('/')[-1].split('.')[0]
        if label == 'dog':
            label = 1
        else:
            label = 0
            
        return img_transformed, label

In [3]:
train_data = []

for x in os.listdir("./data/train"):
    train_data.append("./data/train/" + x)

In [4]:
from sklearn.model_selection import train_test_split
train_list, val_list = train_test_split(train_data, test_size=0.2)

In [5]:
def_transform = transforms.Compose([   
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # default normalize
])

train_data = Dataset(train_list, transform=def_transform)
val_data = Dataset(val_list, transform=def_transform)

In [6]:
len(train_data)

20000

In [7]:
len(val_data)

5000

In [8]:
# Проверка трансформа изображения
train_data[0][0].shape

torch.Size([3, 32, 32])

### Архитектуру заимствуем с сайта PyTorch

In [9]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [11]:
trainloader = torch.utils.data.DataLoader(train_data)

In [12]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 0.798
[1,  4000] loss: 0.718
[1,  6000] loss: 0.709
[1,  8000] loss: 0.698
[1, 10000] loss: 0.693
[1, 12000] loss: 0.673
[1, 14000] loss: 0.650
[1, 16000] loss: 0.652
[1, 18000] loss: 0.644
[1, 20000] loss: 0.627
[2,  2000] loss: 0.629
[2,  4000] loss: 0.616
[2,  6000] loss: 0.614
[2,  8000] loss: 0.605
[2, 10000] loss: 0.617
[2, 12000] loss: 0.599
[2, 14000] loss: 0.586
[2, 16000] loss: 0.598
[2, 18000] loss: 0.588
[2, 20000] loss: 0.586
Finished Training


In [14]:
torch.save(net.state_dict(), './my_model.pth')

### Проверяем на валидационной выборке

In [17]:
correct = 0
total = 0

val_loader = torch.utils.data.DataLoader(val_data)

with torch.no_grad():
    for data in val_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network: {100 * correct // total} %')

Accuracy of the network: 71 %
