In [8]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import numpy as np
import matplotlib.pyplot as plt

def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

transform = transforms.Compose(
    [transforms.Resize(64),  # Изменяем размер изображений до 64x64
     transforms.CenterCrop(64),  # Обрезаем изображение до 64x64
     transforms.RandomHorizontalFlip(),  # Случайное горизонтальное переворачивание
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# dataset = ImageFolder(root='open_images_classification', transform=transform)
dataset = ImageFolder(root='open_images_house_2', transform=transform)

train_size = int(0.8 * len(dataset))
val_size = int((len(dataset) - train_size)/2)
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

trainloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
valLoader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=2)
testloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2)

classes = dataset.classes

# Определение нейронной сети
class ConvNeuralNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(256 * 8 * 8, 512)  # Размер входа зависит от размера после conv/pool слоев
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)  # 10 выходных нейронов для многоклассовой классификации

    def forward(self, x):
        # print(f'Input: {x.shape}')
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        # print(f'After conv1: {x.shape}')
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # print(f'After conv2: {x.shape}')
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        # print(f'After conv3: {x.shape}')
        x = x.view(-1, 256 * 8 * 8)
        # print(f'After view: {x.shape}')
        x = F.relu(self.fc1(x))
        # print(f'After fc1: {x.shape}')
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        # print(f'After fc2: {x.shape}')
        x = self.dropout(x)
        x = self.fc3(x)  # Используем линейный слой для многоклассовой классификации
        # print(f'Output: {x.shape}')
        return x

device = "cuda" if torch.cuda.is_available() else "cpu"

# set the model to device
model = ConvNeuralNet(len(classes)).to(device)

# set loss function
loss_func = nn.CrossEntropyLoss()

# set learning rate 
lerning_rate = 0.010

# set optimizer as SGD
optimizer = torch.optim.SGD(
    model.parameters(), lr=lerning_rate
) 


In [9]:
# Обучение модели
num_epochs = 10

for epoch in range(num_epochs):  # количество эпох
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # print('loss ', epoch, ' -- ', running_loss)

    print('Обучение завершен  ======   ')

    # at end of epoch check validation loss and acc
    with torch.no_grad():
      	# switch model to eval (not train) model
        model.eval()
        correct = 0
        total = 0
        all_val_loss = []
        for images, labels in valLoader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            total += labels.size(0)
            # calculate predictions
            predicted = torch.argmax(outputs, dim=1)
            # calculate actual values
            correct += (predicted == labels).sum().item()
            # calculate the loss
            all_val_loss.append(loss_func(outputs, labels).item())
        # calculate val-loss
        mean_val_loss = sum(all_val_loss) / len(all_val_loss)
        # calculate val-accuracy
        mean_val_acc = 100 * (correct / total)

    print(
        'Epoch [{}/{}], Loss: {:.4f}, Val-loss: {:.4f}, Val-acc: {:.1f}%'.format(
            epoch+1, num_epochs, loss.item(), mean_val_loss, mean_val_acc
        )
    )

    print('Завершена валидация  ======   ')

# Оценка модели
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        print('labels --->', labels)
        _, predicted = torch.max(outputs.data, 1)
        print('predicted --->', predicted)
        total += labels.size(0)
        print('total --->', total)
        correct += (predicted == labels).sum().item()
        print('correct --->', correct)
        # Вывод изображений и предсказаний
        for i in range(len(images)):
            print(f'Истинный класс: {classes[labels[i]]}, Предсказанный класс: {classes[predicted[i]]}')
            # imshow(images[i])

print(f'Точность на тестовых данных: {100 * correct / total:.2f}%')

# save to file
torch.save(model, 'cnn_houses_binary.pt')


Epoch [1/10], Loss: 0.9924, Val-loss: 0.6690, Val-acc: 60.9%
Epoch [2/10], Loss: 0.4931, Val-loss: 0.6333, Val-acc: 63.5%
Epoch [3/10], Loss: 0.4824, Val-loss: 0.5824, Val-acc: 68.9%
Epoch [4/10], Loss: 1.0247, Val-loss: 0.5958, Val-acc: 71.7%
Epoch [5/10], Loss: 0.3006, Val-loss: 0.5300, Val-acc: 74.5%
Epoch [6/10], Loss: 0.2064, Val-loss: 0.5398, Val-acc: 74.1%
Epoch [7/10], Loss: 0.6483, Val-loss: 0.5990, Val-acc: 70.7%
Epoch [8/10], Loss: 0.7556, Val-loss: 0.5396, Val-acc: 72.9%
Epoch [9/10], Loss: 0.7653, Val-loss: 0.5061, Val-acc: 76.0%
Epoch [10/10], Loss: 0.1215, Val-loss: 0.5220, Val-acc: 73.9%
labels ---> tensor([0, 0, 0, 0])
predicted ---> tensor([1, 1, 0, 0])
total ---> 4
correct ---> 2
Истинный класс: Building, Предсказанный класс: Other
Истинный класс: Building, Предсказанный класс: Other
Истинный класс: Building, Предсказанный класс: Building
Истинный класс: Building, Предсказанный класс: Building
labels ---> tensor([1, 0, 1, 0])
predicted ---> tensor([1, 1, 1, 0])
total