In [59]:
import torch    
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv

import numpy as np
import matplotlib.pyplot as plt
import cv2
import os

from tqdm import tqdm 

# Dataset

In [60]:
class Dataset2class(torch.utils.data.Dataset):
    def __init__(self, path_dir1:str, path_dir2:str):
        super().__init__()

        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2

        self.dir1_list = sorted(os.listdir(path_dir1))
        self.dir2_list = sorted(os.listdir(path_dir2))

    def __len__(self):
        return len(self.dir1_list) + len(self.dir2_list)

    def __getitem__(self, idx):
        if idx < len(self.dir1_list):
            class_id = 0
            img_path = os.path.join(self.path_dir1, self.dir1_list[idx])
        else:
            class_id = 1
            idx -= len(self.dir1_list)
            img_path = os.path.join(self.path_dir2, self.dir2_list[idx])

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"Изображение по пути {img_path} не может быть загружено.")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img/255.0

        img = cv2.resize(img, (64, 64), interpolation = cv2.INTER_AREA)

        img = img.transpose((2, 0, 1))

        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)

        return {'img' : t_img, 'label' :  t_class_id}

In [61]:
train_ds_catsdogs = Dataset2class('C:/Users/Administrator/Desktop/AAT/ResNet/dataset/training_set/cats',
                             'C:/Users/Administrator/Desktop/AAT/ResNet/dataset/training_set/dogs')
test_ds_catsdogs = Dataset2class('C:/Users/Administrator/Desktop/AAT/ResNet/dataset/test_set/cats',
                             'C:/Users/Administrator/Desktop/AAT/ResNet/dataset/test_set/dogs')


In [62]:
len(train_ds_catsdogs)

8000

In [63]:
len(test_ds_catsdogs)

2000

In [64]:
train_ds_catsdogs[2]

{'img': tensor([[[0.8683, 0.8739, 0.8938,  ..., 0.9458, 0.9502, 0.9351],
          [0.8741, 0.8685, 0.8898,  ..., 0.9358, 0.9659, 0.9438],
          [0.8780, 0.8794, 0.8795,  ..., 0.9384, 0.9532, 0.9519],
          ...,
          [0.7955, 0.7723, 0.7345,  ..., 0.8576, 0.8383, 0.8471],
          [0.7844, 0.7690, 0.7846,  ..., 0.8570, 0.8432, 0.8493],
          [0.8279, 0.7893, 0.7976,  ..., 0.8518, 0.8464, 0.8472]],
 
         [[0.8722, 0.8778, 0.8948,  ..., 0.9497, 0.9541, 0.9401],
          [0.8780, 0.8725, 0.8927,  ..., 0.9397, 0.9698, 0.9500],
          [0.8819, 0.8834, 0.8817,  ..., 0.9423, 0.9571, 0.9554],
          ...,
          [0.8069, 0.7462, 0.6685,  ..., 0.8693, 0.8370, 0.8316],
          [0.8175, 0.7685, 0.7278,  ..., 0.8675, 0.8419, 0.8338],
          [0.8313, 0.7670, 0.7201,  ..., 0.8626, 0.8435, 0.8311]],
 
         [[0.8526, 0.8582, 0.8901,  ..., 0.9687, 0.9730, 0.9602],
          [0.8584, 0.8529, 0.8769,  ..., 0.9590, 0.9875, 0.9697],
          [0.8623, 0.8638, 0.8673

# Dataloader

In [65]:
batch_size = 16
train_loader = torch.utils.data.DataLoader(
    train_ds_catsdogs, shuffle=True, batch_size=batch_size,
    num_workers=0, drop_last=True)
test_loader = torch.utils.data.DataLoader(
    test_ds_catsdogs, shuffle=True, batch_size=batch_size,
    num_workers=0, drop_last=False)

In [66]:
for sample in train_loader:
    print(sample)
    break

{'img': tensor([[[[1.0000, 1.0000, 0.9166,  ..., 0.9889, 0.9711, 0.9903],
          [1.0000, 1.0000, 0.9166,  ..., 0.9844, 0.9707, 0.9926],
          [1.0000, 1.0000, 0.9152,  ..., 0.9794, 0.9689, 0.9933],
          ...,
          [0.1692, 0.1699, 0.1824,  ..., 0.6593, 0.6550, 0.5009],
          [0.1489, 0.1426, 0.3272,  ..., 0.6573, 0.6718, 0.4734],
          [0.6855, 0.9146, 0.9258,  ..., 0.6733, 0.6749, 0.4317]],

         [[0.9961, 0.9961, 0.9137,  ..., 0.9855, 0.9711, 0.9903],
          [0.9961, 0.9961, 0.9137,  ..., 0.9810, 0.9707, 0.9926],
          [0.9961, 0.9961, 0.9120,  ..., 0.9760, 0.9689, 0.9933],
          ...,
          [0.2211, 0.2106, 0.2144,  ..., 0.4815, 0.4522, 0.3622],
          [0.1437, 0.1343, 0.3110,  ..., 0.4759, 0.4764, 0.3460],
          [0.5955, 0.8192, 0.8266,  ..., 0.4870, 0.4811, 0.3100]],

         [[0.9804, 0.9804, 0.8980,  ..., 0.9977, 0.9743, 0.9861],
          [0.9804, 0.9804, 0.8980,  ..., 0.9945, 0.9739, 0.9885],
          [0.9804, 0.9804, 0.8964,

# Architecture

In [67]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.act = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv0 = nn.Conv2d(3, 32, 3, stride=1, padding=0)
        self.conv1 = nn.Conv2d(32, 32, 3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=0)

        self.adaptivepool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64, 10)
        self.linear2 = nn.Linear(10, 2)

    
    def forward(self, x):
        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv1(out)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv3(out)
        out = self.act(out)

        out = self.adaptivepool(out)
        out = self.flatten(out)
        out = self.linear1(out)
        out = self.act(out)
        out = self.linear2(out)

        return out

In [68]:
model = ConvNet()

In [69]:
def count_parametrs(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [70]:
model

ConvNet(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=64, out_features=10, bias=True)
  (linear2): Linear(in_features=10, out_features=2, bias=True)
)

In [71]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999))

In [72]:
count_parametrs(model)

103168

In [73]:
def accuracy(pred, label):
    answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
    # print(answer)
    # print(answer.sum())
    return answer.mean()


# Train loop

In [74]:
epochs = 10
for epoch in range(epochs):
    loss_val = 0
    acc_val = 0
    for sample in (pbar := tqdm(train_loader)):
        img, label = sample['img'], sample['label']
        optimizer.zero_grad()
        label = F.one_hot(label, 2).float()
        pred = model(img)

        loss = loss_fn(pred, label)
        loss.backward()
        loss_item = loss.item()
        loss_val += loss_item

        optimizer.step()

        acc_current = accuracy(pred, label)
        acc_val += acc_current

        pbar.set_description(f'loss: {loss_item:.4f}\taccuracy: {acc_current:.3f}')
    print(loss_val/len(train_loader))
    print(acc_val/len(train_loader))

  answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
loss: 0.7066	accuracy: 0.500: 100%|██████████| 500/500 [01:08<00:00,  7.26it/s]


0.6890115146636963
0.534875


loss: 0.6258	accuracy: 0.625: 100%|██████████| 500/500 [00:42<00:00, 11.81it/s]


0.6635780782103539
0.597


loss: 0.7183	accuracy: 0.625: 100%|██████████| 500/500 [00:43<00:00, 11.49it/s]


0.6081633687615394
0.67075


loss: 0.4658	accuracy: 0.688: 100%|██████████| 500/500 [00:44<00:00, 11.31it/s]


0.5472135010659694
0.7235


loss: 0.6312	accuracy: 0.625: 100%|██████████| 500/500 [00:42<00:00, 11.74it/s]


0.5024665699601173
0.7525


loss: 0.7542	accuracy: 0.562: 100%|██████████| 500/500 [00:43<00:00, 11.46it/s]


0.46825337007641793
0.7805


loss: 0.2812	accuracy: 0.875: 100%|██████████| 500/500 [00:42<00:00, 11.70it/s]


0.42599266244471073
0.802625


loss: 0.4127	accuracy: 0.875: 100%|██████████| 500/500 [00:43<00:00, 11.51it/s]


0.39575936684012414
0.81575


loss: 0.2710	accuracy: 0.812: 100%|██████████| 500/500 [00:42<00:00, 11.74it/s]


0.36625798599421977
0.83775


loss: 0.2095	accuracy: 0.938: 100%|██████████| 500/500 [00:41<00:00, 12.12it/s]

0.3372217895835638
0.8475





In [76]:
loss_val = 0
acc_val = 0
for sample in (pbar := tqdm(test_loader)):
    with torch.no_grad():
        img, label = sample['img'], sample['label']
        label = F.one_hot(label, 2).float()
        pred = model(img)

        loss = loss_fn(pred, label)
        loss_item = loss.item()
        loss_val += loss_item

        acc_current = accuracy(pred, label)
        acc_val += acc_current

    pbar.set_description(f'loss: {loss_item:.4f}\taccuracy: {acc_current:.3f}')
print(loss_val/len(test_loader))
print(acc_val/len(test_loader))

  answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
loss: 0.2563	accuracy: 0.812: 100%|██████████| 125/125 [00:21<00:00,  5.80it/s]

0.4120151214003563
0.817





: 