In [207]:
import torch
import torch.nn as nn
import torch.nn.functional as F 
import torchvision as tv

import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

In [233]:
class Dataset2class(torch.utils.data.Dataset): #2 клааса, наследуем 
    def  __init__(self, path_cats:str, path_dogs:str):
        super().__init__()

        self.path_cats = path_cats
        self.path_dogs = path_dogs

        self.cats_list = sorted(os.listdir(path_cats))
        self.dogs_list = sorted(os.listdir(path_dogs))

    def __len__(self): #функция для получения длины датасета
        return len(self.cats_list) + len(self.dogs_list)

    def __getitem__(self,idx): #для получения индексов, чтобы использовать как массив
        if idx < len(self.cats_list):
            class_id = 0
            img_path = os.path.join(self.path_cats, self.cats_list[idx])
        else:
            class_id = 1
            idx-=len(self.cats_list) #чтобы обращаться ко второй папке с 0-го элемента
            #обе папки стоят подряд в датасете

            img_path = os.path.join(self.path_dogs, self.dogs_list[idx]) #склеиваем два пути в один
                                                                    #сначала название папки общей, затем уже конкретно коты или собаки
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)

        img = img.astype(np.float32) #значения пикселей храним в вещественном формате
        img = img/255.0 #нормализуем от 0 до 1

        img = cv2.resize(img, (64,64), interpolation = cv2.INTER_AREA)

        img = img.transpose((2,0,1))

        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)


        return {'img': t_img, 'label' : t_class_id}


In [234]:
train_cats_path = '.\dataset_cats_and_dogs\\train\cats'
train_dogs_path = '.\dataset_cats_and_dogs\\train\dogs'

train_ds_cats_dogs = Dataset2class(train_cats_path, train_dogs_path)

test_cats_path = '.\dataset_cats_and_dogs\\test\cats'
test_dogs_path = '.\dataset_cats_and_dogs\\test\dogs'

test_ds_cats_dogs = Dataset2class(test_cats_path, test_dogs_path)


In [235]:
#plt.imshow(train_ds_cats_dogs[2][0]) #это кортеж, поэтому сначала выбираем номер кортежа, затем элемент в нём
# plt.imshow(train_ds_cats_dogs[2]['img']) 

In [236]:
train_ds_cats_dogs[2]['img']

tensor([[[0.0720, 0.0782, 0.0784,  ..., 0.0645, 0.0627, 0.0627],
         [0.0720, 0.0782, 0.0784,  ..., 0.0645, 0.0627, 0.0627],
         [0.0720, 0.0782, 0.0784,  ..., 0.0645, 0.0627, 0.0627],
         ...,
         [0.1739, 0.2013, 0.1602,  ..., 0.0510, 0.0510, 0.0476],
         [0.1804, 0.2138, 0.1499,  ..., 0.0469, 0.0469, 0.0435],
         [0.1867, 0.2064, 0.1010,  ..., 0.0431, 0.0431, 0.0398]],

        [[0.0759, 0.0821, 0.0824,  ..., 0.0684, 0.0667, 0.0667],
         [0.0759, 0.0821, 0.0824,  ..., 0.0684, 0.0667, 0.0667],
         [0.0759, 0.0821, 0.0824,  ..., 0.0684, 0.0667, 0.0667],
         ...,
         [0.1902, 0.2085, 0.1607,  ..., 0.0527, 0.0510, 0.0476],
         [0.1978, 0.2210, 0.1504,  ..., 0.0486, 0.0469, 0.0435],
         [0.2041, 0.2137, 0.1015,  ..., 0.0449, 0.0431, 0.0398]],

        [[0.0955, 0.1017, 0.1020,  ..., 0.0880, 0.0863, 0.0863],
         [0.0955, 0.1017, 0.1020,  ..., 0.0880, 0.0863, 0.0863],
         [0.0955, 0.1017, 0.1020,  ..., 0.0880, 0.0863, 0.

In [237]:
batch_size = 16

train_loader = torch.utils.data.DataLoader(
    train_ds_cats_dogs, shuffle = True, 
    batch_size = batch_size, num_workers = 0, drop_last = True
) #drop_last = True - выбрасываем последний элемент

tesr_loader = torch.utils.data.DataLoader(
    test_ds_cats_dogs, shuffle = True, 
    batch_size = batch_size, num_workers = 1, drop_last = False
)

In [238]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.act = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d (2,2)
        self.conv0 = nn.Conv2d(3,32,3, stride=1, padding = 0)
        self.conv1 = nn.Conv2d(32,32,3, stride=1, padding = 0)
        self.conv2 = nn.Conv2d(32,64,3, stride=1, padding = 0)
        self.conv3 = nn.Conv2d(64,64,3, stride=1, padding = 0)
        self.conv4 = nn.Conv2d(64,64,3, stride=1, padding = 0)

        self.adaptivepool = nn.AdaptiveAvgPool2d((1,1))
        self.flatten = nn.Flatten()
        self.linear1=nn.Linear(64,10)
        self.linear2=nn.Linear(10,2) #только 2 класса на выходе

    def forward(self, x):

        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv1(out)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv3(out)
        out = self.act(out)
       
        out = self.adaptivepool(out)
        out = self.flatten(out)
        out = self.linear1(out)
        out = self.act(out)
        out = self.linear2(out)



        return out



In [239]:
net = ConvNet()

In [240]:
net

ConvNet(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=64, out_features=10, bias=True)
  (linear2): Linear(in_features=10, out_features=2, bias=True)
)

In [241]:
for sample in train_loader:
    img = sample['img']
    label = sample['label']
    net(img)
    break

In [242]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr =0.0001, betas = (0.9, 0.))

In [243]:
def accuracy(pred, label):
  answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)

  # detach() удаляет граф вычислений (как бы историю) с тензора. Отвязывает.
# Back propagation не будет идти дальше этого тензора.

  return answer.mean()

In [244]:
def count_parameers(model):
  return sum(p.numel() for p in net.parameters() if p.requires_grad)

In [245]:
count_parameers(net) #число параметров сети

103168

In [248]:
epochs = 10

# for epoch in range(epochs):
#   loss_val = 0
#   acc_val = 0
#   # тут будут пачки по 16 сэмплов
#   for img, label in (pbar := tqdm(dataloader)): #для каждого изображения считается loss и градиент


for epoch in range(epochs):
  loss_val = 0 
  acc_val = 0
  for sample in (pbar := tqdm(train_loader)):
    img, label = sample['img'], sample['label']
    optimizer.zero_grad()  #обновляем градиенты


    label = (torch.Tensor(F.one_hot(label, 2))).float() 


    pred = net(img) #здесь 10 значений (для каждого класса) в 16 тензорах(размер батча)

    loss = loss_fn(pred, label)

    loss.backward()
    loss_item = loss.item()
    loss_val += loss_item

    optimizer.step()

    acc_current = accuracy(pred, label)
    acc_val += acc_current #мы же выводим среднее, поэтому суммируем


  
  pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')

  print(loss_val/len(train_loader))
  print(acc_val/len(train_loader))

  answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
100%|██████████| 34/34 [00:09<00:00,  3.57it/s]


0.6991277231889612
0.49264705882352944


100%|██████████| 34/34 [00:09<00:00,  3.57it/s]


0.6941955720677095
0.5036764705882353


100%|██████████| 34/34 [00:09<00:00,  3.52it/s]


0.7052505472127129
0.5147058823529411


100%|██████████| 34/34 [00:09<00:00,  3.56it/s]


0.7096734941005707
0.49816176470588236


100%|██████████| 34/34 [00:09<00:00,  3.53it/s]


0.7315180091296926
0.5202205882352942


100%|██████████| 34/34 [00:09<00:00,  3.62it/s]


3.024965309044894
0.49264705882352944


100%|██████████| 34/34 [00:09<00:00,  3.57it/s]


1.967220606172786
0.5091911764705882


100%|██████████| 34/34 [00:09<00:00,  3.54it/s]


1.3557054260197807
0.5036764705882353


100%|██████████| 34/34 [00:09<00:00,  3.53it/s]


0.981414770378786
0.5110294117647058


100%|██████████| 34/34 [00:09<00:00,  3.54it/s]

1.69448899696855
0.47610294117647056



