In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import os
from tqdm import tqdm

In [2]:
class Dataset2class(torch.utils.data.Dataset):
    """Binary image-classification dataset backed by two directories.

    Every entry of ``path_dir1`` gets label 0 and every entry of ``path_dir2``
    gets label 1. Indices ``0 .. len(dir1) - 1`` address the first directory,
    the remaining indices address the second one.

    Args:
        path_dir1: directory holding the class-0 images.
        path_dir2: directory holding the class-1 images.
    """

    def __init__ (self, path_dir1:str, path_dir2:str):
        super().__init__()

        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2

        # Sorted so that the index -> file mapping does not depend on the
        # order in which the OS happens to list the directory.
        self.dir1_list = sorted(os.listdir(path_dir1))
        self.dir2_list = sorted(os.listdir(path_dir2))

    def __len__(self):
        """Return the total number of entries in both directories."""
        return len(self.dir1_list) + len(self.dir2_list)

    def _locate(self, idx):
        """Map a dataset index to ``(image_path, class_id)``.

        Negative indices count from the end, like for a list.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        pos = idx + total if idx < 0 else idx
        if not 0 <= pos < total:
            raise IndexError(
                f'index {idx} is out of range for a dataset of size {total}')

        n_first = len(self.dir1_list)
        if pos < n_first:
            return os.path.join(self.path_dir1, self.dir1_list[pos]), 0
        return os.path.join(self.path_dir2, self.dir2_list[pos - n_first]), 1

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            dict with
              * ``'img'``   - float32 tensor of shape (3, 64, 64), RGB, scaled to [0, 1];
              * ``'label'`` - 0-dim integer tensor holding the class id (0 or 1).

        Raises:
            IndexError: if ``idx`` is out of range.
            OSError: if the file cannot be read or decoded as an image.
        """
        img_path, class_id = self._locate(idx)

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError(f'cv2.imread could not read an image from {img_path!r}')

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img/255.0
        img = cv2.resize(img, (64, 64), interpolation=cv2.INTER_AREA)
        img = img.transpose((2, 0, 1)) # channels first, then height, then width

        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)

        return {'img': t_img, 'label': t_class_id}
        

In [3]:
# Raw string literals: the Windows paths contain backslash sequences such as
# "\D", "\C", "\T", "\d" and "\c" that are invalid escapes in a normal string
# (SyntaxWarning on Python 3.12+) and would silently turn into control
# characters if a folder name ever started with e.g. "t" or "n".
# The first directory is labelled 0 (dogs), the second is labelled 1 (cats).
Train_ds_cat_dogs = Dataset2class(r'C:\DATA\Computer Vision\Cats_vs_dogs\Train\dogs',
                                  r'C:\DATA\Computer Vision\Cats_vs_dogs\Train\cats')
Test_ds_cat_dogs = Dataset2class(r'C:\DATA\Computer Vision\Cats_vs_dogs\Test\dogs',
                                 r'C:\DATA\Computer Vision\Cats_vs_dogs\Test\cats')

In [4]:
# Training batches: 16 samples each, reshuffled every epoch; the trailing
# incomplete batch is dropped so every batch has the same size.
train_loader = torch.utils.data.DataLoader(
    dataset=Train_ds_cat_dogs,
    batch_size=16,
    shuffle=True,
    drop_last=True,
)

In [5]:
# Evaluation batches: every sample is kept (drop_last=False). No shuffling:
# the aggregated metrics do not depend on sample order, and a fixed order
# keeps per-batch output reproducible and leaves the global RNG untouched.
test_loader = torch.utils.data.DataLoader(Test_ds_cat_dogs,
                                          shuffle=False,
                                          batch_size=16,
                                          drop_last=False)

In [6]:
class Conv_net(nn.Module):
    """Small CNN that maps a batch of 3-channel images to 2 class logits.

    Layout: three (3x3 conv -> LeakyReLU -> 2x2 max-pool) stages, a fourth
    3x3 conv with LeakyReLU, global average pooling, and a two-layer
    fully connected head (128 -> 20 -> 2).
    """

    def __init__(self):
        super().__init__()

        # Parameter-free layers, shared by every stage.
        self.activation = nn.LeakyReLU(negative_slope=0.2)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Feature extractor: unpadded 3x3 convolutions, 3 -> 32 -> 32 -> 64 -> 128 channels.
        self.conv0 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.conv1 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=0)

        # Classifier head: collapse the spatial dims to 1x1, then 128 -> 20 -> 2.
        self.adaptive_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_features=128, out_features=20)
        self.linear2 = nn.Linear(in_features=20, out_features=2)

    def forward(self, x):
        """Return raw (un-normalised) logits of shape (batch, 2)."""
        features = x
        # The first three stages share the same conv -> activation -> pool pattern.
        for conv in (self.conv0, self.conv1, self.conv2):
            features = self.maxpool(self.activation(conv(features)))

        # The last convolution is not followed by max-pooling.
        features = self.activation(self.conv3(features))

        pooled = self.flatten(self.adaptive_pool(features))
        hidden = self.activation(self.linear1(pooled))
        return self.linear2(hidden)
    
    

In [7]:
# Instantiate the network with freshly initialised weights.
model = Conv_net()

In [8]:
# Smoke test: push a single training batch through the network to check that
# the data pipeline and the layer shapes fit together. The output is
# discarded and no backward pass follows, so autograd tracking is disabled
# to avoid building a graph that is never used.
with torch.no_grad():
    for sample in train_loader:
        img = sample['img']
        label = sample['label']
        model(img)
        break  # one batch is enough

In [9]:
# Shape of the batch fetched above: (batch, channels, height, width).
img.shape

torch.Size([16, 3, 64, 64])

In [10]:
# Cross-entropy over the two class logits, optimised with Adam.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
)

In [11]:
def accuracy(pred, label):
    """Fraction of samples whose predicted class equals the target class.

    Args:
        pred: tensor of shape (batch, n_classes) with logits or probabilities.
            Softmax is monotonic, so the arg-max is taken on the raw values.
        label: either one-hot / soft targets of shape (batch, n_classes), or
            class indices of shape (batch,).

    Returns:
        Accuracy over the batch as a Python float in [0, 1].
    """
    # Everything stays in torch, so tensors on any device (and tensors that
    # require grad) are handled without a NumPy round trip.
    with torch.no_grad():
        pred_cls = pred.detach().argmax(dim=1)
        true_cls = label.argmax(dim=1) if label.dim() > 1 else label
        hits = pred_cls == true_cls.to(pred_cls.device)
        # float64 mean keeps the precision of the former NumPy implementation.
        return hits.double().mean().item()

In [18]:
epochs = 8

for epoch in range(epochs):
    # Make sure the network is in training mode, even if an evaluation cell
    # switched it to eval mode before this cell is (re-)run.
    model.train()

    # Running sums of the per-batch loss / accuracy for this epoch.
    loss_val = 0
    acc_val = 0

    pbar = tqdm(train_loader)
    for step, sample in enumerate(pbar):
        img, label = sample['img'], sample['label']
        optimizer.zero_grad()
        # One-hot float targets: CrossEntropyLoss treats them as class probabilities.
        label = F.one_hot(label, 2).float()
        pred = model(img)

        loss = loss_fn(pred, label)

        loss.backward()
        loss_item = loss.item()
        loss_val += loss_item

        optimizer.step()

        acc_current = accuracy(pred, label)
        acc_val += acc_current

        # Refresh the progress-bar caption only every 100 batches to keep the
        # output readable.
        if step > 0 and step % 100 == 0:
            pbar.set_description(f'loss:{loss_item: .5f}  Accuracy:{acc_current: .3f}')

    # Epoch means: the loader drops the last incomplete batch, so every batch
    # has the same size and a plain mean over batches is exact.
    print("%.3f" % (loss_val/len(train_loader)))
    print("%.3f" % (acc_val/len(train_loader)))

loss: 0.16076  Accuracy: 0.938:  47%|██████████████████████▎                         | 233/500 [00:21<00:24, 10.83it/s]


KeyboardInterrupt: 

In [17]:
# Evaluate on the test set.
# Switch to inference mode for the duration of the evaluation and restore
# the previous mode afterwards so that re-running the training cell is
# unaffected.
was_training = model.training
model.eval()

# Sample-weighted sums: the test loader keeps the last (possibly smaller)
# batch, so a plain mean over per-batch means would over-weight it.
loss_val = 0
acc_val = 0
n_seen = 0

with torch.no_grad():
    for sample in (pbar := tqdm(test_loader)):
        img, label = sample['img'], sample['label']
        n_batch = img.size(0)

        # One-hot float targets, same convention as in the training loop.
        label = F.one_hot(label, 2).float()
        pred = model(img)

        loss = loss_fn(pred, label)
        loss_item = loss.item()
        acc_current = accuracy(pred, label)

        loss_val += loss_item * n_batch
        acc_val += acc_current * n_batch
        n_seen += n_batch

        pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')

model.train(was_training)

# Mean loss and accuracy over all evaluated samples.
print(loss_val/n_seen)
print(acc_val/n_seen)

loss: 0.37279	accuracy: 0.875: 100%|█████████████████████████████████████████████████| 125/125 [00:09<00:00, 12.71it/s]

0.6133869337439537
0.8105



