Данные для модели
"https://storage.googleapis.com/dlcourse_ai/train.zip"
"https://storage.googleapis.com/dlcourse_ai/test.zip"

In [1]:
#! pip install opencv-python

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchmetrics import Accuracy

import torchvision as tv

import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
class Dataset2class(torch.utils.data.Dataset):
    def __init__(self, path_dir:str):
        super().__init__()
        
        self.path_dir = path_dir #путь к папке
        self.dir_list = sorted(os.listdir(path_dir)) #список файлов
                
    def __len__(self):
        return len(self.dir_list) # инициализация длины
    
    def __getitem__(self,idx): #возврат нужного изображения по индексу и его метки класса
        
        class_id = 1 if 'frankfurter' in self.dir_list[idx] or 'chili-dog' in self.dir_list[idx] or 'hotdog' in self.dir_list[idx] else 0
        
        img_path = os.path.join(self.path_dir, self.dir_list[idx])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR) #в матрицу
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # нужная кодировка
        img = img.astype(np.float32) # во флоат для модели
        img = img/255.0 # нормализация
        
        img = cv2.resize(img, (64,64), interpolation = cv2.INTER_AREA) #сжатие и интерполяция
        
        img = img.transpose((2,0,1)) # каналы, высота, ширина
        
        t_img = torch.from_numpy(img) # в тензор
        
        t_class_id = torch.tensor(class_id) # индекс в тензор
        #return img
        return {'img':t_img, 'label':t_class_id}

In [3]:
train_path = r'.\train_kaggle'
test_path = r'.\test_kaggle'

In [4]:
train_ds_hotdogs = Dataset2class(train_path)
test_ds_hotdogs = Dataset2class(test_path)

In [5]:
len(train_ds_hotdogs), len(test_ds_hotdogs)

(4603, 1150)

In [6]:
#plt.imshow(train_ds_hotdogs[50])

In [7]:
BATCH_SIZE = 16

In [8]:
train_loader = torch.utils.data.DataLoader(
    train_ds_hotdogs, 
    batch_size=BATCH_SIZE, 
    
    shuffle = True,
    drop_last=True)
test_loader = torch.utils.data.DataLoader(
    test_ds_hotdogs, 
    batch_size=BATCH_SIZE, 
    
    shuffle = False,
    drop_last = False)

In [33]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.act = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d(2,2)
        self.conv0 = nn.Conv2d(3,32,3, stride = 1, padding = 0) # каналы было, каналов стало, размер ядра
        self.conv1 = nn.Conv2d(32,32,3, stride = 1, padding = 0)
        self.conv2 = nn.Conv2d(32,64,3, stride = 1, padding = 0)
        self.conv3 = nn.Conv2d(64,128,3, stride = 1, padding = 0)
        
        self.adaptivepool = nn.AdaptiveAvgPool2d((1,1)) # сворачивание средним до 1х1
        self.flatten = nn.Flatten() # в вектор
        self.linear1 = nn.Linear(128,64) # линейное преобразование 
        self.linear2 = nn.Linear(64,16)
        self.linear3 = nn.Linear(16,2)
        
    def forward(self, x):

        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)
        
        out = self.conv1(out)
        out = self.act(out)
        out = self.maxpool(out)
        
        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)
        
        out = self.conv3(out)
        out = self.act(out)
        
        out = self.adaptivepool(out)
        out = self.flatten(out)

        out = self.linear1(out)
        out = self.act(out)

        out = self.linear2(out)
        out = self.act(out)
        
        out = self.linear3(out)
        
        return out

In [34]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [52]:
model = ConvNet()

In [53]:
model

ConvNet(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=128, out_features=64, bias=True)
  (linear2): Linear(in_features=64, out_features=16, bias=True)
  (linear3): Linear(in_features=16, out_features=2, bias=True)
)

In [54]:
count_parameters(model)

111826

In [55]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.005, betas = (0.9, 0.999))

In [56]:
accuracy = Accuracy(task='binary')

In [57]:
EPOCHS = 5
for epoch in range(EPOCHS):
    
    loss_val = 0
    acc_val = 0
    
    for sample in tqdm(train_loader):
        img, label = sample['img'], sample['label']
        optimizer.zero_grad()
        
        label = F.one_hot(label, 2).float()
        pred = model(img)
        
        loss = loss_fn(pred, label)
        
        loss.backward()
        loss_item = loss.item()
        loss_val += loss_item
        
        optimizer.step()
        
        acc_current = accuracy(pred, label)
        acc_val += acc_current
        
    #pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')
    print('loss =', loss_val/len(train_loader))
    print('accuracy =', acc_val/len(train_loader))

  0%|          | 0/287 [00:00<?, ?it/s]

loss = 0.5743002699434965
accuracy = tensor(0.6863)


  0%|          | 0/287 [00:00<?, ?it/s]

loss = 0.5196866001090106
accuracy = tensor(0.7060)


  0%|          | 0/287 [00:00<?, ?it/s]

loss = 0.5121739105376633
accuracy = tensor(0.7113)


  0%|          | 0/287 [00:00<?, ?it/s]

loss = 0.5134191083679631
accuracy = tensor(0.7180)


  0%|          | 0/287 [00:00<?, ?it/s]

loss = 0.48272849899758863
accuracy = tensor(0.7353)


In [58]:
loss_val = 0
acc_val = 0

for sample in tqdm(test_loader):
    with torch.no_grad():
        img, label = sample['img'], sample['label']

        label = F.one_hot(label, 2).float()
        pred = model(img)

        loss = loss_fn(pred, label)

        loss_item = loss.item()
        loss_val += loss_item

        acc_current = accuracy(pred, label)
        acc_val += acc_current

#pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')
print('loss =', loss_val/len(test_loader))
print('accuracy =', acc_val/len(test_loader))

  0%|          | 0/72 [00:00<?, ?it/s]

loss = 0.3611502595659759
accuracy = tensor(0.8996)
