In [1]:
import kagglehub
# Fetch the dataset through kagglehub; the returned value is used below as the
# root path under which the image folder is looked up.
dataset_path = kagglehub.dataset_download('bhavikjikadara/dog-and-cat-classification-dataset')

In [2]:
import os
# Image root: the 'PetImages' folder inside the downloaded dataset.
directory = os.path.join(dataset_path, 'PetImages')

In [3]:
import shutil
import random
from PIL import Image

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

In [7]:
# Side length in pixels that every image is resized to (see the Resize transform).
IMAGE_SIZE = 32
# Number of samples per batch handed to both DataLoaders.
BATCH_SIZE = 64
# Seed shared by `random`, torch's global RNG and the train/test split generator.
SEED = 42

In [8]:
# Seed Python's `random` module and PyTorch's global generator with the same value.
random.seed(SEED)
# Last expression of the cell, so the returned Generator object is displayed.
torch.manual_seed(SEED)

<torch._C.Generator at 0x24cf1717910>

In [9]:
# Preprocessing applied to every image, in order:
#   1. resize to IMAGE_SIZE x IMAGE_SIZE (aspect ratio is not preserved),
#   2. convert to a tensor,
#   3. normalise each of the three channels with the given mean / std.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# statistics rather than values computed from this dataset - confirm.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [10]:
from torchvision.datasets import ImageFolder

In [11]:
# Build one dataset over everything under `directory`; `transform` is applied
# to each image when it is loaded.
full_dataset = ImageFolder(root=directory, transform=transform)

In [12]:
# Total number of samples in the dataset.
print(len(full_dataset))

24998


In [13]:
# 80 % of the samples go to training (rounded down); the remainder is the test split.
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size

# A dedicated generator seeded with SEED is passed to the split, so the
# partition does not depend on the state of torch's global RNG.
train_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset, 
    [train_size, test_size], 
    generator=torch.Generator().manual_seed(SEED)
)

In [14]:
# Sizes of the two splits (train first, then test).
print(len(train_dataset))
print(len(test_dataset))

19998
5000


In [15]:
# Training batches are reshuffled every epoch; test batches keep a fixed order.
# Both loaders use 4 worker processes to load data.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [16]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [17]:
# LeNet-5 style network with ReLU activations, 3 input channels and 2 output
# logits. Spatial sizes in the comments assume 3 x 32 x 32 inputs, which is
# what the Resize transform produces with IMAGE_SIZE = 32.
lenet5_model = nn.Sequential(
    # 3 x 32 x 32 -> 6 x 28 x 28 (5x5 kernel, no padding)
    nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
    nn.ReLU(),
    
    # 6 x 28 x 28 -> 6 x 14 x 14
    nn.AvgPool2d(kernel_size=2, stride=2),
    
    # 6 x 14 x 14 -> 16 x 10 x 10
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.ReLU(),
    
    # 16 x 10 x 10 -> 16 x 5 x 5
    nn.AvgPool2d(kernel_size=2, stride=2),
    
    # 16 x 5 x 5 -> 400 features per sample
    nn.Flatten(),
    
    # in_features is tied to the 32 x 32 input size: changing IMAGE_SIZE
    # requires updating 16 * 5 * 5 accordingly.
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.ReLU(),
    
    nn.Linear(in_features=120, out_features=84),
    nn.ReLU(),
    
    # Two raw logits (one per class); no softmax here.
    nn.Linear(in_features=84, out_features=2) 
)

# Move the parameters to the selected device. As the last expression of the
# cell, the returned module is also displayed.
lenet5_model.to(device)

Sequential(
  (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU()
  (11): Linear(in_features=84, out_features=2, bias=True)
)

In [18]:
# Smoke test: push one random batch through the network and verify that the
# output has one row per sample and two logits per row. The batch is sized
# from BATCH_SIZE / IMAGE_SIZE so that it matches what the loaders produce, and
# the pass runs under no_grad because no gradients are needed for a shape check.
with torch.no_grad():
    dummy_input = torch.randn(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE).to(device)
    output = lenet5_model(dummy_input)

assert output.shape == (BATCH_SIZE, 2), f"unexpected output shape: {tuple(output.shape)}"

In [19]:
import torch.optim as optim

In [20]:
# Number of passes over the training split performed by the training cell.
NUM_EPOCHS = 10
# Step size handed to the optimizer.
LEARNING_RATE = 0.001

In [21]:
# Cross-entropy over the two raw logits produced by the network's last layer.
criterion = nn.CrossEntropyLoss()

In [22]:
# Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(lenet5_model.parameters(), lr=LEARNING_RATE)

In [23]:
from tqdm import tqdm

In [24]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Callable computing the loss from ``(model_output, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.

    Raises:
        ZeroDivisionError: If ``loader`` contains no batches.
    """
    model.train()

    progress = tqdm(loader, desc='Training', leave=True)
    loss_total = 0.0

    for inputs, labels in progress:
        inputs = inputs.to(device)
        labels = labels.to(device)

        batch_loss = criterion(model(inputs), labels)

        # Clear gradients left from the previous step, backpropagate, update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the total and the progress bar.
        loss_value = batch_loss.item()
        loss_total += loss_value
        progress.set_postfix(loss=loss_value)

    return loss_total / len(loader)

In [25]:
def check_accuracy(loader, model, device):
    """Return the classification accuracy of ``model`` over ``loader``, in percent.

    The model is put into evaluation mode for the duration of the call and is
    returned to whatever mode it was in beforehand, even if an error occurs.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches.
        model: Network producing one score per class for each sample.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Percentage (0-100) of samples whose highest-scoring class equals the
        label, or ``0.0`` when ``loader`` yields no samples.
    """
    # Remember the caller's mode so evaluation does not silently flip an
    # eval-mode model back into training mode.
    was_training = model.training
    model.eval()

    num_correct = 0
    num_samples = 0

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x)

                # Index of the highest score along the class dimension.
                _, predictions = scores.max(dim=1)

                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    # Avoid dividing by zero when the loader is empty.
    if num_samples == 0:
        return 0.0

    return (num_correct / num_samples) * 100

In [26]:
import time

In [29]:
# Wall-clock timer. It starts before the baseline evaluation below, so the
# reported total covers every evaluation pass as well as the training passes.
start_time = time.time()

# Accuracy on the test split before any of this cell's epochs run.
test_accuracy_before = check_accuracy(test_loader, lenet5_model, device)
print(f"pred train {test_accuracy_before:.2f}%")
print("\n")

print(f"--- epochs {NUM_EPOCHS} ---")

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n--- epoch {epoch}/{NUM_EPOCHS} ---")
    
    # One full pass over the training split; returns the mean per-batch loss.
    train_loss = train_epoch(
        lenet5_model, 
        train_loader, 
        criterion, 
        optimizer, 
        device
    )
    
    # NOTE(review): the test split is evaluated after every epoch and there is
    # no separate validation split, so these numbers should not be used to
    # pick an epoch or tune hyper-parameters.
    test_accuracy = check_accuracy(test_loader, lenet5_model, device)
    
    print(f"avg loss per epoch: {train_loss:.4f}")
    print(f"accuracy on test: {test_accuracy:.2f}%")

end_time = time.time()
total_time = end_time - start_time

print(f"total train time {total_time:.2f} sec")

pred train 75.34%


--- epochs 10 ---

--- epoch 1/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.60it/s, loss=0.407]


avg loss per epoch: 0.3581
accuracy on test: 73.52%

--- epoch 2/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.55it/s, loss=0.184]


avg loss per epoch: 0.3345
accuracy on test: 73.98%

--- epoch 3/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.35it/s, loss=0.335]


avg loss per epoch: 0.2986
accuracy on test: 73.42%

--- epoch 4/10 ---


Training: 100%|███████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.56it/s, loss=0.37]


avg loss per epoch: 0.2737
accuracy on test: 72.72%

--- epoch 5/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.79it/s, loss=0.256]


avg loss per epoch: 0.2440
accuracy on test: 73.50%

--- epoch 6/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.87it/s, loss=0.251]


avg loss per epoch: 0.2112
accuracy on test: 72.96%

--- epoch 7/10 ---


Training: 100%|█████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.89it/s, loss=0.0932]


avg loss per epoch: 0.1760
accuracy on test: 71.68%

--- epoch 8/10 ---


Training: 100%|███████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.73it/s, loss=0.21]


avg loss per epoch: 0.1550
accuracy on test: 72.18%

--- epoch 9/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [00:25<00:00, 12.45it/s, loss=0.141]


avg loss per epoch: 0.1338
accuracy on test: 73.10%

--- epoch 10/10 ---


Training: 100%|█████████████████████████████████████████████████████████| 313/313 [00:24<00:00, 12.62it/s, loss=0.0391]


avg loss per epoch: 0.1100
accuracy on test: 72.32%
total train time 429.54 sec


In [34]:
# I ran 20 epochs in total; the output above is the second set of ten.
# LeNet was designed for MNIST (low-resolution grayscale images); it does not cope with RGB HD/Full HD photos
# (downscaled to 32x32 here) - or I am simply doing something wrong, which is the most likely explanation.
# Still, judging by roughly how many convolutional layers and how many FC layers HD/Full HD images require,
# the current architecture cannot model the non-linear dependencies needed to raise binary-classification accuracy.

# As I understand it, the fewer convolutions we use, the more FC capacity we need at the output.

# Also, instead of tanh/sigmoid as in the original LeNet-5 I used ReLU, because the results with those were very poor.