In [11]:
import kagglehub
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm
from torchvision.datasets import ImageFolder

In [3]:
# Kaggle handle (owner/dataset-name) of the cat-vs-dog image collection.
DATASET_HANDLE = 'bhavikjikadara/dog-and-cat-classification-dataset'

# The returned value is used by the following cell as the root directory of
# the downloaded files.
dataset_path = kagglehub.dataset_download(DATASET_HANDLE)

In [4]:
# Root folder of the images: the 'PetImages' sub-directory of the download location.
directory = os.path.join(dataset_path, 'PetImages')

In [8]:
# Side length (pixels) every image is resized to, and the mini-batch size
# used by the data loaders.
IMAGE_SIZE, BATCH_SIZE = 224, 64

# Per-channel mean / std used to standardize the image tensors
# (the values match the widely used ImageNet channel statistics).
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every sample, in order:
#   1. resize to a fixed IMAGE_SIZE x IMAGE_SIZE square (aspect ratio is not kept),
#   2. convert the PIL image to a tensor,
#   3. standardize each channel.
_preprocessing_steps = [
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
alexnet_transforms = transforms.Compose(_preprocessing_steps)

In [13]:
# Dataset over the whole image folder; `alexnet_transforms` is applied to every sample.
# NOTE(review): ImageFolder presumably assigns one class label per sub-directory
# of `directory` — confirm against the torchvision documentation.
full_dataset = ImageFolder(root=directory, transform=alexnet_transforms)

In [14]:
# Seed for the train/test partition. Without a seeded generator the split
# (and therefore every accuracy number reported below) changes on each
# kernel restart, which makes runs impossible to compare.
SPLIT_SEED = 42

# 80% of the samples go to training; the remainder is held out for evaluation.
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size

# A dedicated generator keeps the split reproducible without touching the
# global RNG state that weight initialization and shuffling rely on.
train_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [15]:
# Settings shared by both loaders: same batch size, four worker processes each.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=4)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

In [16]:
import torch.nn as nn

In [18]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print(device)

cuda


In [19]:
# AlexNet-style classifier assembled from plain nn.Sequential containers.
# Layer order (and therefore the module indices inside each container) is
# significant: it fixes both the parameter names and the order in which the
# weights are initialized.


def _local_response_norm():
    """Build the local response normalization layer placed after conv1 and conv2."""
    return nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2)


def _max_pool():
    """Build the 3x3, stride-2 max-pooling layer used to downsample feature maps."""
    return nn.MaxPool2d(kernel_size=3, stride=2)


# Stage 1: large-kernel strided convolution, normalization, pooling.
_stage_1 = [
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
    nn.ReLU(inplace=True),
    _local_response_norm(),
    _max_pool(),
]

# Stage 2: 5x5 convolution (padding keeps the spatial size), normalization, pooling.
_stage_2 = [
    nn.Conv2d(96, 256, kernel_size=5, padding=2),
    nn.ReLU(inplace=True),
    _local_response_norm(),
    _max_pool(),
]

# Stage 3: three stacked 3x3 convolutions followed by a final pooling step.
_stage_3 = [
    nn.Conv2d(256, 384, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(384, 384, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    _max_pool(),
]

features = nn.Sequential(*_stage_1, *_stage_2, *_stage_3)

# Number of values per sample after flattening the feature maps.
# With a 224x224 input the spatial size shrinks 55 -> 27 -> 13 -> 6, and the
# last convolution has 256 channels; a different input size needs a new value.
FLATTEN_SIZE = 6 * 6 * 256

# Fully connected head: two dropout-regularized hidden layers, then one
# output logit per class (2 classes).
_hidden_block_1 = [
    nn.Dropout(p=0.5),
    nn.Linear(FLATTEN_SIZE, 4096),
    nn.ReLU(inplace=True),
]
_hidden_block_2 = [
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
]
classifier = nn.Sequential(*_hidden_block_1, *_hidden_block_2, nn.Linear(4096, 2))

# Full network: feature extractor -> flatten -> classifier, placed on `device`.
alexnet_model = nn.Sequential(features, nn.Flatten(), classifier)
alexnet_model = alexnet_model.to(device)

In [21]:
import torch.optim as optim
import time

In [23]:
def check_accuracy(loader, model, device):
    """Return the classification accuracy of ``model`` over ``loader``, in percent.

    The model is switched to evaluation mode for the duration of the call and
    is put back into whatever mode (training or evaluation) it was in when the
    function was entered, even if evaluation raises.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: Module mapping an input batch to per-class scores; the class
            dimension is expected at ``dim=1``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Percentage (0-100) of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    # Remember the caller's mode: unconditionally calling model.train() on
    # exit would silently re-enable dropout on a model that was in eval mode.
    was_training = model.training
    model.eval()

    num_correct = 0
    num_samples = 0

    try:
        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x)

                # Index of the highest score along the class dimension.
                _, predictions = scores.max(dim=1)

                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    accuracy = (num_correct / num_samples) * 100

    return accuracy

In [25]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimize; it is put into training mode.
        loader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable computing a scalar loss from ``(scores, targets)``.
        optimizer: Optimizer holding the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of the per-batch losses divided by ``len(loader)``.

    Raises:
        ZeroDivisionError: If ``len(loader)`` is 0.
    """
    model.train()
    running_loss = 0.0

    # Progress bar over the batches; the postfix shows the latest batch loss.
    loop = tqdm(loader, desc='Training', leave=True)

    for data, targets in loop:
        data = data.to(device)
        targets = targets.to(device)

        scores = model(data)
        loss = criterion(scores, targets)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() copies the scalar to the host; read it once and reuse it
        # for both the running total and the progress bar.
        batch_loss = loss.item()
        running_loss += batch_loss
        loop.set_postfix(loss=batch_loss)

    avg_loss = running_loss / len(loader)
    return avg_loss

In [26]:
# Hyper-parameters of the run.
LEARNING_RATE, NUM_EPOCHS = 1e-4, 10

# Cross-entropy over the two output logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=alexnet_model.parameters(), lr=LEARNING_RATE)

# Accuracy of the freshly initialized network, as a reference point.
test_accuracy_before = check_accuracy(
    loader=test_loader, model=alexnet_model, device=device
)
print(f"pred train {test_accuracy_before:.2f}%")
print("\n")

print(f"--- epochs {NUM_EPOCHS} ---")

# Wall-clock time of the whole loop, including the per-epoch evaluation.
start_time = time.time()

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n--- epoch {epoch}/{NUM_EPOCHS} ---")

    # One optimization pass over the training loader ...
    train_loss = train_epoch(
        model=alexnet_model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )

    # ... followed by an evaluation on the held-out loader.
    test_accuracy = check_accuracy(
        loader=test_loader, model=alexnet_model, device=device
    )

    print(f"avg loss per epoch: {train_loss:.4f}")
    print(f"accuracy on test: {test_accuracy:.2f}%")

end_time = time.time()
total_time = end_time - start_time

print(f"total train time {total_time:.2f} sec")

pred train 50.44%


--- epochs 10 ---

--- epoch 1/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:24<00:00,  3.70it/s, loss=0.707]


avg loss per epoch: 0.6367
accuracy on test: 71.92%

--- epoch 2/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.04it/s, loss=0.608]


avg loss per epoch: 0.5021
accuracy on test: 80.44%

--- epoch 3/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.04it/s, loss=0.155]


avg loss per epoch: 0.3912
accuracy on test: 78.74%

--- epoch 4/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.06it/s, loss=0.263]


avg loss per epoch: 0.3200
accuracy on test: 86.74%

--- epoch 5/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.04it/s, loss=0.277]


avg loss per epoch: 0.2645
accuracy on test: 87.14%

--- epoch 6/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.03it/s, loss=0.138]


avg loss per epoch: 0.2182
accuracy on test: 88.94%

--- epoch 7/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.06it/s, loss=0.111]


avg loss per epoch: 0.1854
accuracy on test: 89.26%

--- epoch 8/10 ---


Training: 100%|██████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.06it/s, loss=0.074]


avg loss per epoch: 0.1608
accuracy on test: 89.98%

--- epoch 9/10 ---


Training: 100%|█████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.04it/s, loss=0.0568]


avg loss per epoch: 0.1330
accuracy on test: 89.34%

--- epoch 10/10 ---


Training: 100%|███████████████████████████████████████████████████████████| 313/313 [01:17<00:00,  4.05it/s, loss=0.12]


avg loss per epoch: 0.1113
accuracy on test: 90.06%
total train time 986.69 sec


In [28]:
# Well, I honestly expected more from ~60 million parameters,
# but then again, maybe I simply did something wrong.