In [1]:
import kagglehub
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
import numpy as np

In [2]:
import warnings

In [3]:
warnings.filterwarnings("ignore", "(Possibly corrupt EXIF data|Truncated File Read|load_weights) detected", module="PIL.Image")
warnings.filterwarnings("ignore", category=UserWarning)

In [4]:
# Fetch the Kaggle dataset through kagglehub; the returned value is used below as the
# root directory of the downloaded files.
dataset_path = kagglehub.dataset_download('bhavikjikadara/dog-and-cat-classification-dataset')

In [5]:
# Folder inside the download that is scanned for the per-class sub-folders 'Cat' and 'Dog'.
directory = os.path.join(dataset_path, 'PetImages')

In [6]:
def scan_and_clean_data(root_dir):
    """Collect the decodable JPEG images of both classes under ``root_dir``.

    The sub-folders ``Cat`` (label 0) and ``Dog`` (label 1) are scanned. A file is
    kept only if its name ends in ``.jpg``/``.jpeg`` (case-insensitive), Pillow can
    fully decode it, and its mode is ``RGB`` or ``L``. Everything else is skipped
    silently.

    Args:
        root_dir: Directory containing the ``Cat`` and ``Dog`` sub-folders.

    Returns:
        ``(image_paths, labels)`` - two parallel lists with the full file paths and
        the integer class ids. Within each class the paths are in sorted filename
        order.

    Raises:
        OSError: If one of the class sub-folders cannot be listed.
    """
    image_paths = []
    labels = []

    for label_name, label_id in [('Cat', 0), ('Dog', 1)]:
        path = os.path.join(root_dir, label_name)

        # os.listdir returns entries in arbitrary order; sorting makes the resulting
        # lists (and any seeded index-based split built on them) reproducible.
        for filename in tqdm(sorted(os.listdir(path)), desc=f"Обработка {label_name}"):
            if not filename.lower().endswith(('.jpg', '.jpeg')):
                continue

            file_path = os.path.join(path, filename)

            try:
                # The context manager closes the file handle even when decoding fails.
                with Image.open(file_path) as img:
                    img.load()  # force a full decode so truncated files are caught here
                    mode = img.mode
            except Exception:
                # Deliberate best-effort: any file Pillow cannot open/decode is dropped.
                continue

            if mode not in ('RGB', 'L'):
                continue

            image_paths.append(file_path)
            labels.append(label_id)

    print(f"total {len(labels)}")
    print(f"cat {labels.count(0)}, dog {labels.count(1)}")
    return image_paths, labels

In [7]:
# Build the parallel lists of usable image paths and their class ids (0 = Cat, 1 = Dog).
all_image_paths, all_labels = scan_and_clean_data(directory)

Обработка Cat: 100%|███████████████████████████████████████████████████████████| 12499/12499 [00:09<00:00, 1291.74it/s]
Обработка Dog: 100%|███████████████████████████████████████████████████████████| 12499/12499 [00:10<00:00, 1241.75it/s]

total 24936
cat 12473, dog 12463





In [8]:
class minclass4torch(Dataset):
    """Map-style dataset that lazily loads ``(image, label)`` pairs from disk.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, parallel to ``image_paths``.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.

    Each item is ``(image, label)``, where ``image`` is the transformed image, or
    the RGB PIL image itself when no transform is given.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        # Black RGB stand-in used when a file cannot be decoded at access time.
        self.placeholder_image = Image.new('RGB', (224, 224), color = 'black')

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        try:
            # convert() returns a fully loaded image, so the file can be closed right away.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
        except Exception as exc:
            # Keep the best-effort fallback, but make it visible: a black image paired
            # with a real label would otherwise slip into training unnoticed.
            # RuntimeWarning is used so a blanket UserWarning filter does not hide it.
            warnings.warn(
                f"Could not load {img_path!r} ({exc}); using a black placeholder image",
                RuntimeWarning,
            )
            image = self.placeholder_image

        if self.transform:
            image = self.transform(image)

        return image, label

In [9]:
IMAGE_SIZE = 224  # side length (pixels) every image is resized to by the transform pipeline
BATCH_SIZE = 64   # samples per batch for both DataLoaders

In [10]:
# Preprocessing applied to every image before it reaches the network.
alexnet_transforms = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),  # fixed square input; aspect ratio is not preserved
    transforms.ToTensor(),                        # PIL image -> tensor
    # Per-channel normalisation; these values look like the usual ImageNet statistics,
    # which would match the ImageNet-pretrained weights loaded further down.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [11]:
# Wrap the validated paths/labels in the Dataset; images are opened and transformed per item on access.
full_dataset = minclass4torch(all_image_paths, all_labels, transform=alexnet_transforms)

In [12]:
# Split by index so both subsets can share the single underlying Dataset object.
indices = list(range(len(full_dataset)))
# Stratify on the labels so train and test keep the same Cat/Dog ratio; the fixed seed
# makes the split repeatable for a given ordering of the scanned files.
train_indices, test_indices = train_test_split(
    indices,
    test_size=0.2,
    random_state=42,
    stratify=all_labels,
)

In [13]:
# Index-based views over full_dataset; no image data is copied.
train_dataset = Subset(full_dataset, train_indices)
test_dataset = Subset(full_dataset, test_indices)

In [14]:
# Training batches are reshuffled every epoch; test batches keep a fixed order.
# num_workers=0 means images are loaded in the main process.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [15]:
import torch.nn as nn
import torchvision.models as models

In [16]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [17]:
# AlexNet initialised from torchvision's IMAGENET1K_V1 pretrained weights.
alexnet_model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)

In [18]:
# Input width of the last classifier layer (index 6), needed to build its replacement.
num_features = alexnet_model.classifier[6].in_features

In [19]:
# Replace the final layer with a freshly initialised 2-output linear head (Cat vs Dog).
alexnet_model.classifier[6] = nn.Linear(num_features, 2)

In [20]:
# Move the model to the selected device; as the cell's last expression this also displays the module repr.
alexnet_model.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [21]:
import torch.optim as optim
import time
from sklearn.metrics import f1_score, accuracy_score

In [22]:
LEARNING_RATE = 1e-4  # step size passed to Adam
NUM_EPOCHS = 5        # number of full passes over train_loader
criterion = nn.CrossEntropyLoss()
# All model parameters are handed to the optimizer, so the convolutional layers are fine-tuned too.
optimizer = optim.Adam(alexnet_model.parameters(), lr=LEARNING_RATE)

In [23]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable yielding ``(data, targets)`` tensor batches.
        criterion: Callable ``criterion(scores, targets)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates the model's parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The unweighted mean of the per-batch losses as a float, or ``nan`` when the
        loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0  # counted explicitly so loaders without __len__ also work

    for data, targets in tqdm(loader, desc="train"):
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        scores = model(data)
        loss = criterion(scores, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        # Nothing was trained on; avoid a ZeroDivisionError and signal "no value".
        return float('nan')
    return running_loss / num_batches

In [24]:
def check_f1_score(loader, model, device):
    """Evaluate ``model`` on ``loader`` and return ``(f1, accuracy)``.

    Predictions are the arg-max over dimension 1 of the model output. F1 is computed
    with ``average='binary'``.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        model: Module to evaluate. It runs in eval mode without gradient tracking,
            and its previous train/eval mode is restored afterwards.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Tuple ``(f1, accuracy)`` as returned by ``f1_score`` and ``accuracy_score``.
    """
    was_training = model.training
    model.eval()
    all_preds = []
    all_targets = []

    try:
        with torch.no_grad():
            for x, y in tqdm(loader, desc="Оценка"):
                scores = model(x.to(device=device))
                predictions = scores.argmax(dim=1)

                all_preds.extend(predictions.cpu().tolist())
                # Targets are only compared on the host, so they are not moved to the device.
                all_targets.extend(y.cpu().tolist())
    finally:
        # Put the model back in whatever mode the caller had it in, even on error.
        model.train(was_training)

    f1 = f1_score(all_targets, all_preds, average='binary')
    accuracy = accuracy_score(all_targets, all_preds)
    return f1, accuracy

In [25]:
# Baseline: score the pretrained backbone with its untrained 2-class head before any fine-tuning.
f1_before, acc_before = check_f1_score(test_loader, alexnet_model, device)
print(f"F1: {f1_before:.4f}, accuracy: {acc_before:.4f}")
print("\n")

# perf_counter is monotonic, so the elapsed time cannot be skewed by system clock adjustments.
start_time = time.perf_counter()

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n--- epoch {epoch}/{NUM_EPOCHS} ---")

    train_loss = train_epoch(
        alexnet_model,
        train_loader,
        criterion,
        optimizer,
        device
    )

    # The held-out split is scored after every epoch to monitor progress.
    f1_score_val, acc_val = check_f1_score(test_loader, alexnet_model, device)

    print(f"avg loss per epoch: {train_loss:.4f}")
    print(f"accuracy on test: {acc_val:.4f}")
    print(f"F1 on test: {f1_score_val:.4f}")

end_time = time.perf_counter()
# Wall-clock seconds for the whole loop, i.e. training plus the per-epoch evaluations.
total_time = end_time - start_time

print(f"total train time {total_time:.2f}")

Оценка: 100%|██████████████████████████████████████████████████████████████████████████| 78/78 [00:17<00:00,  4.44it/s]


F1: 0.6676, accuracy: 0.6173



--- epoch 1/5 ---


train: 100%|█████████████████████████████████████████████████████████████████████████| 312/312 [01:27<00:00,  3.57it/s]
Оценка: 100%|██████████████████████████████████████████████████████████████████████████| 78/78 [00:16<00:00,  4.63it/s]


avg loss per epoch: 0.1190
accuracy on test: 0.9495
F1 on test: 0.9465

--- epoch 2/5 ---


train: 100%|█████████████████████████████████████████████████████████████████████████| 312/312 [01:26<00:00,  3.61it/s]
Оценка: 100%|██████████████████████████████████████████████████████████████████████████| 78/78 [00:16<00:00,  4.63it/s]


avg loss per epoch: 0.0583
accuracy on test: 0.9643
F1 on test: 0.9630

--- epoch 3/5 ---


train: 100%|█████████████████████████████████████████████████████████████████████████| 312/312 [01:25<00:00,  3.64it/s]
Оценка: 100%|██████████████████████████████████████████████████████████████████████████| 78/78 [00:16<00:00,  4.63it/s]


avg loss per epoch: 0.0306
accuracy on test: 0.9581
F1 on test: 0.9588

--- epoch 4/5 ---


train: 100%|█████████████████████████████████████████████████████████████████████████| 312/312 [01:26<00:00,  3.62it/s]
Оценка: 100%|██████████████████████████████████████████████████████████████████████████| 78/78 [00:17<00:00,  4.52it/s]


avg loss per epoch: 0.0244
accuracy on test: 0.9613
F1 on test: 0.9615

--- epoch 5/5 ---


train: 100%|█████████████████████████████████████████████████████████████████████████| 312/312 [01:26<00:00,  3.60it/s]
Оценка: 100%|██████████████████████████████████████████████████████████████████████████| 78/78 [00:16<00:00,  4.70it/s]

avg loss per epoch: 0.0229
accuracy on test: 0.9669
F1 on test: 0.9657
total train time 517.02





In [30]:
# The second epoch has barely started and I already realise that I was the incompetent, talentless one:
# my earlier results do not come anywhere close to these.
# It may be possible to push this to 98-99 F1 - and that is F1, not just accuracy.

# That said, my previous attempts lacked proper data cleaning; this one has it.
# And judging by how many warnings I caught over 50 minutes, the cat-vs-dog data is far from perfect.
# The CNN layers are not frozen here: this is full fine-tuning (the convolutions are slightly retrained to reduce overfitting in the fc layers).