In [3]:
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter


def verify_and_clean_dataset(root_dir='PetImages'):
    print("Проверка датасета на поврежденные изображения...")
    removed_count = 0

    for class_name in os.listdir(root_dir):
        class_path = os.path.join(root_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            try:
                img = Image.open(img_path)
                img.verify()
                img = Image.open(img_path)
                img.load()

                if img.size[0] < 50 or img.size[1] < 50:
                    os.remove(img_path)
                    removed_count += 1
                    print(f"Удалено (слишком маленькое): {img_path}")

            except Exception as e:
                os.remove(img_path)
                removed_count += 1
                print(f"Удалено (поврежденное): {img_path}")

    print(f"Проверка завершена. Удалено файлов: {removed_count}")

verify_and_clean_dataset('PetImages')

Проверка датасета на поврежденные изображения...




Проверка завершена. Удалено файлов: 0


In [9]:
def check_data(loader):
    images, labels = next(iter(loader))
    print(f"Batch shape: {images.shape}")
    print(f"Labels: {labels[:10]}")
    print(f"Label distribution: {torch.bincount(labels)}")

check_data(train_loader)

Batch shape: torch.Size([128, 3, 224, 224])
Labels: tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 0])
Label distribution: tensor([56, 72])


In [8]:
transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
])


dataset = datasets.ImageFolder(root='PetImages')
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_dataset.dataset.transform = transform_train
test_dataset.dataset.transform = transform_test

print(f"Всего изображений: {len(dataset)}")
print(f"Train: {train_size}, Test: {test_size}\n")

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=0, pin_memory=False, persistent_workers=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=0, pin_memory=False, persistent_workers=False)


class AlexNet(nn.Module):
    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


def train_and_evaluate(train_loader, test_loader, epochs=15):
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"Устройство: {device}")

    model = AlexNet(num_classes=2).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[7, 12], gamma=0.1)

    writer = SummaryWriter(log_dir="runs/CatsVsDogs_FullAlexNet")

    best_acc = 0
    start_time = time.time()

    for epoch in range(epochs):
        epoch_start = time.time()

        # Training
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0

        print(f"Epoch {epoch+1}/{epochs} - Training...", end='')

        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            preds = outputs.argmax(dim=1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)

            if (batch_idx + 1) % 20 == 0:
                print(f"\rEpoch {epoch+1}/{epochs} - Batch {batch_idx+1}/{len(train_loader)}", end='')

        avg_train_loss = train_loss / len(train_loader)
        train_acc = 100. * train_correct / train_total

        #Validation
        print(f"\rEpoch {epoch+1}/{epochs} - Validating...", end='')

        model.eval()
        test_loss = 0
        test_correct = 0
        test_total = 0

        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                preds = outputs.argmax(dim=1)
                test_correct += (preds == labels).sum().item()
                test_total += labels.size(0)

        avg_test_loss = test_loss / len(test_loader)
        test_acc = 100. * test_correct / test_total

        scheduler.step()

        # Logging
        writer.add_scalar("Loss/Train", avg_train_loss, epoch)
        writer.add_scalar("Loss/Test", avg_test_loss, epoch)
        writer.add_scalar("Accuracy/Train", train_acc, epoch)
        writer.add_scalar("Accuracy/Test", test_acc, epoch)
        writer.add_scalar("LR", optimizer.param_groups[0]['lr'], epoch)

        epoch_time = time.time() - epoch_start
        total_time = time.time() - start_time
        est_remaining = epoch_time * (epochs - epoch - 1)

        print(f"\rEpoch {epoch+1}/{epochs} | "
              f"Train: L={avg_train_loss:.3f} A={train_acc:.1f}% | "
              f"Test: L={avg_test_loss:.3f} A={test_acc:.1f}% | "
              f"Time: {epoch_time:.0f}s | ETA: {est_remaining/60:.1f}min")

        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), "alexnet_best.pth")
            print(f"Лучшая модель сохранена! Acc: {test_acc:.2f}%")

    writer.close()

    total_time = time.time() - start_time
    print(f"Общее время: {total_time/60:.1f} минут")
    print(f"Лучшая test accuracy: {best_acc:.2f}%")

    return model


print("Начало обучения")
model = train_and_evaluate(train_loader, test_loader, epochs=15)

Всего изображений: 24983
Train: 19986, Test: 4997

Начало обучения
Устройство: mps
Epoch 1/15 - Batch 140/157



Epoch 1/15 | Train: L=0.687 A=54.9% | Test: L=0.663 A=62.2% | Time: 484s | ETA: 113.0min
Лучшая модель сохранена! Acc: 62.24%
Epoch 2/15 | Train: L=0.625 A=65.4% | Test: L=0.597 A=68.4% | Time: 486s | ETA: 105.2min
Лучшая модель сохранена! Acc: 68.38%
Epoch 3/15 | Train: L=0.585 A=69.4% | Test: L=0.550 A=72.0% | Time: 370s | ETA: 73.9min
Лучшая модель сохранена! Acc: 72.02%
Epoch 4/15 | Train: L=0.550 A=72.5% | Test: L=0.616 A=67.3% | Time: 1039s | ETA: 190.4min
Epoch 5/15 | Train: L=0.495 A=76.3% | Test: L=0.481 A=76.9% | Time: 501s | ETA: 83.6min
Лучшая модель сохранена! Acc: 76.93%
Epoch 6/15 | Train: L=0.455 A=78.6% | Test: L=0.599 A=70.3% | Time: 333s | ETA: 50.0min
Epoch 7/15 | Train: L=0.405 A=81.2% | Test: L=0.353 A=84.0% | Time: 326s | ETA: 43.4min
Лучшая модель сохранена! Acc: 83.95%
Epoch 8/15 | Train: L=0.306 A=86.8% | Test: L=0.348 A=84.5% | Time: 373s | ETA: 43.5min
Лучшая модель сохранена! Acc: 84.49%
Epoch 9/15 | Train: L=0.280 A=88.3% | Test: L=0.296 A=87.0% | Time: 25

Моя модель
![](TensorBoard/1.png)
![](TensorBoard/2.png)
![](TensorBoard/3.png)
![](TensorBoard/4.png)
![](TensorBoard/5.png)


In [20]:
def predict_image(image_path, model_path='alexnet_best.pth'):
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

    model = AlexNet(num_classes=2).to(device)
    model.load_state_dict(torch.load(model_path, weights_only=True))
    model.eval()

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    img = Image.open(image_path).convert('RGB')
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_tensor)
        prob = torch.softmax(output, dim=1)
        pred = output.argmax(dim=1).item()

    # Названия классов (проверь порядок!)
    classes = ['Cat', 'Dog']
    confidence = prob[0][pred].item() * 100

    print(f"Prediction: {classes[pred]} Confidence: {confidence:.1f}%")

    return pred, prob


def predict_folder(folder_path):
    results = {}
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            path = os.path.join(folder_path, filename)
            print(f"{filename} → {predict_image(path)}")

if __name__ == "__main__":
    folder = "test"
    predict_folder(folder)


Prediction: Cat Confidence: 75.8%
8.jpg → (0, tensor([[0.7578, 0.2422]], device='mps:0'))
Prediction: Dog Confidence: 100.0%
11.jpeg → (1, tensor([[2.8414e-04, 9.9972e-01]], device='mps:0'))
Prediction: Cat Confidence: 76.4%
9.jpg → (0, tensor([[0.7637, 0.2363]], device='mps:0'))
Prediction: Cat Confidence: 98.6%
10.jpg → (0, tensor([[0.9860, 0.0140]], device='mps:0'))
Prediction: Cat Confidence: 99.0%
4.jpg → (0, tensor([[0.9896, 0.0104]], device='mps:0'))
Prediction: Cat Confidence: 98.8%
5.jpg → (0, tensor([[0.9878, 0.0122]], device='mps:0'))
Prediction: Dog Confidence: 77.9%
7.jpg → (1, tensor([[0.2205, 0.7795]], device='mps:0'))
Prediction: Dog Confidence: 92.1%
6.jpg → (1, tensor([[0.0786, 0.9214]], device='mps:0'))
Prediction: Cat Confidence: 97.6%
2.jpg → (0, tensor([[0.9761, 0.0239]], device='mps:0'))
Prediction: Dog Confidence: 90.6%
3.jpg → (1, tensor([[0.0941, 0.9059]], device='mps:0'))
Prediction: Cat Confidence: 99.8%
1.jpg → (0, tensor([[0.9984, 0.0016]], device='mps:0')

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import time

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225))
])

dataset = datasets.ImageFolder(root='PetImages', transform=transform)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)

for param in model.features.parameters():
    param.requires_grad = False

model.classifier[6] = nn.Linear(4096, 2)

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = model.to(device)

optimizer = optim.Adam(model.classifier.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

def train_and_evaluate(model, train_loader, test_loader, epochs=5, title="Cats vs Dogs"):
    train_losses, test_losses, test_accs, times = [], [], [], []

    for epoch in range(epochs):
        start_time = time.time()

        # --- Training ---
        model.train()
        running_loss = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_losses.append(running_loss / len(train_loader))

        # --- Evaluation ---
        model.eval()
        test_loss, correct = 0, 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_losses.append(test_loss / len(test_loader))
        acc = 100. * correct / len(test_loader.dataset)
        test_accs.append(acc)
        epoch_time = time.time() - start_time
        times.append(epoch_time)

        print(f"Epoch {epoch+1}/{epochs}: "
              f"Train loss={train_losses[-1]:.4f}, "
              f"Test loss={test_losses[-1]:.4f}, "
              f"Acc={acc:.2f}%, "
              f"Time={epoch_time:.2f}s")

print("Transfer Learning: AlexNet на Cats vs Dogs")
train_and_evaluate(model, train_loader, test_loader, epochs=3)

Transfer Learning: AlexNet на Cats vs Dogs
Epoch 1/3: Train loss=0.1481, Test loss=0.1106, Acc=95.66%, Time=368.12s
Epoch 2/3: Train loss=0.1054, Test loss=0.1049, Acc=95.80%, Time=247.93s
Epoch 3/3: Train loss=0.0928, Test loss=0.1073, Acc=95.60%, Time=237.49s


In [None]:
torch.save(model.state_dict(), "alexnet_cats_vs_dogs.pth")

In [15]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os


device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
model.classifier[6] = nn.Linear(4096, 2)
model.load_state_dict(torch.load("alexnet_cats_vs_dogs.pth", map_location=device, weights_only=True))
model = model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225))
])

def predict_image(image_path):
    image = Image.open(image_path).convert("RGB")
    tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(tensor)
        _, predicted = torch.max(outputs, 1)
    label = "Cat 🐱" if predicted.item() == 0 else "Dog 🐶"
    return label

def predict_folder(folder_path):
    results = {}
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            path = os.path.join(folder_path, filename)
            label = predict_image(path)
            results[filename] = label
            print(f"{filename} → {label}")
    return results

if __name__ == "__main__":
    folder = "test"
    predict_folder(folder)


8.jpg → Cat 🐱
11.jpeg → Dog 🐶
9.jpg → Cat 🐱
10.jpg → Cat 🐱
4.jpg → Cat 🐱
5.jpg → Cat 🐱
7.jpg → Cat 🐱
6.jpg → Cat 🐱
2.jpg → Cat 🐱
3.jpg → Cat 🐱
1.jpg → Cat 🐱
