In [4]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import random
import numpy as np

In [5]:
def set_seed(seed=42):

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) 
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [6]:
import os, shutil
from PIL import Image

# Copy dataset to working dir (only first time)
src = "/kaggle/input/microsoft-catsvsdogs-dataset/PetImages"
dst = "/kaggle/working/PetImages"

if not os.path.exists(dst):
    shutil.copytree(src, dst)

# Now clean in /kaggle/working
for category in ["Cat", "Dog"]:
    folder = os.path.join(dst, category)
    for file in os.listdir(folder):
        fpath = os.path.join(folder, file)
        try:
            img = Image.open(fpath)
            img.verify()
        except:
            os.remove(fpath)



In [7]:
transform = transforms.Compose([transforms.Resize((128,128)), 
                               transforms.ToTensor(), 
                               transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [8]:
dataset = datasets.ImageFolder(root=dst, transform=transform)

In [9]:
print(len(dataset))

24998


In [10]:
print(dataset.classes)

['Cat', 'Dog']


In [11]:
train_size = int(0.7*len(dataset))
val_size = int(0.1*len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])


In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device using: ", device)

device using:  cuda


In [13]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=64,  num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64,  num_workers=2, pin_memory=True)

In [14]:
class PetNet(nn.Module):
    def __init__(self, num_channels):
        super().__init__()
        self.features_extractor = nn.Sequential(
            nn.Conv2d(num_channels, 32, kernel_size=3, padding='same'),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, padding='same'),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding='same'),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*16*16, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128,64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64,2)
        )

    def forward(self, x):
        x = self.features_extractor(x)
        x = self.classifier(x)
        return x

In [15]:
Pet = PetNet(3).to(device)
optimizer = optim.Adam(Pet.parameters(), lr=1e-3)
Loss = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5, min_lr=1e-6)

In [16]:
epochs = 30
es_patience = 7
counter = 0
best_val_loss = float('inf')     #float('inf') is basically Python’s way of saying “this number is larger than anything I can represent” → i.e. positive infinity
delta=1e-4

for epoch in range(epochs):
    Pet.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = Pet(images)
        loss = Loss(outputs,labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


    #Validation part
    total_val_loss=0
    Pet.eval()
    total_samples = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = Pet(images)
            loss = Loss(outputs, labels) # mean loss per sample
            total_val_loss += loss*labels.size(0) #total loss per batch in each loop, eventually at the end, total loss on whole data
            total_samples += labels.size(0)
        avg_val_loss = total_val_loss / total_samples
        scheduler.step(avg_val_loss)

    #Early stopping
    if avg_val_loss < best_val_loss-delta:
        best_val_loss = avg_val_loss
        counter=0
    else:
        counter+=1
        if counter>es_patience:
            print("Early stopping triggered.")
            break
    print(f"Epoch:{(epoch+1)}/{epochs} | Val_loss: {avg_val_loss:.4f} | Counter: {counter}")

Epoch:1/30 | Val_loss: 0.4502 | Counter: 0
Epoch:2/30 | Val_loss: 0.4244 | Counter: 0
Epoch:3/30 | Val_loss: 0.3788 | Counter: 0
Epoch:4/30 | Val_loss: 0.3609 | Counter: 0
Epoch:5/30 | Val_loss: 0.3785 | Counter: 1
Epoch:6/30 | Val_loss: 0.4326 | Counter: 2
Epoch:7/30 | Val_loss: 0.4595 | Counter: 3
Epoch:8/30 | Val_loss: 0.5153 | Counter: 4
Epoch:9/30 | Val_loss: 0.4552 | Counter: 5
Epoch:10/30 | Val_loss: 0.4823 | Counter: 6
Epoch:11/30 | Val_loss: 0.5274 | Counter: 7
Early stopping triggered.


In [18]:
#Testing
Pet.eval()
correct=0
total=0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = Pet(images)
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        correct+=(predicted==labels).sum().item()
    test_accuracy=100*correct/total
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    

Test Accuracy: 84.62%
