In [1]:
import torch
from torchvision.transforms import v2
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

In [2]:
train_transforms = v2.Compose([
    v2.Resize((64,64)),
    v2.RandomHorizontalFlip(),
    v2.RandomAffine(degrees=0,shear=0.2,scale=(0.8, 1.2)),
    v2.ToTensor(),
    v2.Normalize((0.5,), (0.5,))])

train_set = datasets.ImageFolder(
    root='dataset/training_set',
    transform=train_transforms)

train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=5)



In [3]:
test_transforms = v2.Compose([
    v2.Resize((64,64)),
    v2.ToTensor(),
    v2.Normalize((0.5,), (0.5,))])

test_set = datasets.ImageFolder(
    root='dataset/test_set',
    transform=test_transforms)

test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=5)

In [4]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=32 * 16 * 16, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))  
        x = self.pool(x)
        x = self.relu(self.conv2(x))  
        x = self.pool(x)
        x = x.view(-1, 32 * 16 * 16)  
        x = self.relu(self.fc1(x))    
        x = self.sigmoid(self.fc2(x)) 
        return x

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN()
model.to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:
num_epochs = 25
print(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(
            device).float().view(-1, 1)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
    
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device).float().view(-1, 1)
            outputs = model(images)
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")

print("Training complete!")

cuda
Epoch [1/25], Loss: 0.6565
Validation Accuracy: 67.25%
Epoch [2/25], Loss: 0.6014
Validation Accuracy: 69.95%
Epoch [3/25], Loss: 0.5682
Validation Accuracy: 71.40%
Epoch [4/25], Loss: 0.5329
Validation Accuracy: 73.15%
Epoch [5/25], Loss: 0.5053
Validation Accuracy: 73.35%
Epoch [6/25], Loss: 0.4943
Validation Accuracy: 75.70%
Epoch [7/25], Loss: 0.4794
Validation Accuracy: 77.15%
Epoch [8/25], Loss: 0.4695
Validation Accuracy: 75.45%
Epoch [9/25], Loss: 0.4660
Validation Accuracy: 77.05%
Epoch [10/25], Loss: 0.4508
Validation Accuracy: 78.75%
Epoch [11/25], Loss: 0.4367
Validation Accuracy: 77.60%
Epoch [12/25], Loss: 0.4278
Validation Accuracy: 75.75%
Epoch [13/25], Loss: 0.4141
Validation Accuracy: 78.75%
Epoch [14/25], Loss: 0.4096
Validation Accuracy: 79.45%
Epoch [15/25], Loss: 0.3968
Validation Accuracy: 77.90%
Epoch [16/25], Loss: 0.3941
Validation Accuracy: 80.15%
Epoch [17/25], Loss: 0.3865
Validation Accuracy: 80.35%
Epoch [18/25], Loss: 0.3720
Validation Accuracy: 78.

In [8]:
from PIL import Image

predict_image = Image.open('dataset/single_prediction/cat_or_dog_2.jpg')
predict_image = test_transforms(predict_image).unsqueeze(0).to(device)
prediction = model(predict_image)
probability = torch.sigmoid(prediction)
predicted_class = (probability > 0.5).float()
print(predicted_class.item(), probability.item())
classes = ['Cat', 'Dog']
class_name = classes[int(predicted_class.item())]
print(f'The model predicts the image is a: {class_name}')

1.0 0.5577730536460876
The model predicts the image is a: Dog
