In [81]:
#
# Script to create a CSV file for the cats vs dogs dataset
#
import csv
import os

# Directory paths
cats_dir = "../data/train/cnn_train_images/cats"
dogs_dir = "../data/train/cnn_train_images/dogs"
csv_dir = '../data/train/cnn_train_images'

# File path within the folder
csv_file_path = os.path.join(csv_dir, "cats_vs_dogs_train.csv")

# List to store image paths and labels
data = []

# Process cats images
for filename in os.listdir(cats_dir):
    filepath = os.path.join(cats_dir, filename)
    
    # Splitting the string at the first occurrence of '.'
    id_number = filename.split('.', 1)

    # Keeping only the part before the first '.'
    id = id_number[0]

    data.append([id, 1])  # Label 1 for cat

# Process dogs images
for filename in os.listdir(dogs_dir):
    filepath = os.path.join(dogs_dir, filename)
            
    # Splitting the string at the first occurrence of '.'
    id_number = filename.split('.', 1)

    # Keeping only the part before the first '.'
    id = id_number[0]

    data.append([id, 0])  # Label 0 for dog

# Shuffle the data
import random
random.shuffle(data)

# Write data to CSV file
with open(csv_file_path, "w", newline="") as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["Id", "Cat"])
    csvwriter.writerows(data)


In [82]:
import torch
import os
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from PIL import Image

# Define relevant variables for the ML task (Hyperparameters)
batch_size = 32
num_classes = 2 # 2 classes: Cat(1) and Dog(0)
learning_rate = 0.00001
num_epochs = 1000

# Step 1: Prepare the Dataset
class CustomDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,
                                self.data.iloc[idx, 0]) + ".jpg"
        image = Image.open(img_name)
        label = self.data.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)

        return image, label
    
# Define transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load dataset
dataset = CustomDataset(csv_file='../data/train/cnn_train_images/cats_vs_dogs_train.csv',
                              root_dir='../data/train/cnn_train_images/',
                              transform=transform)

# Split dataset into training, validation, and test sets
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# Define dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [83]:
# Step 2: Define the CNN Architecture
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.fc1 = nn.Linear(128 * 6 * 6, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv3(x))
        x = torch.max_pool2d(x, 2)
        x = x.view(-1, 128 * 6 * 6)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [84]:
# Step 3: Train the Model

# WINDOWS: Device will determine whether to run the training on GPU or CPU.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MAC: Device will determine whether to run the training on GPU or CPU.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

# Initialize the model, loss, and optimizer
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9) 
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:    # Print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

print('Finished Training')


Epoch [1/100], Loss: 0.6954
Epoch [2/100], Loss: 0.6854
Epoch [3/100], Loss: 0.6820
Epoch [4/100], Loss: 0.6770
Epoch [5/100], Loss: 0.6678
Epoch [6/100], Loss: 0.6585
Epoch [7/100], Loss: 0.6358
Epoch [8/100], Loss: 0.6361
Epoch [9/100], Loss: 0.6098
Epoch [10/100], Loss: 0.6422
Epoch [11/100], Loss: 0.6281
Epoch [12/100], Loss: 0.5729
Epoch [13/100], Loss: 0.6094
Epoch [14/100], Loss: 0.5990
Epoch [15/100], Loss: 0.5525
Epoch [16/100], Loss: 0.4987
Epoch [17/100], Loss: 0.5382
Epoch [18/100], Loss: 0.4783
Epoch [19/100], Loss: 0.4597
Epoch [20/100], Loss: 0.4666
Epoch [21/100], Loss: 0.4781
Epoch [22/100], Loss: 0.4601
Epoch [23/100], Loss: 0.3525
Epoch [24/100], Loss: 0.3564
Epoch [25/100], Loss: 0.3885
Epoch [26/100], Loss: 0.3181
Epoch [27/100], Loss: 0.3138
Epoch [28/100], Loss: 0.3261
Epoch [29/100], Loss: 0.2997
Epoch [30/100], Loss: 0.2174
Epoch [31/100], Loss: 0.2476
Epoch [32/100], Loss: 0.2229
Epoch [33/100], Loss: 0.2126
Epoch [34/100], Loss: 0.1867
Epoch [35/100], Loss: 0

In [85]:
# Step 4: Evaluate the Model

# Validation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_accuracy = correct / total
print(f"Validation Accuracy: {val_accuracy:.2%}")

# Test
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy:.2%}")

Validation Accuracy: 45.00%
Test Accuracy: 60.00%


In [86]:
torch.save(model.state_dict(), '../data/train/cnn_train_images/model/cats_vs_dogs.pth')

In [94]:
import torch
import torchvision.transforms as transforms
from PIL import Image

# Step 1: Preprocess the Image
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    image = Image.open(image_path)
    image = transform(image)
    return image.unsqueeze(0)  # Add batch dimension

# Step 2: Load the Model
model = CNN()  # Assuming you've defined your CNN model

# Load the trained model weights
model.load_state_dict(torch.load('../data/train/cnn_train_images/model/cats_vs_dogs.pth'))  # Replace 'model_weights.pth' with your model file path

# Step 3: Perform Inference
def predict_image(image_path, model):
    image = preprocess_image(image_path)
    model.eval()
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
        return predicted.item()

# Step 4: Post-process the Predictions
def get_class_label(class_index):
    if class_index == 0:
        return 'Dog'
    elif class_index == 1:
        return 'Cat'
    else:
        return 'Unknown'

# Example usage
image_path = '../data/train/cnn_train_images/0a12576d99ec278f415c51f47279e89a.jpg'  # Replace with the path to your image
predicted_class = predict_image(image_path, model)
predicted_label = get_class_label(predicted_class)
print('Predicted class:', predicted_label)


Predicted class: Dog
