In [1]:
import os
import csv
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from IPython import display
import pandas as pd
import torchvision.transforms as transforms
from torchvision.io import read_image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


class CatDog(Dataset):
    """Map-style dataset backed by an annotations CSV and a directory of image files.

    Column 0 of the CSV is used as the image file name (relative to ``img_dir``)
    and column 1 as the label.

    Args:
        annotations_file: path of the CSV file, loaded with ``pd.read_csv``.
        img_dir: directory that the file names in column 0 are joined onto.
        transform: optional callable applied to the loaded image.
        target_transform: optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.transform = transform
        self.target_transform = target_transform
        self.img_dir = img_dir
        self.img_labels = pd.read_csv(annotations_file)

    def __len__(self):
        """Number of rows in the annotations table."""
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``, after the optional transforms."""
        file_name = self.img_labels.iloc[idx, 0]
        sample = read_image(os.path.join(self.img_dir, file_name))
        target = self.img_labels.iloc[idx, 1]
        # Same truthiness checks as before: a falsy transform is simply skipped.
        sample = self.transform(sample) if self.transform else sample
        target = self.target_transform(target) if self.target_transform else target
        return sample, target

def create_csv(dataset_dir, output_csv):
    """Write a ``filename,label`` CSV describing the ``.jpg`` files in a directory.

    Every entry of ``dataset_dir`` whose name ends with ``.jpg`` gets one row.
    The label is 1 when the name starts with ``dog`` and 0 otherwise.

    Args:
        dataset_dir: directory to scan (non-recursive).
        output_csv: path of the CSV file to create or overwrite.

    Rows are written in sorted file-name order so the output does not depend on
    the filesystem's listing order, which keeps seeded dataset splits
    reproducible across machines.
    """
    # UTF-8 is what pandas.read_csv assumes by default; the platform default
    # encoding may differ, so be explicit.
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['filename', 'label'])

        for filename in sorted(os.listdir(dataset_dir)):
            if filename.endswith('.jpg'):
                label = 1 if filename.startswith('dog') else 0
                csv_writer.writerow([filename, label])

# Generate the annotations CSV for the training images.
# The CSV is rewritten every time this cell runs.
dataset_directory = 'train-cats-dogs'   # directory scanned for .jpg files
output_csv_file = 'train_labels.csv'    # output file with columns: filename, label
create_csv(dataset_directory, output_csv_file)


In [5]:
resize = transforms.Compose([
    transforms.Resize((224, 224))  # Resize the images to a consistent size
])

# Reuse the paths defined alongside create_csv so the CSV that was written is
# the one that gets read.
dataset = CatDog(annotations_file=output_csv_file, img_dir=dataset_directory, transform=resize)

# 80/20 train/test split derived from the actual dataset size (20000/5000 for
# 25000 images), so the cell does not fail when the image count changes.
n_train = (len(dataset) * 4) // 5
n_test = len(dataset) - n_train
# A seeded generator keeps train/test membership identical across kernel
# restarts, so test images never leak into training between runs.
train_set, test_set = torch.utils.data.random_split(
    dataset, [n_train, n_test], generator=torch.Generator().manual_seed(0)
)
train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
# Evaluation does not need shuffling; a fixed order makes results comparable.
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=False)

# Peek at two consecutive batches drawn from a single iterator.
batch_iter = iter(train_loader)
train_features, train_labels = next(batch_iter)
print(f"the batches have size: {train_features.size()}")
print(f"labels have size: {train_labels.size()}")

print(train_features[1])
print(train_labels)
train_features, train_labels = next(batch_iter)
print(train_features[1])
print(train_labels)


the batches have size: torch.Size([64, 3, 224, 224])
labels have size: torch.Size([64])
tensor([[[169, 170, 171,  ..., 189, 189, 189],
         [169, 170, 172,  ..., 189, 189, 189],
         [170, 171, 172,  ..., 190, 190, 190],
         ...,
         [ 63,  58,  64,  ...,  50,  53,  58],
         [ 48,  49,  57,  ...,  50,  51,  55],
         [ 42,  47,  55,  ...,  50,  49,  52]],

        [[145, 146, 147,  ..., 169, 169, 169],
         [145, 146, 148,  ..., 169, 169, 169],
         [146, 147, 148,  ..., 170, 170, 170],
         ...,
         [ 35,  31,  35,  ...,  55,  58,  63],
         [ 22,  23,  30,  ...,  55,  56,  60],
         [ 18,  22,  28,  ...,  55,  54,  57]],

        [[109, 110, 111,  ..., 132, 132, 132],
         [109, 110, 112,  ..., 132, 132, 132],
         [110, 111, 112,  ..., 133, 133, 133],
         ...,
         [ 15,  10,  15,  ...,  48,  51,  56],
         [  4,   4,  12,  ...,  48,  49,  53],
         [  1,   5,  11,  ...,  48,  47,  50]]], dtype=torch.uint8)

In [3]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape ``(N, d1, d2, ...)`` becomes ``(N, d1 * d2 * ...)``.
    """

    def forward(self, x):
        # reshape() rather than view(): view() raises on non-contiguous inputs
        # (e.g. after permute()), while reshape() returns a view when possible
        # and copies only when it has to.
        return x.reshape(x.size(0), -1)

# Three conv -> ReLU -> max-pool stages followed by a small two-layer head.
# Shape comments are for a 3x224x224 input, with N the batch size.
_conv_stages = [
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),      # -> [N, 96, 26, 26]
    nn.Conv2d(96, 256, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),      # -> [N, 256, 12, 12]
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),      # -> [N, 256, 5, 5]
]
_classifier_head = [
    Flatten(),                      # -> [N, 6400]  (256 * 5 * 5)
    nn.Linear(6400, 32),
    nn.ReLU(),
    nn.Linear(32, 2),               # one logit per class
]

# Layers are passed positionally, so the module indices (0..15) are unchanged.
model1 = nn.Sequential(*_conv_stages, *_classifier_head)

In [7]:
num_epochs = 50
# NOTE(review): every iteration builds a fresh iterator and takes only its first
# mini-batch, so this runs 50 training steps, not 50 passes over the data.
# NOTE(review): gradients accumulate across iterations unless they are zeroed
# somewhere not shown here — confirm.
for t in range(num_epochs):
    train_features, train_labels = next(iter(train_loader))
    train_features = train_features.float()/255   # uint8 pixels -> floats in [0, 1]
    output = model1(train_features)
    # One-hot targets for the whole batch. Previously the label was picked with
    # the epoch counter `t`, so every sample in a step shared the same target.
    desired = F.one_hot(train_labels, 2).float()
    # Sigmoid + binary cross-entropy on each of the two logits (not softmax).
    # Mean over the two classes, then sum over the batch, which keeps the loss
    # on the same scale as the earlier per-sample accumulation.
    per_sample_loss = F.binary_cross_entropy_with_logits(output, desired, reduction='none').mean(dim=1)
    loss = per_sample_loss.sum()
    loss.backward()                          # the backward pass computes gradients