In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import os
from PIL import Image

In [22]:
device = torch.device("cuda" if torch.cuda.is_available else "cpu")
device

device(type='cuda')

In [23]:
class DogsVsCats(Dataset):
    def __init__(self, path_dir1, path_dir2, transform=None):
        self.transform = transform
        self.dog_paths = [(os.path.join(path_dir1, f)) for f in sorted(os.listdir(path_dir1))]
        self.cat_paths = [(os.path.join(path_dir2, f)) for f in sorted(os.listdir(path_dir2))]

    def __len__(self):
        return len(self.dog_paths) + len(self.cat_paths)
    
    def __getitem__(self, i):
        if i < len(self.dog_paths):
            label = 0
            img_path = self.dog_paths[i]
        else:
            label = 1
            img_path = self.cat_paths[i - len(self.dog_paths)]

        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        
        label = torch.tensor(label)

        return img, label

In [24]:
transform = transforms.Compose([
    transforms.Resize((300, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [25]:
dogs_path_train = "./data/train/Dog/"
cats_path_train = "./data/train/Cat/"

dogs_path_test = "./data/test/Dog/"
cats_path_test = "./data/test/Cat/"

dogs_vs_cats_train_dataset = DogsVsCats(dogs_path_train, cats_path_train, transform)
dogs_vs_cats_test_dataset = DogsVsCats(dogs_path_test, cats_path_test, transform)

In [26]:
train_loader = DataLoader(dataset=dogs_vs_cats_train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=dogs_vs_cats_test_dataset, batch_size=64, shuffle=False)

In [27]:
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.maxpool = nn.MaxPool2d(2,2)
        self.conv1 = nn.Conv2d(3, 128, 3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(128, 128, 3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(128, 256, 3, stride=1, padding=0)

        self.layer1 = nn.Linear(256 * 35 * 30, 20)
        self.layer2 = nn.Linear(20, 2)

    def forward(self, x):
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.maxpool(F.relu(self.conv2(x)))
        x = self.maxpool(F.relu(self.conv3(x)))

        x = x.view(-1, 256 * 35 * 30)
        x = F.relu(self.layer1(x))
        x = self.layer2(x)
        return x

In [28]:
model = NeuralNet()
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [29]:
epochs = 15

for epoch in range(epochs):
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 1 == 0: 
        print(f"Epoch: [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

Epoch: [1/15], Loss: 0.6318
Epoch: [2/15], Loss: 0.5751
Epoch: [3/15], Loss: 0.5958
Epoch: [4/15], Loss: 0.7371
Epoch: [5/15], Loss: 0.4854
Epoch: [6/15], Loss: 0.5201
Epoch: [7/15], Loss: 0.2762
Epoch: [8/15], Loss: 0.3338
Epoch: [9/15], Loss: 0.2968
Epoch: [10/15], Loss: 0.3198
Epoch: [11/15], Loss: 0.5266
Epoch: [12/15], Loss: 0.2657
Epoch: [13/15], Loss: 0.4000
Epoch: [14/15], Loss: 0.2828
Epoch: [15/15], Loss: 0.5356


In [30]:
correct, total = 0, 0

model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        prediction = model(inputs)
        _, predicted = torch.max(prediction, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 82.96%


In [34]:
classes = {
    0:"Dog",
    1:"Cat",
}

image = Image.open("./puppy.jpg")
transform = transforms.Compose([transforms.ToTensor(), 
                                transforms.Resize((300, 256))])
image_tensor = transform(image)

predicted_class = model(image_tensor.unsqueeze(0).to(device))

with torch.no_grad():
    predicted = torch.argmax(predicted_class)
classes.get(int(predicted))

'Dog'

In [37]:
torch.save(model.state_dict, 'model.pt')