In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.optim as optim

import os

In [5]:
# Pick the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise the CPU. Later cells move tensors and the model onto `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Running on the GPU" if device.type == "cuda" else "Running on the CPU")

Running on the CPU


In [6]:
# NOTE(review): `transformation` is not applied anywhere in the cells visible
# here; the images below are only rescaled to [0, 1]. Kept in case a later
# cell relies on it.
transformation = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])


def load_image_folder(folder):
    """Read every image file in ``folder`` as a float tensor scaled to [0, 1].

    Args:
        folder: path of a directory containing image files.

    Returns:
        A list with one tensor per image, as produced by
        ``torchvision.io.read_image`` and then divided by 255.

    Entries are visited in sorted name order because ``os.listdir`` returns
    them in arbitrary order, which would make the sample order differ between
    machines and runs. Hidden entries and sub-directories (for example
    ``.ipynb_checkpoints`` or ``.DS_Store``) are skipped since they are not
    images and would make ``read_image`` fail.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if filename.startswith(".") or not os.path.isfile(path):
            continue
        img = torchvision.io.read_image(path)
        images.append(img.float() / 255.0)
    return images


horse_train = load_image_folder("images/Horse_train")
panda_train = load_image_folder("images/Panda_train")
horse_test = load_image_folder("images/Horse_test")
panda_test = load_image_folder("images/Panda_test")


In [7]:
# Assemble the training tensors: images stacked along a new batch dimension,
# labels as floats (class 0 for horse, class 1 for panda).
X_train = torch.cat([torch.stack(horse_train), torch.stack(panda_train)], dim=0)
y_train = torch.cat([torch.zeros(len(horse_train)), torch.ones(len(panda_train))], dim=0)

# Same layout for the held-out test split.
X_test = torch.cat([torch.stack(horse_test), torch.stack(panda_test)], dim=0)
y_test = torch.cat([torch.zeros(len(horse_test)), torch.ones(len(panda_test))], dim=0)

# Shuffle each split with one shared permutation so images and labels stay
# aligned, then move the shuffled tensors onto the selected device.
train_indices = torch.randperm(len(X_train))
test_indices = torch.randperm(len(X_test))

X_train = X_train[train_indices].to(device)
y_train = y_train[train_indices].to(device)
X_test = X_test[test_indices].to(device)
y_test = y_test[test_indices].to(device)

# Report the final shapes as a quick sanity check.
print("Shape of X_train: ", X_train.shape)
print("Shape of y_train: ", y_train.shape)
print("Shape of X_test: ", X_test.shape)
print("Shape of y_test: ", y_test.shape)

Shape of X_train:  torch.Size([160, 3, 180, 180])
Shape of y_train:  torch.Size([160])
Shape of X_test:  torch.Size([40, 3, 180, 180])
Shape of y_test:  torch.Size([40])


In [8]:
# Display the shuffled training labels (0 = horse, 1 = panda).
print(y_train)

tensor([0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 1., 1.,
        1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0.,
        0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0.,
        0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0.,
        0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1.,
        0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0.,
        0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0.,
        1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 1.])


### VGG (1 Block)

In [9]:
class vgg_1_block(nn.Module):
    """Single-block VGG-style network for binary image classification.

    Layers, in order: 3x3 'same'-padded convolution with 32 filters -> ReLU ->
    2x2 max-pool (stride 2) -> flatten -> Linear(..., 128) -> ReLU ->
    Linear(128, 1) -> sigmoid.

    Args:
        in_channels: number of channels of the input images. Defaults to 3.
        image_size: spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 180,
            which yields the 32*90*90 flattened features of the original
            hard-coded layer.

    Raises:
        ValueError: if either spatial dimension is smaller than 2, because
            the 2x2 max-pool would then produce an empty feature map.
    """

    def __init__(self, in_channels=3, image_size=180):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size
        if height < 2 or width < 2:
            raise ValueError("image_size must be at least 2 in each dimension")

        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding='same')
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        # 'same' padding keeps height/width through the convolution; the
        # stride-2 pool then halves each of them (rounding down).
        self.fc1 = nn.Linear(32 * (height // 2) * (width // 2), 128)
        self.fc2 = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the network on a batch ``x`` of shape (batch, in_channels, H, W).

        Returns a tensor of shape (batch, 1) holding sigmoid outputs in
        [0, 1], suitable for ``nn.BCELoss``.
        """
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool1(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x
    
# Build the model on the selected device and show its layer summary.
vgg1b = vgg_1_block().to(device)
print(vgg1b)

# Tally all parameters, and separately those that receive gradients.
total_params = 0
trainable_params = 0
for param in vgg1b.parameters():
    count = param.numel()
    total_params += count
    if param.requires_grad:
        trainable_params += count
print("Number of parameters: ", total_params)
print("Number of trainable parameters: ", trainable_params)


vgg_1_block(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (relu): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=259200, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)
Number of parameters:  33178753
Number of trainable parameters:  33178753


In [10]:
# Binary cross-entropy on the model's sigmoid outputs, optimised with
# SGD + momentum.
criterion = nn.BCELoss()
optimizer = optim.SGD(vgg1b.parameters(), lr=0.001, momentum=0.9)
epochs = 1000

# The labels are 1-D; the model emits one column per sample, so give the
# targets a matching trailing dimension once, outside the loop.
train_targets = y_train.unsqueeze(1)

# Each epoch is a single full-batch step over the whole of X_train.
for epoch in range(epochs):
    optimizer.zero_grad()
    y_pred = vgg1b(X_train)
    loss = criterion(y_pred, train_targets)
    loss.backward()
    optimizer.step()
    print("Epoch:", epoch, "Loss:", loss.item())


Epoch: 0 Loss: 0.6917229890823364
Epoch: 1 Loss: 0.6881644129753113
Epoch: 2 Loss: 0.6831415295600891
Epoch: 3 Loss: 0.677044689655304
Epoch: 4 Loss: 0.6698112487792969
Epoch: 5 Loss: 0.6615290641784668
Epoch: 6 Loss: 0.6521018147468567
Epoch: 7 Loss: 0.6418759822845459
Epoch: 8 Loss: 0.6309963464736938
Epoch: 9 Loss: 0.619530439376831
Epoch: 10 Loss: 0.6077923774719238
Epoch: 11 Loss: 0.5958319306373596
Epoch: 12 Loss: 0.5838675498962402
Epoch: 13 Loss: 0.5722123384475708
Epoch: 14 Loss: 0.5608342885971069
Epoch: 15 Loss: 0.5496755838394165
Epoch: 16 Loss: 0.5388544797897339
Epoch: 17 Loss: 0.5283926129341125
Epoch: 18 Loss: 0.5182187557220459
Epoch: 19 Loss: 0.5084472298622131
Epoch: 20 Loss: 0.4990082383155823
Epoch: 21 Loss: 0.48986977338790894
Epoch: 22 Loss: 0.48101434111595154
Epoch: 23 Loss: 0.4724217355251312
Epoch: 24 Loss: 0.4640778601169586
Epoch: 25 Loss: 0.4559554159641266
Epoch: 26 Loss: 0.4480156898498535
Epoch: 27 Loss: 0.4402875304222107
Epoch: 28 Loss: 0.432739824056

KeyboardInterrupt: 

In [None]:
# Evaluate on the held-out split; no gradients are needed for inference.
with torch.no_grad():
    test_probs = vgg1b(X_test)
    # Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions.
    y_pred = (test_probs > 0.5).float()
    # Compare against the labels reshaped to the same column layout.
    is_correct = torch.eq(y_pred, y_test.unsqueeze(1))
    accuracy = is_correct.float().mean()
    print("Accuracy on test set: ", accuracy.item())

Accuracy on test set:  0.699999988079071
