In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.utils as utils
import torchvision.transforms.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, recall_score, precision_score

2024-01-26 13:59:49.560169: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
transform = transforms.Compose([
    transforms.Resize((100, 100)),  # Resize the image to a 100x100 square
    transforms.ToTensor(),          # Convert the image to a PyTorch tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize the image
])

In [3]:
dataset = datasets.ImageFolder(root='petimages', transform=transform)

In [4]:
test_set, train_set = torch.utils.data.random_split(dataset, [int(0.2 * len(dataset)), len(dataset) - int(0.2 * len(dataset))])

In [5]:
learning_rate = 0.001
batch_size = 32
epoch_size = 20

In [6]:
trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [7]:
trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [8]:
class CNN(nn.Module):

    # there is no "correct" CNN model architecture for this lab, you can start with a naive model as follows:
    # convolution -> relu -> pool -> convolution -> relu -> pool -> convolution -> relu -> pool -> linear -> relu -> linear -> relu -> linear
    # you can try increasing number of convolution layers or try totally different model design
    # convolution: nn.Conv2d (https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html)
    # pool: nn.MaxPool2d (https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html)
    # linear: nn.Linear (https://pytorch.org/docs/stable/generated/torch.nn.Linear.html)

    def __init__(self):
        super(CNN,self).__init__()
        self.convolution_one = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu_one = nn.ReLU()
        self.pool_one = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.convolution_two = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu_two = nn.ReLU()
        self.pool_two = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.convolution_three = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu_three = nn.ReLU()
        self.pool_three = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc_one = nn.Linear(64 * 12 * 12, 128)  # Adjust the input size based on your data
        self.relu_four = nn.ReLU()
        
        self.fc_two = nn.Linear(128, 64)
        self.relu_five = nn.ReLU()
        
        self.fc_three = nn.Linear(64, 2)

    def forward(self, x):
        x = self.pool_one(self.relu_one(self.convolution_one(x)))
        x = self.pool_two(self.relu_two(self.convolution_two(x)))
        x = self.pool_three(self.relu_three(self.convolution_three(x)))
        
        x = x.view(x.size(0), -1)
        
        x = self.relu_four(self.fc_one(x))
        x = self.relu_five(self.fc_two(x))
        x = self.fc_three(x)
        
        return x

In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' # whether your device has GPU
cnn = CNN().to(device) # move the model to GPU

In [10]:
# search in official website for CrossEntropyLoss
criterion = nn.CrossEntropyLoss()

In [11]:
# try Adam optimizer (https://pytorch.org/docs/stable/generated/torch.optim.Adam.html) with learning rate 0.0001, feel free to use other optimizer
optimizer = optim.Adam(cnn.parameters(), lr=0.0001)

In [12]:
cnn.train()

CNN(
  (convolution_one): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu_one): ReLU()
  (pool_one): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convolution_two): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu_two): ReLU()
  (pool_two): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convolution_three): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu_three): ReLU()
  (pool_three): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc_one): Linear(in_features=9216, out_features=128, bias=True)
  (relu_four): ReLU()
  (fc_two): Linear(in_features=128, out_features=64, bias=True)
  (relu_five): ReLU()
  (fc_three): Linear(in_features=64, out_features=2, bias=True)
)

In [13]:
for epoch in range(epoch_size): # begin with trying 10 epochs 

    loss = 0.0 # you can print out average loss per batch every certain batches

    for i, data in enumerate(trainloader, 0):
        # get the inputs and label from dataloader
        inputs, label = data
        # move tensors to your current device (cpu or gpu)
        inputs = inputs.to(device)
        label = label.to(device)

        # zero the parameter gradients using zero_grad()
        optimizer.zero_grad()

        # forward -> compute loss -> backward propogation -> optimize (see tutorial mentioned in main documentation)
        outputs = cnn(inputs)  # Forward pass
        loss = criterion(outputs, label)  # Compute the loss
        loss.backward()  # Backward propagation
        optimizer.step()  # Optimization step
        # print some statistics
        loss += loss.item() # add loss for current batch 
        if i % 100 == 99:    # print out average loss every 100 batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {loss / 100:.3f}')
            loss = 0.0

[1,   100] loss: 0.014
[2,   100] loss: 0.013
[3,   100] loss: 0.012
[4,   100] loss: 0.010
[5,   100] loss: 0.014
[6,   100] loss: 0.011
[7,   100] loss: 0.010
[8,   100] loss: 0.009
[9,   100] loss: 0.009
[10,   100] loss: 0.007
[11,   100] loss: 0.012
[12,   100] loss: 0.011
[13,   100] loss: 0.006
[14,   100] loss: 0.013
[15,   100] loss: 0.006
[16,   100] loss: 0.004
[17,   100] loss: 0.006
[18,   100] loss: 0.009
[19,   100] loss: 0.011
[20,   100] loss: 0.005


In [14]:
print('Finished Training')

Finished Training


In [15]:
ground_truth = []
prediction = []

In [16]:
cnn.eval() 

CNN(
  (convolution_one): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu_one): ReLU()
  (pool_one): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convolution_two): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu_two): ReLU()
  (pool_two): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convolution_three): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu_three): ReLU()
  (pool_three): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc_one): Linear(in_features=9216, out_features=128, bias=True)
  (relu_four): ReLU()
  (fc_two): Linear(in_features=128, out_features=64, bias=True)
  (relu_five): ReLU()
  (fc_three): Linear(in_features=64, out_features=2, bias=True)
)

In [17]:
with torch.no_grad(): # since we're not training, we don't need to calculate the gradients for our outputs, so turn on no_grad mode
    for data in testloader:
        inputs, labels = data
        inputs = inputs.to(device)
        ground_truth += labels.tolist() # convert labels to list and append to ground_truth
        # calculate outputs by running inputs through the network
        outputs = cnn(inputs)
        # the class with the highest logit is what we choose as prediction
#         _, predicted = torch.max(...)
        _, predicted = torch.max(outputs, 1)
#         prediction += ... # convert predicted to list and append to prediction
        prediction += predicted.tolist()

In [18]:
# GradeScope is chekcing for these three variables, you can use sklearn to calculate the scores
# accuracy = accuracy_score(...)
# recall = recall_score(...)
# precision = precision_score(...)

accuracy = accuracy_score(ground_truth, prediction)
recall = recall_score(ground_truth, prediction, average='weighted')  # 'weighted' accounts for multi-class problems
precision = precision_score(ground_truth, prediction, average='weighted')

In [19]:
print(accuracy, recall, precision)

0.7775 0.7775 0.7804778461108393
