# Testing the convNet model on real input data

In [1]:
from src.convNet.model import convNet
from torchvision import datasets, transforms
import torch
import numpy as np

In [9]:
H = W = 32
# Define transformations
transform = {
    'train': transforms.Compose([
        transforms.Resize([H,W]),
        transforms.RandomHorizontalFlip(),
        #transforms.RandomAffine(
           # degrees=(-15,15),
            #translate=(0,.3),
            #scale=(.01, .2),
            #shear=(.01, .2),
        #),
        transforms.ToTensor(),
        #transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
    ]),
    'test': transforms.Compose([
        transforms.Resize([H,W]),
        transforms.ToTensor(),
        #transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
    ])
}

In [10]:
train_data_folder = "/Users/jasperginn/PycharmProjects/Pneumonia/data/cell_images/train"
val_data_folder = "/Users/jasperginn/PycharmProjects/Pneumonia/data/cell_images/val"
# Set up data loaders
train_dataset = datasets.ImageFolder(
    root = train_data_folder,
    transform = transform["train"],
)
# Get classes
#print(train_dataset.class_to_idx)
#train_dataset.class_to_idx = {"Uninfected": 0, "Parasitized": 1}

# Validation data
val_dataset = datasets.ImageFolder(
    root = val_data_folder,
    transform = transform["test"]
)
#val_dataset.class_to_idx = {"Uninfected": 0, "Parasitized": 1}

We set up the dataloader

In [11]:
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4
)

val_data_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4
)

#for i in range(10):
#    batch_x, batch_y = next(iter(data_loader))
#    print(np.shape(batch_x), batch_y)

In [12]:
net = convNet(H, 32)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.11)

In [41]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


def compute_padding_size(image_dim: int, stride: int, kernel_size: int) -> int:
    """Compute the padding size given an input image of image_dim x image_dim,
        a stride and a filter size"""
    return int(math.ceil(((image_dim - 1) * stride + kernel_size - image_dim) / 2))


def compute_layer_size_conv2d(
    image_dim: int, stride: int, kernel_size: int, padding: int
) -> int:
    return int(((image_dim - kernel_size + 2 * padding) / stride) + 1)


class Net(nn.Module):
    def __init__(self, image_dim: int):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5, padding=compute_padding_size(32, 1, 5))
        self.pool1 = nn.MaxPool2d(3, 2, padding=compute_padding_size(32, 1, 3))
        self.conv2 = nn.Conv2d(16, 32, 5, padding=compute_padding_size(16, 1, 5))
        self.pool2 = nn.MaxPool2d(3, 2, padding=compute_padding_size(16, 1, 3))
        self.conv3 = nn.Conv2d(32, 64, 3, padding=compute_padding_size(8, 1, 3))
        self.pool3 = nn.MaxPool2d(3, 2,padding=compute_padding_size(16, 1, 3))
        self.conv4 = nn.Conv2d(64, 128, 3)
        self.global_avg_pool = nn.AvgPool2d(2, 2)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.4)
        self.fc1 = nn.Linear(128, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 16)
        self.bn2 = nn.BatchNorm1d(16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        x = F.relu(self.conv4(x))
        x = self.global_avg_pool(x)
        x = self.flatten(x)
        x = self.dropout(x)
        x = self.bn1(F.relu(self.fc1(x)))
        x = self.bn2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

In [42]:
net = Net(H)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
from torchsummary import summary
summary(net, (3, H, W))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]           1,216
         MaxPool2d-2           [-1, 16, 16, 16]               0
            Conv2d-3           [-1, 32, 16, 16]          12,832
         MaxPool2d-4             [-1, 32, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]          18,496
         MaxPool2d-6             [-1, 64, 4, 4]               0
            Conv2d-7            [-1, 128, 2, 2]          73,856
         AvgPool2d-8            [-1, 128, 1, 1]               0
           Flatten-9                  [-1, 128]               0
          Dropout-10                  [-1, 128]               0
           Linear-11                   [-1, 64]           8,256
      BatchNorm1d-12                   [-1, 64]             128
           Linear-13                   [-1, 16]           1,040
      BatchNorm1d-14                   

In [44]:
for epoch in range(3):
    running_loss = 0.0
    acc = 0
    batches = 0
    for i, data in enumerate(train_data_loader, 0):
        batch_x, batch_y = data
        # Zero gradients
        optimizer.zero_grad()
        # Forward pass, backward pass
        outputs = net(batch_x)
        loss = criterion(outputs.view(-1), batch_y.type(torch.FloatTensor))
        loss.backward()
        # Optimize parameters
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 20 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 20))
            running_loss = 0.0
        batches += 1
        outputs_class = outputs > 0
        acc_current = torch.sum(outputs_class.view(-1) == batch_y).numpy() / batch_y.shape[0]
        acc += acc_current
    acc /= batches
    print("Accuracy on train set is: %s" % acc)
    # On cross-validation set
    with torch.no_grad():
        acc = 0
        batches = 0
        for i, data in enumerate(val_data_loader, 0):
            batch_x, batch_y = data
            outputs = net(batch_x)
            loss = criterion(outputs.view(-1), batch_y.type(torch.FloatTensor)).item()
            # Predict
            outputs_class = outputs > 0
            acc_current = torch.sum(outputs_class.view(-1) == batch_y).numpy() / batch_y.shape[0]
            batches += 1
            acc += acc_current
        acc /= batches
        print("Accuracy on validation set is: %s" % acc)

[1,    20] loss: 0.277
[1,    40] loss: 0.280
[1,    60] loss: 0.320
[1,    80] loss: 0.266
[1,   100] loss: 0.271
[1,   120] loss: 0.234
[1,   140] loss: 0.284
[1,   160] loss: 0.242
[1,   180] loss: 0.251
[1,   200] loss: 0.242
[1,   220] loss: 0.232
[1,   240] loss: 0.235
[1,   260] loss: 0.244
[1,   280] loss: 0.209
[1,   300] loss: 0.237
[1,   320] loss: 0.182
[1,   340] loss: 0.193
[1,   360] loss: 0.211
[1,   380] loss: 0.196
[1,   400] loss: 0.201
[1,   420] loss: 0.188
Accuracy on train set is: 0.9078352808747856
Accuracy on validation set is: 0.8929148706896551
[2,    20] loss: 0.205
[2,    40] loss: 0.195
[2,    60] loss: 0.187
[2,    80] loss: 0.184
[2,   100] loss: 0.203
[2,   120] loss: 0.150
[2,   140] loss: 0.179
[2,   160] loss: 0.166
[2,   180] loss: 0.184
[2,   200] loss: 0.188
[2,   220] loss: 0.175
[2,   240] loss: 0.180
[2,   260] loss: 0.186
[2,   280] loss: 0.171
[2,   300] loss: 0.177
[2,   320] loss: 0.230
[2,   340] loss: 0.220
[2,   360] loss: 0.190
[2,   38

In [148]:
with torch.no_grad():
    acc = 0
    batches = 0
    for i, data in enumerate(val_data_loader, 0):
        batch_x, batch_y = data
        outputs = net(batch_x)
        loss = criterion(outputs.view(-1), batch_y.type(torch.FloatTensor)).item()
        # Predict
        outputs_class = outputs > 0
        acc_current = torch.sum(outputs_class.view(-1) == batch_y).numpy() / batch_y.shape[0]
        batches += 1
        acc += acc_current
    acc /= batches
    print("Accuracy on validation set is: %s" % acc)

Accuracy on validation set is: 0.9272629310344828


In [38]:
torch.round(outputs).view(-1).type(torch.LongTensor) == batch_y

tensor([False,  True, False,  True,  True, False,  True,  True,  True,  True,
         True, False,  True, False, False,  True,  True, False,  True,  True,
         True, False, False,  True, False,  True, False,  True,  True,  True,
        False, False,  True,  True,  True,  True, False, False,  True, False,
         True,  True, False, False, False, False,  True,  True, False, False,
        False,  True, False, False,  True,  True,  True, False, False,  True,
        False,  True,  True,  True])

In [147]:
torch.sum(outputs_class.view(-1) == batch_y).numpy() / batch_y.shape[0]

0.9482758620689655