# Base Image Classifier

> This notebook will attempt to classify the pneumonia images using a basic CNN, implemented using PyTorch.

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from torchvision import transforms
import os

import numpy as np
import matplotlib.pyplot as plt

import XRay_utils

> Define some global variables.

In [None]:
# Root folder of the dataset (relative path).
DATA_DIR = '../Data/'

# One sub-directory per split, each built by appending to DATA_DIR.
TRAIN_DATA, TEST_DATA, VAL_DATA = (
    DATA_DIR + subdir for subdir in ('train/', 'test/', 'val/')
)

# RESCALE_FACTOR is handed to XRay_utils.Rescale; MAX_IMAGE_SIZE is handed to
# XRay_utils.Pad and evaluates to (32, 32) for the values below.
# NOTE(review): 3200 x 3200 is presumably the largest raw image size — confirm.
RESCALE_FACTOR = 0.01
MAX_IMAGE_SIZE = tuple(np.multiply((3200, 3200), RESCALE_FACTOR).astype(int))

> Load in the data.

In [None]:
# Preprocessing pipeline applied to every sample, in this order:
# Rescale(RESCALE_FACTOR) -> Pad(MAX_IMAGE_SIZE, fill=0) -> ToTensor().
# NOTE(review): these transforms live in XRay_utils and are not shown here;
# presumably they shrink each image, pad it to a common size and convert it to
# a tensor — confirm against the module.
transf = transforms.Compose([
    XRay_utils.Rescale(RESCALE_FACTOR),
    XRay_utils.Pad(MAX_IMAGE_SIZE, fill=0),
    XRay_utils.ToTensor(),
])

# One dataset per split, all sharing the same preprocessing pipeline.
train_dataset = XRay_utils.XRayDataset(TRAIN_DATA, transform=transf)
test_dataset = XRay_utils.XRayDataset(TEST_DATA, transform=transf)
val_dataset = XRay_utils.XRayDataset(VAL_DATA, transform=transf)

# Sanity check: number of samples found in each split.
print('Training Samples: {}'.format(len(train_dataset)))
print('Testing Samples: {}'.format(len(test_dataset)))
print('Validation Samples: {}'.format(len(val_dataset)))

In [None]:
# Call XRay_utils.value_counts on each split directory under a printed heading.
# NOTE(review): value_counts is defined in XRay_utils and not visible here;
# presumably it reports the number of images per class — confirm.
print('Training: ')
XRay_utils.value_counts(TRAIN_DATA)
print('\nTesting: ')
XRay_utils.value_counts(TEST_DATA)
print('\nValidation: ')
# This is the last expression of the cell, so the notebook will also display
# its return value if that value is not None.
XRay_utils.value_counts(VAL_DATA)

> View a few sample images from the training set.

In [None]:
# Preview the first few training samples in one row, each titled with its
# class and index. The number of panels is capped by the dataset size, so the
# cell also works (and still calls plt.show()) when fewer than 4 samples exist.
n_preview = min(4, len(train_dataset))

if n_preview > 0:
    # squeeze=False keeps `axes` two-dimensional even for a single panel.
    fig, axes = plt.subplots(1, n_preview, squeeze=False)

    for i, ax in enumerate(axes[0]):
        sample = train_dataset[i]
        # Print the array shape of each displayed image as a quick sanity check.
        print(i, np.asarray(sample['image']).shape)

        ax.set_title('{} #{}'.format(sample['class'], i))
        ax.axis('off')
        ax.imshow(sample['image'], cmap='gray')

    # Lay out once, after every panel has been drawn.
    fig.tight_layout()
    plt.show()

In [None]:
# Batched loaders over the training and test datasets: 4 samples per batch,
# 2 worker processes each. Only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# Class names, indexed later by numeric label / predicted index.
classes = ('NORMAL', 'PNEUMONIA')

In [None]:
# Create an iterator over the training loader for ad-hoc inspection of batches.
# NOTE(review): nothing in this cell consumes the iterator, and `dataiter` is
# reassigned from `testloader` in a later cell.
dataiter = iter(trainloader)
# Uncomment to display the first image of a batch. Use the built-in next();
# the iterator's .next() method is not available in recent PyTorch releases.
#transforms.ToPILImage(mode='L')(next(dataiter)['image'][0])



In [None]:
# Model under training; the architecture is defined in XRay_utils.
net = XRay_utils.Net()

# Cross-entropy loss, optimised with SGD plus momentum over all of the
# network's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
# Peek at the class labels of the first training batch.
# Each batch is a dict (indexed with 'class' here and 'image' elsewhere), so
# it must be indexed by key; tuple-unpacking it would only yield the keys.
for sample_batch in trainloader:
    print(sample_batch['class'])
    break  # one batch is enough for a sanity check

In [None]:
# Train `net` for two passes over the training set, reporting the mean loss
# every `log_every` mini-batches.
log_every = 200  # number of mini-batches between loss reports

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Each batch is a dict with 'image' and 'class' entries.
        images = data['image']
        # Add an explicit channel axis. Using -1 for the batch dimension lets
        # the final batch be smaller than the loader's batch size.
        # NOTE(review): assumes one channel per image — confirm.
        inputs = images.view(-1, 1, images.shape[-2], images.shape[-1])
        labels = torch.from_numpy(XRay_utils.label_to_num(data['class'])).long()  # 0: NORMAL 1: PNEUMONIA

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics: mean loss over the last `log_every` mini-batches
        running_loss += loss.item()
        if i % log_every == log_every - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Fetch the first batch from the test loader.
dataiter = iter(testloader)
# Built-in next() works on every PyTorch version; the iterator's own .next()
# method is not available in recent releases.
data = next(dataiter)
images, labels = data['image'], XRay_utils.label_to_num(data['class'])

# Show the numeric labels and their class names for every sample in the batch.
print(labels)
print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels))

In [None]:
# Run the network on the test batch. The batch and image sizes are taken from
# the tensor itself instead of being hard-coded, and gradient tracking is
# disabled because this is inference only.
# NOTE(review): this does not switch the model to eval mode; call net.eval()
# first if Net contains dropout or batch-norm layers (not visible from here).
with torch.no_grad():
    outputs = net(images.view(-1, 1, images.shape[-2], images.shape[-1]))
outputs

In [None]:
# torch.max over dim 1 returns (values, indices); the index of the largest
# score is the predicted class. Indexing with [1] avoids assigning to `_`,
# which IPython uses for the previous cell's output.
predicted = torch.max(outputs, 1)[1]

# Print the predicted class name for every sample in the batch.
print('Predicted: ', ' '.join('%5s' % classes[idx]
                              for idx in predicted.tolist()))