In [541]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torchvision
import torchvision.transforms as transforms

In [476]:
if torch.cuda.is_available():
    torch.cuda.set_device(0)

In [487]:
#files
bcFile = 'bcImages.npz'
lcFile = 'lcImages.npz'
leFile = 'leImages.npz'

Custom dataset and archive manager classes, and a method returning DataLoader objects

In [508]:
class imagesDataset(Dataset):
    def __init__(self, X, y):
        # self.X = torch.from_numpy(X).to(device='cuda', dtype=torch.float)
        self.X = torch.from_numpy(X).to(dtype=torch.float)
        self.X = self.X.to(memory_format=torch.contiguous_format)
        self.n = y.shape[0]
        # self.y = torch.from_numpy(y).to(device='cuda', dtype=torch.long)
        self.y = torch.from_numpy(y).to(dtype=torch.long)

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class fileSet():
    def __init__(self, filename):
        self.le = preprocessing.LabelEncoder()
        with np.load(filename, allow_pickle=True) as datafile:
            self.X = datafile['arr_0'].transpose(0, 3, 2, 1)
            self.y_labels = datafile['arr_1']
        self.y = self.le.fit_transform(self.y_labels)
        self.tts()

    def tts(self):
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=13, stratify=self.y) 
        self.train_ds = imagesDataset(X_train, y_train)
        self.test_ds = imagesDataset(X_test, y_test)

    def relabel(self, y):
        # check if y is on cpu or else do the move / numpy conversion first
        return self.le.inverse_transform(y.cpu().numpy())


def loadData(fileset, batchsize=128, reshuffle=False):
    if reshuffle:
        fileset.tts()
    trainLoader = DataLoader(fileset.train_ds, batch_size=batchsize, shuffle=True, num_workers=0)
    testLoader = DataLoader(fileset.test_ds, batch_size=batchsize, shuffle=True, num_workers=0)
    return trainLoader, testLoader

Loading the breast cancer image dataset

In [550]:
bcI = fileSet(bcFile)
trainLoader, testLoader = loadData(bcI, batchsize=500)

based on code from PyTorch Tutorial  
https://pytorch.org/tutorials/

In [576]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
        # self.conv3 = nn.Conv2d(20, 50, kernel_size=5)
        self.fc1 = nn.Linear(10580, 120) 
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 6) # number of classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # x = self.pool(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        print(x.size())
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# net = Net().cuda()
net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [577]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainLoader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        if i == 0:
            print(inputs.shape)
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

print('Finished Training')

torch.Size([120, 3, 100, 100])
torch.Size([120, 10580])
torch.Size([120, 3, 100, 100])
torch.Size([120, 10580])
Finished Training


In [578]:
correct = 0
total = 0
with torch.no_grad():
    for data in testLoader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(lcI.test_ds.n)
print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))

torch.Size([31, 10580])
72
Accuracy of the network on the test images: 25 %


Given several runs of this net with several different hyperparameters (viz. number of conv layers, output sizes of each conv layer, kernel sizes, fe layer sizes), the best the net performs is barely above chance (16%) accuracy.