In [1]:
!python --version

Python 3.9.12


In [2]:
!pip install torch torchvision
!pip install visdom



In [2]:
import torch
import h5py
import os
import torch.nn as nn
import torchvision.models as models
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
import torch.utils.data as data_utils

In [18]:
class PatchCamelyon(data_utils.Dataset):
    """One split of the PatchCamelyon data, read sample-by-sample from HDF5.

    Images come from dataset ``"x"`` of ``camelyonpatch_level_2_split_<mode>_x.h5``
    and labels from dataset ``"y"`` of the matching ``..._y.h5`` file. Samples
    are read from disk on access instead of being loaded into memory up front.

    Args:
        mode: Which split to open: ``'train'``, ``'valid'`` or ``'test'``.
        batch_size: Stored on the instance only; batching is left to the
            ``DataLoader`` that wraps this dataset.
        n_iters: Stored on the instance only.
        augment: If true, apply colour jitter, a small random affine
            transform and random flips before tensor conversion.
        data_dir: Directory containing the ``.h5`` files. The default ``''``
            resolves the file names relative to the working directory.
    """

    def __init__(self, mode='train', batch_size=32, n_iters=None, augment=False, data_dir=''):
        super().__init__()

        # Kept as attributes for backward compatibility; indexing and length
        # below do not depend on them.
        self.n_iters = n_iters
        self.batch_size = batch_size

        assert mode in ['train', 'valid', 'test'], "mode must be 'train', 'valid' or 'test'"
        base_name = "camelyonpatch_level_2_split_{}_{}.h5"

        print('\n')
        print("# " * 50)
        print('Loading {} dataset...'.format(mode))

        # Open the files read-only; samples are fetched lazily in __getitem__.
        self.h5X = h5py.File(os.path.join(data_dir, base_name.format(mode, 'x')), 'r')
        try:
            self.h5y = h5py.File(os.path.join(data_dir, base_name.format(mode, 'y')), 'r')
        except Exception:
            # Do not leak the image file handle when the label file is missing.
            self.h5X.close()
            raise

        print("# " * 50)

        steps = [transforms.ToPILImage()]
        if augment:
            steps += [
                transforms.ColorJitter(brightness=.5, saturation=.25, hue=.1, contrast=.5),
                # `fill` replaces the deprecated `fillcolor` keyword; white
                # fills the corners exposed by the rotation/translation.
                transforms.RandomAffine(10, (0.05, 0.05), fill=(255, 255, 255)),
                transforms.RandomHorizontalFlip(.5),
                transforms.RandomVerticalFlip(.5),
            ]
        steps += [
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ]
        self.transform = transforms.Compose(steps)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is read as an array and passed through ``self.transform``;
        the label is flattened to a 1-D tensor.
        """
        # size: 96x96x3
        image = np.array(self.h5X["x"][idx])
        # The stored label has singleton dimensions; flatten it to shape (1,).
        label = torch.tensor(self.h5y["y"][idx]).view(-1)

        return self.transform(image), label

    def __len__(self):
        """Number of samples in the split (length of the ``"x"`` dataset)."""
        return len(self.h5X["x"])

    def close(self):
        """Close both underlying HDF5 files."""
        self.h5X.close()
        self.h5y.close()


In [10]:
base_name = "camelyonpatch_level_2_split_{}_{}.h5"
mode = 'train'

# Open the image ('x') and label ('y') files of the chosen split read-only.
h5X, h5y = (
    h5py.File(os.path.join(base_name.format(mode, kind)), 'r')
    for kind in ('x', 'y')
)

# Show the dataset descriptor (shape and dtype) without loading the data.
images_ds = h5X.get("x")
print(images_ds)

# Pull out the first sample and its label.
X = np.array(images_ds[0])
y = h5y.get('y')[0]

# Shape of the raw label once wrapped in a tensor, then of the image array.
print(torch.tensor(y).shape)
print(X.shape)


<HDF5 dataset "x": shape (262144, 96, 96, 3), type "|u1">
torch.Size([1, 1, 1])
(96, 96, 3)


In [6]:
# `X` holds a single image here, so len(X) would report the image height
# rather than the sample count; ask the HDF5 dataset for its length instead.
print('Loaded {} dataset with {} samples'.format(mode, len(h5X.get('x'))))

Loaded train dataset with 262144 samples


In [12]:
import matplotlib.pyplot as plt

# def metrics(prediction, target):

#     prediction_binary = torch.ge(prediction, 0.5).float()
#     N = target.numel()

#     # True positives, true negative, false positives, false negatives calculation
#     tp = torch.nonzero(prediction_binary * target).shape[0]
#     tn = torch.nonzero((1 - prediction_binary) * (1 - target)).shape[0]
#     fp = torch.nonzero(prediction_binary * (1 - target)).shape[0]
#     fn = torch.nonzero((1 - prediction_binary) * target).shape[0]

#     # Metrics
#     accuracy = (tp + tn) / N
#     precision = 0. if tp == 0 else tp / (tp + fp)
#     recall = 0. if tp == 0 else tp / (tp + fn)
#     specificity = 0. if tn == 0 else tn / (tn + fp)
#     f1 = 0. if precision == 0 or recall == 0 else (2 * precision * recall) / (precision + recall)

#     return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1, 'specificity': specificity}


def loss(prediction, target):
    """Class-weighted binary cross-entropy on probabilities.

    ``prediction`` is clamped to at least 1e-3 in the positive term and to at
    most .999 in the negative term before the logarithm is taken. The positive
    term is weighted by 1.33 and the negative term by .66. Returns the negated
    mean over all elements.
    """
    fn_weight = 1.33  # False negative penalty
    fp_weight = .66  # False positive penalty

    log_positive = torch.log(torch.clamp(prediction, min=1e-3))
    log_negative = torch.log(1. - torch.clamp(prediction, max=.999))

    weighted = fn_weight * target * log_positive + fp_weight * (1. - target) * log_negative
    return -torch.mean(weighted)


# `env` and `plots` appear to be left over from an earlier visdom-based
# version of plot(); the matplotlib implementation below does not read them.
env = 'main'
plots = {}


def plot(var_name, split_name, title_name, x, y):
    """Draw one curve in its own matplotlib figure and show it.

    Args:
        var_name: Name of the plotted quantity (e.g. ``'loss'``); used as the
            y-axis label.
        split_name: Name of the data split the curve belongs to; used as the
            legend entry.
        title_name: Figure title.
        x: Horizontal coordinates (iteration indices).
        y: Values of ``var_name`` at each ``x``.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(f'{title_name}')
    ax.plot(x, y, label=f'{split_name}')
    # The horizontal axis counts iterations and the vertical axis carries the
    # metric; the split name belongs in the legend, not on an axis.
    ax.set_xlabel('Iterations')
    ax.set_ylabel(f'{var_name}')
    ax.legend()
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def sliding_window(image_shape, window_shape, stride=None):
    """Compute window corner coordinates covering a padded image.

    Args:
        image_shape: ``(height, width, ...)`` of the image.
        window_shape: ``(height, width)`` of one window.
        stride: ``(step_y, step_x)``; defaults to the window size, i.e.
            non-overlapping windows.

    Returns:
        A tuple ``(boxes, padding)``. ``boxes`` is an array of shape
        ``(rows, cols, 4)`` holding ``[x1, y1, x2, y2]`` for each window, and
        ``padding`` is ``{'x': ..., 'y': ...}``, the amount needed to make each
        image dimension a multiple of the window size.
    """

    def _pad_to_multiple(extent, window):
        # Extra pixels required so that `extent` becomes a multiple of `window`.
        remainder = extent % window
        return window - remainder if remainder != 0 else 0

    win_h, win_w = window_shape[0], window_shape[1]

    if stride is None:
        stride = (win_h, win_w)

    # Padding
    padding_x = _pad_to_multiple(image_shape[1], win_w)
    padding_y = _pad_to_multiple(image_shape[0], win_h)
    padded_h = image_shape[0] + padding_y
    padded_w = image_shape[1] + padding_x

    # Top-left corners on a regular grid over the padded image.
    left, top = np.meshgrid(np.arange(0, padded_w, stride[1]),
                            np.arange(0, padded_h, stride[0]))

    # Bottom-right corners follow from the window size.
    right = left + win_w
    bottom = top + win_h

    boxes = np.stack([left, top, right, bottom], axis=2)
    return boxes, {'x': padding_x, 'y': padding_y}

In [8]:
# Stand-alone copies of the classifier's building blocks for interactive inspection.
back = models.mobilenet_v2(pretrained=True)

# Keep every child module of the pretrained network except the last one.
backbone = nn.Sequential(*[*back.children()][:-1])

pool = nn.MaxPool2d(kernel_size=3, stride=1)
fc = nn.Sequential(
    nn.Linear(in_features=1280, out_features=1),
    nn.Sigmoid(),
)



In [13]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim

class CamelyonClassifier(nn.Module):
    """Pretrained MobileNetV2 feature extractor with a single-output linear head.

    ``forward`` returns raw logits of shape ``(batch, 1)``; no sigmoid is
    applied, so pair it with a logit-based loss such as
    ``nn.BCEWithLogitsLoss``.
    """

    def __init__(self):
        super().__init__()

        backbone = models.mobilenet_v2(pretrained=True)
        # Keep every child module except the last one.
        self.backbone = nn.Sequential(*list(backbone.children())[:-1])
        # Global max-pool over the spatial dimensions. On the 3x3 feature map
        # that 96x96 inputs produce this equals MaxPool2d(3, 1), but it also
        # reduces other feature-map sizes to 1x1, so the 1280-wide linear
        # layer below keeps working for other input resolutions.
        self.pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(nn.Linear(1280, 1))

        n_params = sum(p.numel() for p in self.parameters())

        print("\n")
        print("# " * 50)
        print("MobileNet v2 initialized with {:.3e} parameters".format(n_params))
        print("# " * 50)
        print("\n")

    def forward(self, x):
        """Map an image batch ``x`` to one logit per sample, shape ``(batch, 1)``."""
        pooled = self.pool(self.backbone(x))
        # Flatten everything but the batch dimension before the linear head.
        return self.fc(torch.flatten(pooled, 1))

    def print_modules(self):
        """Print every sub-module (as yielded by ``self.modules()``) with its index."""
        for idx, module in enumerate(self.modules()):
            print("Module : ", idx)
            print(module)
            print("\n")


if __name__ == '__main__':
    # Smoke test: push a dummy batch of two 96x96 RGB images through the
    # network and print the shape of what comes out.
    model = CamelyonClassifier()
    zeros = torch.zeros(2, 3, 96, 96)
    print(model(zeros).shape)

[W NNPACK.cpp:51] Could not initialize NNPACK! Reason: Unsupported hardware.




# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
MobileNet v2 initialized with 2.225e+06 parameters
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 


torch.Size([2, 1])


In [32]:
# Network to be trained.
model = CamelyonClassifier()

# Training split with augmentation, validation split without.
dataset_train = PatchCamelyon(mode='train', batch_size=32, augment=True)
dataset_valid = PatchCamelyon(mode='valid', batch_size=32)

# Batching and shuffling are handled by the loaders.
train_loader = data_utils.DataLoader(
    dataset_train,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)
valid_loader = data_utils.DataLoader(
    dataset_valid,
    batch_size=8,
    shuffle=False,
    num_workers=0,
)

optimizer = optim.Adam(
    model.parameters(),
    lr=5e-4,
    betas=(0.9, 0.999),
    weight_decay=1e-8,
)

# Loss function: operates on the raw logits the model returns
# (the hand-written `loss` defined earlier is not used here).
criterion = nn.BCEWithLogitsLoss()

def train():
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``train_loader``. After every batch the running mean of the training loss
    is printed on a single, continuously overwritten line.

    Returns:
        list[float]: The loss of every processed batch, in order.
    """
    model.train()

    losses = []
    running_total = 0.0  # sum of batch losses, so the mean costs O(1) per step

    for iteration, (images, labels) in enumerate(train_loader):

        # Zero gradient
        optimizer.zero_grad()

        # Forward pass
        predicted = model(images)

        # Loss: the criterion needs float targets, the dataset yields integers.
        batch_loss = criterion(predicted, labels.float())
        loss_value = batch_loss.item()
        losses.append(loss_value)
        running_total += loss_value

        # Back-propagation
        batch_loss.backward()
        optimizer.step()

        print("{}\t\t Train Loss: {:.4f}".format(iteration, running_total / len(losses)),
              end="\r")

    # NOTE(review): periodic validation, plotting and checkpointing were
    # disabled in the original loop and are intentionally not run here.
    return losses

def validation():
    """Evaluate ``model`` on the validation split.

    Uses the module-level ``model``, ``criterion`` and ``valid_loader``. The
    model is switched to eval mode and is left in that mode; callers that
    continue training must call ``model.train()`` again.

    Loss is averaged per sample. Classification metrics are computed once
    from confusion counts pooled over the whole split, with a prediction
    counted as positive when ``sigmoid(logit) >= 0.5``.

    Returns:
        tuple of five scalar tensors:
        ``(loss, accuracy, f1, specificity, precision)``.
    """
    model.eval()

    loss_total = 0.0
    n_samples = 0
    tp = tn = fp = fn = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in valid_loader:
            # The criterion needs float targets, the dataset yields integers.
            labels = labels.float()

            # Forward pass (the model returns logits)
            logits = model(images)

            # Loss, weighted by batch size so a smaller final batch does not
            # skew the average.
            batch_size = labels.shape[0]
            loss_total += criterion(logits, labels).item() * batch_size
            n_samples += batch_size

            # Threshold probabilities, not raw logits.
            predicted_pos = torch.sigmoid(logits) >= 0.5
            actual_pos = labels >= 0.5

            # True positives, true negatives, false positives, false negatives
            tp += int((predicted_pos & actual_pos).sum())
            tn += int((~predicted_pos & ~actual_pos).sum())
            fp += int((predicted_pos & ~actual_pos).sum())
            fn += int((~predicted_pos & actual_pos).sum())

    # Metrics, guarded against empty denominators
    accuracy = (tp + tn) / n_samples if n_samples else 0.
    precision = tp / (tp + fp) if tp else 0.
    recall = tp / (tp + fn) if tp else 0.
    specificity = tn / (tn + fp) if tn else 0.
    f1 = (2 * precision * recall) / (precision + recall) if precision and recall else 0.

    # An empty validation set yields NaN, like the mean of an empty tensor.
    mean_loss = loss_total / n_samples if n_samples else float('nan')

    return torch.tensor(mean_loss), torch.tensor(accuracy), torch.tensor(f1), \
           torch.tensor(specificity), torch.tensor(precision)


# Entry point: start training when this code runs as the main module.
if __name__ == "__main__":
    train()



# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
MobileNet v2 initialized with 2.225e+06 parameters
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 




# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
Loading train dataset...
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
Loading valid dataset...
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
32		 Train Loss: 0.5744

KeyboardInterrupt: 