In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as func
import torch.nn.init as torch_init
import torch.optim as optim

class LSTM(nn.Module):
    """ A basic LSTM model. 
    
    Consists of one hidden layer:
    
    conv1 -> conv2 -> conv3 -> maxpool -> fc1 -> fc2 (outputs)
    """
    
    def __init__(self, in_dim, out_dim, hid_dim, batch_size, no_layers =1):
        super(LSTM, self).__init__()
        #specify the input dimensions
        self.in_dim = in_dim
        #specify the output dimensions
        self.out_dim = out_dim
        #specify the batch size
        self.batch_size = batch_size
        #specify hidden layer dimensions
        self.hid_dim = hid_dim
        #specify the number of layers
        self.no_layers = no_layers  
        
        #initialise the LSTM
        self.model = nn.LSTM(self.in_dim, self.hid_dim, self.no_layers)
        #define the outputs of the model, we're using a softmax
        self.outputs = nn.Linear(self.hidden_dim, output_dim)

    def forward(self, batch):
        """Pass the batch of images through each layer of the network, applying 
        non-linearities after each layer.
        
        Note that this function *needs* to be called "forward" for PyTorch to 
        automagically perform the forward pass. 
        
        Params:
        -------
        - batch: (Tensor) An input batch of images

        Returns:
        --------
        - logits: (Variable) The output of the network
        """
        
        #input "batch" is a tensor of dimensions batch_sizexchunk_sizexdictionary_size
        # Forward pass through LSTM layer
        # shape of lstm_out: [input_size, batch_size, hidden_dim]
        # shape of self.hidden: (a, b), where a and b both 
        # have shape (num_layers, batch_size, hidden_dim).
        lstm_out, self.hidden = self.model(batch.view(len(batch), self.batch_size, -1))
        
        # Only take the output from the final timetep
        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.outputs(lstm_out[-1].view(self.batch_size, -1))
        tag_scores = func.softmax(y_pred, dim=1)
        return tag_scores
    def init_hidden(self)
            return (torch.zeros(self.no_layers, self.batch_size, self.hid_dim),
            torch.zeros(self.no_layers, self.batch_size, self.hid_dim))

        

SyntaxError: invalid syntax (<ipython-input-1-3419ebcd6673>, line 62)

In [1]:
from baseline_cnn import *
from baseline_cnn import BasicCNN
import pdb

# Setup: initialize the hyperparameters/variables
num_epochs = 5           # Number of full passes through the dataset
batch_size = 32          # Number of samples in each minibatch
learning_rate = 0.001  
seed = np.random.seed(1) # Seed the random number generator for reproducibility
p_val = 0.1              # Percent of the overall dataset to reserve for validation
p_test = 0.2             # Percent of the overall dataset to reserve for testing

# Transform 

#TODO: Convert to Tensor - you can later add other transformations, such as Scaling here
#baseline
transform = transforms.Compose([transforms.Resize([512,512]),transforms.ToTensor()])
#transform = transforms.Compose([transforms.Resize([512,512]),transforms.RandomRotation([-180,180]), 
#                                transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5])])
                  
# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 2, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    extras = False
    print("CUDA NOT supported")

# Setup the training, validation, and testing dataloaders
train_loader, val_loader, test_loader = create_split_loaders(batch_size, seed, transform=transform, 
                                                             p_val=p_val, p_test=p_test,
                                                             shuffle=True, show_sample=False, 
                                                             extras=extras)

# Instantiate a BasicCNN to run on the GPU or CPU based on CUDA support
model = BasicCNN()
model = model.to(computing_device)
print("Model on CUDA?", next(model.parameters()).is_cuda)

#TODO: Define the loss criterion and instantiate the gradient descent optimizer
criterion =torch.nn.BCELoss() #TODO - loss criteria are defined in the torch.nn package
#criterion = torch.nn.BCEWithLogitsLoss()
#criterion = custom_bce()#didnt work like this

#TODO: Instantiate the gradient descent optimizer - use Adam optimizer with default parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #TODO - optimizers are defined in the torch.optim package

CUDA is supported
BasicCNN initialized
Model on CUDA? True


In [2]:
def validate(val_loader, model, criterion):
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()  
    for minibatch_count, (images, labels) in enumerate(val_loader, 0):
        #print(minibatch_count)
        # Put the minibatch data in CUDA Tensors and run on the GPU if supported
        images, labels = images.to(computing_device), labels.to(computing_device)
        # Perform the forward pass through the network and compute the loss
        outputs = model(images)
        loss = criterion(outputs, labels)
        #loss = custom_bce(outputs, labels)
        
        modelDecision=(outputs>=0.5)
        accuracy = calculate_acc(modelDecision,labels)
        losses.update(loss.item(), labels.shape[0])
        acc.update(accuracy, labels.shape[0])

    return acc,losses
class AverageMeter(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def calculate_TP_FP_FN(modelDecision,labels):
    modelDecNp=modelDecision.cpu().numpy()
    labelsNp=labels.cpu().numpy()
    numClasses=labelsNp.shape[1]
    TP=np.zeros(numClasses)
    FP=np.zeros(numClasses)
    FN=np.zeros(numClasses)
    for i in range(numClasses):
        classOut=modelDecNp[:,i]
        classLabel=labelsNp[:,i]
        pos_label=np.argwhere(classLabel==1)
        neg_label=np.argwhere(classLabel==0)
        TP[i]=np.sum(classOut[pos_label])
        FP[i]=np.sum(classOut[neg_label])
        FN[i]=np.sum(classLabel)-TP[i]  
    return TP,FP,FN

def calculate_acc(modelDecision,labels):
    acc=torch.sum((labels==modelDecision.to(dtype=torch.float)).to(dtype=torch.float),0)/labels.shape[0]
    return acc

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [3]:
# Track the loss across training
total_loss = []
avg_minibatch_loss = []
best_loss = 100
acc_train_list=[]
acc_val_list=[]

loss_train_list=[]
loss_val_list=[]


# Begin training procedure
for epoch in range(num_epochs):
    model.train(True)
    N = 50
    N_minibatch_loss = 0.0    
    train_acc = AverageMeter()
    
    # Get the next minibatch of images, labels for training
    for minibatch_count, (images, labels) in enumerate(train_loader, 0):

        # Put the minibatch data in CUDA Tensors and run on the GPU if supported
        images, labels = images.to(computing_device), labels.to(computing_device)

        # Zero out the stored gradient (buffer) from the previous iteration
        optimizer.zero_grad()

        # Perform the forward pass through the network and compute the loss
        outputs = model(images)
        loss = criterion(outputs, labels)
        #loss = custom_bce(outputs, labels)
        # Automagically compute the gradients and backpropagate the loss through the network
        loss.backward()

        # Update the weights
        optimizer.step()

        #Calculate accuracy
        modelDecision=(outputs>=0.5)
        accuracy = calculate_acc(modelDecision,labels)
        train_acc.update(accuracy, labels.shape[0])
        
        # Add this iteration's loss to the total_loss
        total_loss.append(loss.item())
        N_minibatch_loss += loss
        #
        if minibatch_count % N == 0:    
            
            # Print the loss averaged over the last N mini-batches    
            N_minibatch_loss /= N
            print('Epoch %d, average minibatch %d loss: %.3f, average accuracy: %.3f' %
                (epoch + 1, minibatch_count, N_minibatch_loss,train_acc.avg.mean()))
            
            # Add the averaged loss over N minibatches and reset the counter
            avg_minibatch_loss.append(N_minibatch_loss)
            N_minibatch_loss = 0.0

            
    print("Finished", epoch + 1, "epochs of training")
    #TODO: Implement cross-validation
    acc_val,loss_val = validate(val_loader, model, criterion)
    
    acc_train_list.append(train_acc.avg.mean())
    acc_val_list.append((acc_val.avg).mean())
    
    loss_train_list.append(total_loss)
    loss_val_list.append(loss_val.avg)
    print('val_loss: %.3f, average accuracy of all classes: %.3f'%(loss_val.avg,(acc_val.avg).mean()))
    
    # remember best loss and save checkpoint
    is_best = loss_val.avg <= best_loss
    best_loss = max(loss_val.avg, best_loss)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_loss': best_loss,
        'optimizer': optimizer.state_dict(),
        'train_loader':train_loader,
        'val_loader':val_loader,
        'test_loader':test_loader,
    }, is_best)
    
print("Training complete after", epoch, "epochs")


Epoch 1, average minibatch 0 loss: 0.015, average accuracy: 0.525
Epoch 1, average minibatch 50 loss: 0.717, average accuracy: 0.537
Epoch 1, average minibatch 100 loss: 0.687, average accuracy: 0.577
Epoch 1, average minibatch 150 loss: 0.660, average accuracy: 0.612
Epoch 1, average minibatch 200 loss: 0.638, average accuracy: 0.639
Epoch 1, average minibatch 250 loss: 0.614, average accuracy: 0.665
Epoch 1, average minibatch 300 loss: 0.594, average accuracy: 0.688
Epoch 1, average minibatch 350 loss: 0.574, average accuracy: 0.709
Epoch 1, average minibatch 400 loss: 0.556, average accuracy: 0.729
Epoch 1, average minibatch 450 loss: 0.540, average accuracy: 0.745
Epoch 1, average minibatch 500 loss: 0.521, average accuracy: 0.761
Epoch 1, average minibatch 550 loss: 0.505, average accuracy: 0.775
Epoch 1, average minibatch 600 loss: 0.491, average accuracy: 0.788
Epoch 1, average minibatch 650 loss: 0.475, average accuracy: 0.799
Epoch 1, average minibatch 700 loss: 0.463, average

Epoch 3, average minibatch 800 loss: 0.176, average accuracy: 0.949
Epoch 3, average minibatch 850 loss: 0.175, average accuracy: 0.949
Epoch 3, average minibatch 900 loss: 0.173, average accuracy: 0.949
Epoch 3, average minibatch 950 loss: 0.171, average accuracy: 0.949
Epoch 3, average minibatch 1000 loss: 0.177, average accuracy: 0.949
Epoch 3, average minibatch 1050 loss: 0.172, average accuracy: 0.948
Epoch 3, average minibatch 1100 loss: 0.170, average accuracy: 0.949
Epoch 3, average minibatch 1150 loss: 0.182, average accuracy: 0.948
Epoch 3, average minibatch 1200 loss: 0.169, average accuracy: 0.948
Epoch 3, average minibatch 1250 loss: 0.175, average accuracy: 0.948
Epoch 3, average minibatch 1300 loss: 0.172, average accuracy: 0.948
Epoch 3, average minibatch 1350 loss: 0.173, average accuracy: 0.948
Epoch 3, average minibatch 1400 loss: 0.165, average accuracy: 0.948
Epoch 3, average minibatch 1450 loss: 0.166, average accuracy: 0.948
Epoch 3, average minibatch 1500 loss: 

Epoch 5, average minibatch 1550 loss: 0.140, average accuracy: 0.951
Epoch 5, average minibatch 1600 loss: 0.137, average accuracy: 0.951
Epoch 5, average minibatch 1650 loss: 0.135, average accuracy: 0.951
Epoch 5, average minibatch 1700 loss: 0.142, average accuracy: 0.951
Epoch 5, average minibatch 1750 loss: 0.139, average accuracy: 0.951
Epoch 5, average minibatch 1800 loss: 0.144, average accuracy: 0.951
Epoch 5, average minibatch 1850 loss: 0.136, average accuracy: 0.951
Epoch 5, average minibatch 1900 loss: 0.140, average accuracy: 0.951
Epoch 5, average minibatch 1950 loss: 0.143, average accuracy: 0.951
Epoch 5, average minibatch 2000 loss: 0.142, average accuracy: 0.951
Epoch 5, average minibatch 2050 loss: 0.147, average accuracy: 0.951
Epoch 5, average minibatch 2100 loss: 0.139, average accuracy: 0.951
Epoch 5, average minibatch 2150 loss: 0.141, average accuracy: 0.951
Epoch 5, average minibatch 2200 loss: 0.142, average accuracy: 0.951
Epoch 5, average minibatch 2250 lo

In [4]:
import pickle
results = { "acc_train_list": acc_train_list, "acc_val_list": acc_val_list, "loss_train_list":loss_train_list,"loss_val_list":loss_val_list}
pickle.dump( results, open( "results_base.p", "wb" ) )

In [None]:
a=pickle.load( open( "results_base.p", "rb" ) )

In [None]:
#Evaluation metrics
modelDecision=(outputs>=0.5)
#i)
accuracy = calculate_acc(modelDecision,labels)
#other evaluation on test
TP,FP,FN = calculate_TP_FP_FN(modelDecision,labels)
precision=TP/(FP+TP+np.finfo(float).eps)
recall=TP/(TP+FN+np.finfo(float).eps)
BCR=(precision+recall)/2.0
agg_precision=np.mean(precision)
agg_recall=np.mean(recall)
agg_BCR=np.mean(BCR)

def confusion_matrix(modelDecision,labels):
    modelDecNp=modelDecision.cpu().numpy()
    labelsNp=labels.cpu().numpy()
    numClasses=labelsNp.shape[1]
    #additional row and column for no disease
    conf_mat=np.zeros((numClasses+1,numClasses+1))
    ######
    #There was also discussion in Piazza about how to do this. It is now clear with edge cases so I left this
    #####
    return conf_mat
