In [None]:
#from baseline_cnn import *

import numpy as np
import PIL
from metrics import evaluate
from mybceloss import MyBCEloss
import torchvision.models as models
from xray_dataloader_z_score import ChestXrayDataset, create_split_loaders
import torchvision
from torchvision import transforms, utils
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as func
import torch.nn.init as torch_init
import torch.optim as optim

# Setup: initialize the hyperparameters/variables
num_epochs = 1           # Number of full passes through the dataset
batch_size = 16          # Number of samples in each minibatch
learning_rate = 0.001    # Step size used when constructing the optimizer
# np.random.seed() returns None, so the integer seed is kept in its own
# variable; assigning the call's result would hand None to every later
# consumer of `seed` (it is passed to create_split_loaders further down).
seed = 1
np.random.seed(seed)     # Seed NumPy's global random number generator for reproducibility
p_val = 0.1              # Share of the overall dataset to reserve for validation
p_test = 0.2             # Share of the overall dataset to reserve for testing

def test(model,dataset):
    """Evaluate `model` on every minibatch of `dataset` and print averaged metrics.

    Relies on the module-level `computing_device`, `criterion` and `evaluate`.
    The model is switched to evaluation mode for the duration of the call (so
    layers such as dropout and batch normalisation do not behave as in
    training) and its previous mode is restored afterwards.

    params:
        model:   network to evaluate; called as model(images) and its raw
                 outputs are passed through a sigmoid.
        dataset: iterable yielding (images, labels) minibatches.
    return:
        loss averaged over all minibatches (same type `criterion` returns).
    raises:
        ValueError: if `dataset` yields no minibatches.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            temp_loss = 0
            temp_acc = 0
            temp_precision=0
            temp_recall = 0
            temp_BCR =0
            minibatch_count = -1  # stays -1 when the loader is empty
            for minibatch_count,(images,labels) in enumerate(dataset,0):
                images,labels = images.to(computing_device),labels.to(computing_device)
                outputs = model(images)
                outputs = torch.sigmoid(outputs)
                loss = criterion(outputs,labels)
                # evaluate() returns (averaged metrics, per-class metrics); only
                # the averaged tuple is accumulated here.
                (acc,pre,rec,BCR),_ = evaluate(outputs.cpu().data.numpy(),label = labels.cpu().data.numpy())
                temp_loss += loss
                temp_acc +=acc
                temp_precision+=pre
                temp_recall +=rec
                temp_BCR +=BCR

            if minibatch_count < 0:
                raise ValueError("test() received a dataset with no minibatches")

            # minibatch_count is the index of the last batch, so +1 gives the count.
            num_batches = minibatch_count+1
            temp_loss= temp_loss/num_batches
            temp_acc= temp_acc/num_batches
            temp_precision= temp_precision/num_batches
            temp_recall= temp_recall/num_batches
            temp_BCR= temp_BCR/num_batches
            print("Validation loss after %d minibatch is %.3f,acc is %.3f,precision is %.3f,recall is %.3f,BCR is %.3f"%(minibatch_count,temp_loss,temp_acc,temp_precision,temp_recall,temp_BCR))

            return(temp_loss)
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)
        


# Preprocessing/augmentation pipeline handed to create_split_loaders below:
# resize to 224x224, random rotation of up to 20 degrees, colour jitter,
# random horizontal flip, then conversion to a tensor.
# NOTE(review): the same transform object is passed for all three splits, so
# the random augmentations presumably also run on validation/test - confirm.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomRotation(20, resample=PIL.Image.BILINEAR),
    transforms.ColorJitter(brightness=64/255, contrast=.25, saturation=.25, hue=.04),
    # Reference: http://www.voidcn.com/article/p-dmjhonsq-bgn.html
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
])


# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 4, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    # Falsy placeholder: no extra loader options on CPU (presumably
    # create_split_loaders treats a falsy `extras` as "use defaults" - confirm).
    extras = False
    print("CUDA NOT supported")

# Setup the training, validation, and testing dataloaders
train_loader, val_loader, test_loader = create_split_loaders(batch_size, seed, transform=transform,
                                                             p_val=p_val, p_test=p_test,
                                                             shuffle=True, show_sample=False,
                                                             extras=extras)

# Instantiate a pretrained DenseNet-161 to run on the GPU or CPU based on CUDA support.
# `pretrained` is a boolean flag; the previous string value only worked because
# any non-empty string is truthy.
model = torchvision.models.densenet161(pretrained=True)
n_class = 14
freeze = True
if freeze:
    # Freeze every existing parameter; the classifier created below is a new
    # layer, so it is the only part left trainable.
    for param in model.parameters():
        param.requires_grad = False

# Replace the classification head with a fresh linear layer producing n_class outputs.
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, n_class)
model = model.to(computing_device)

print("Model on CUDA?", next(model.parameters()).is_cuda)

#TODO: Define the loss criterion and instantiate the gradient descent optimizer
# Alternatives that were considered (not sure which one to choose):
#criterion = torch.nn.MultiLabelSoftMarginLoss()
# criterion = torch.nn.CrossEntropyLoss()
criterion = MyBCEloss(computing_device, True, True)

class Loss(torch.nn.Module):
    """Error-scaled binary cross-entropy, optionally weighted, summed to a scalar.

    Every element's cross-entropy is multiplied by |t - y|, and the term for
    negative targets (1 - t) is additionally scaled by `pen`.
    """

    def __init__(self, weight=None, pen=1e-1):
        """
        params:
            weight: optional tensor multiplied into the per-element loss
                    (must broadcast against it); None disables weighting.
            pen:    scale applied to the negative-target term.
        """
        super().__init__()
        self.pen = pen
        self.weight = weight

    def forward(self, y, t):
        """
        params:
            y: predicted probabilities.
            t: targets, same shape as y.
        return:
            sum of the per-element losses as a scalar tensor.
        """
        eps = 1e-8  # keeps log() away from log(0)
        positive_term = t * torch.log(y + eps)
        negative_term = self.pen * (1 - t) * torch.log(1 - y + eps)
        per_element = -torch.abs(t - y) * (positive_term + negative_term)

        if self.weight is None:
            return torch.sum(per_element)

        per_element *= self.weight
        return torch.sum(per_element)
# One multiplier per output class (14 values), moved to the active device and
# scaled down by 10. Presumably derived from class frequencies - TODO confirm.
WEIGHTS = torch.tensor([
    12.84306987, 55.5324418, 11.7501572, 7.83946301, 26.91956783,
    24.54465849, 117.64952781, 30.0670421, 33.64945978, 67.95151515,
    61.70610897, 91.7512275, 45.91318591, 671.37724551,
]).to(computing_device) / 10
# Rebinds `criterion` to the custom Loss, using the weights above and a 0.4
# scale on the negative-target term.
criterion = Loss(weight=WEIGHTS, pen=0.4)

# Adam optimizer over the model's parameters with the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)

# Track the loss across training
total_loss = []          # loss of every individual minibatch
avg_minibatch_loss = []  # mean loss of each reporting window (plain floats)
validation_loss = []     # reserved for validation results; validation is not wired in yet

# Begin training procedure
for epoch in range(num_epochs):
    N = 50                  # a report is printed whenever minibatch_count % N == 0
    N_minibatch_loss = 0.0  # running sums over the current reporting window
    N_acc = 0.0
    N_minibatch_acc = 0.0
    window_batches = 0      # number of minibatches summed into the running totals

    # Get the next minibatch of images, labels for training
    for minibatch_count, (images, labels) in enumerate(train_loader, 0):

        # Put the minibatch data in CUDA Tensors and run on the GPU if supported
        images, labels = images.to(computing_device), labels.to(computing_device)

        # Zero out the stored gradient (buffer) from the previous iteration
        optimizer.zero_grad()

        # Perform the forward pass through the network and compute the loss
        outputs = model(images)
        outputs = torch.sigmoid(outputs)
        loss = criterion(outputs, labels)

        # Compute the gradients and backpropagate the loss through the network
        loss.backward()

        # Update the weights
        optimizer.step()

        # Threshold into a NEW array. On the CPU, .cpu().numpy() returns a view
        # of the tensor's own memory, so thresholding that view in place would
        # overwrite the sigmoid outputs that the loss and backward pass rely on.
        probabilities = outputs.detach().cpu().numpy()
        output = (probabilities >= 0.5).astype(probabilities.dtype)
        label = labels.detach().cpu().numpy()
        acc = np.sum(output==label)/(output.shape[0]*output.shape[1])
        (accuracy,precision,recall,BCR),_ = evaluate(output,label)

        # Add this iteration's loss to the total_loss. Accumulate the Python
        # float, not the tensor, so no autograd graph is kept alive.
        loss_value = loss.item()
        total_loss.append(loss_value)
        N_minibatch_loss += loss_value
        N_minibatch_acc += accuracy
        N_acc += acc
        window_batches += 1

        #TODO: Implement validation
        if minibatch_count % N == 0:
            # Average over the minibatches actually accumulated; the very first
            # report (minibatch 0) covers a single batch, not N.
            N_acc /= window_batches
            N_minibatch_loss /= window_batches
            N_minibatch_acc /= window_batches
            print('Epoch %d, average minibatch %d loss: %.3f acc:%.3f,ture acc %.3f' %
                (epoch + 1, minibatch_count, N_minibatch_loss,N_minibatch_acc,N_acc ))
            # Add the averaged loss over the window and reset the counters
            avg_minibatch_loss.append(N_minibatch_loss)
            N_minibatch_loss = 0.0
            N_minibatch_acc = 0.0
            N_acc = 0.0
            window_batches = 0
            if minibatch_count %(600) ==0:
                print('here we do validation')
                # TODO: run test(model, val_loader), append the result to
                # validation_loss, and add early stopping.

    print("Finished", epoch + 1, "epochs of training")
# The loop always runs to completion, so this equals the last epoch + 1 and
# stays defined even when num_epochs is 0.
print("Training complete after", num_epochs, "epochs")
print("Here we do test")
_=test(model,test_loader)

In [3]:
def evaluate(output,label):
    '''
    Compute accuracy, precision, recall and BCR per class and averaged over classes.

    A prediction counts as positive when its score is >= 0.5. The inputs are
    not modified.

    params:
        output: predicted scores.  shape:(minibatch_size,num_class)
        label:  ground-truth 0/1 labels. shape:(minibatch_size,num_class)
    return:
        (average_acc,average_precision,average_recall,average_BCR): per-class values averaged with equal weight
        (acc_list,precision_list,recall_list,BCR_list): accuracy,precision,recall and BCR per-class
        BCR is computed here as (precision + recall) / 2. Precision and recall
        are reported as 0 for a class with no true positives.
    '''
    # Work on fresh boolean masks instead of thresholding the caller's array in place.
    predicted_positive = np.asarray(output) >= 0.5
    truth = np.asarray(label)
    sample_size,class_size = predicted_positive.shape

    acc_list = np.zeros(class_size)
    precision_list = np.zeros(class_size)
    recall_list = np.zeros(class_size)
    BCR_list = np.zeros(class_size)

    for class_id in range(class_size):
        pred_pos = predicted_positive[:,class_id]
        actual_pos = truth[:,class_id] == 1
        actual_neg = truth[:,class_id] == 0

        # Confusion-matrix cells: each one is the AND of a prediction mask and
        # a label mask (comparing the two masks with == would also count the
        # positions where both are False).
        TP = np.sum(pred_pos & actual_pos)
        FP = np.sum(pred_pos & actual_neg)
        FN = np.sum(~pred_pos & actual_pos)
        TN = np.sum(~pred_pos & actual_neg)

        acc_list[class_id] = (TP+TN)/sample_size
        if TP > 0:
            # TP > 0 guarantees both denominators are non-zero.
            precision_list[class_id] = TP/(TP+FP)
            recall_list[class_id] = TP/(TP+FN)
        BCR_list[class_id] = (precision_list[class_id]+recall_list[class_id])/2.0

    average_acc = np.mean(acc_list)
    average_precision = np.mean(precision_list)
    average_recall = np.mean(recall_list)
    average_BCR = np.mean(BCR_list)
    return (average_acc,average_precision,average_recall,average_BCR),(acc_list,precision_list,recall_list,BCR_list)

# Ad-hoc check: scores the module-level `output` / `label` arrays that the
# training loop leaves behind from its last minibatch (presumably run as a
# separate notebook cell after training - TODO confirm).
evaluate(output,label)
