In [1]:
import numpy as np 
import pandas as pd 
#import pydicom
import matplotlib.pyplot as plt 

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau

from PIL import Image
from skimage.transform import resize
from sklearn.metrics import confusion_matrix, roc_curve, auc
from efficientnet_pytorch import EfficientNet

In [2]:
# Read the labelled image metadata (one row per image) and preview the first rows.
marking_csv_path = "../TrainTestDataFrames/marking.csv"
train_df_allsamples = pd.read_csv(marking_csv_path)
train_df_allsamples.head()

Unnamed: 0,patient_id,image_id,target,source,sex,age_approx,anatom_site_general_challenge
0,IP_7279968,ISIC_2637011,0,ISIC20,male,45.0,head/neck
1,IP_3075186,ISIC_0015719,0,ISIC20,female,45.0,upper extremity
2,IP_2842074,ISIC_0052212,0,ISIC20,female,50.0,lower extremity
3,IP_6890425,ISIC_0068279,0,ISIC20,female,45.0,head/neck
4,IP_8723313,ISIC_0074268,0,ISIC20,female,55.0,upper extremity


In [3]:
# Lookup table from image id to its label, plus a summary of the class balance.
image_names = train_df_allsamples["image_id"].values
targets = train_df_allsamples["target"].values
img_to_target = dict(zip(image_names, targets))

# Share of rows with target == 1, and the share of the larger class.
n_samples_total = len(targets)
percent_tp = targets.sum() / n_samples_total * 100
majority_percent = np.max([percent_tp, 100 - percent_tp])

print("{} training samples total.".format(n_samples_total))
print("Only {:.3f} percent of training data set is a true positive.".format(percent_tp))
print("Therefore, the baseline accuracy is {:.3f}".format(majority_percent))

60487 training samples total.
Only 9.058 percent of training data set is a true positive.
Therefore, the baseline accuracy is 90.942


In [4]:
# Draw a random subset of the labelled data and split it into training and
# validation frames.
SPLIT_SEED = 42        # fixed seed so Restart & Run All reproduces the same split
SAMPLE_FRAC = .5       # fraction of all rows used for train + validation
train_val_split = .9   # fraction of the drawn rows that go to training

# Class masks are derived from the dataframe itself rather than from the
# notebook-level `targets` array, which other cells reassign; this keeps the
# cell re-runnable at any point.
all_targets = train_df_allsamples["target"].values
train_df_pos = train_df_allsamples.iloc[all_targets > 0, :]
train_df_neg = train_df_allsamples.iloc[all_targets == 0, :]
# As many negatives as there are positives (only used by the balanced
# alternative below).
train_df_negsample = train_df_neg.sample(n=int(train_df_pos.shape[0]),
                                         random_state=SPLIT_SEED)

# Balanced alternative: concatenate negative and positive samples, then shuffle using .sample()
#train_val_df = pd.concat((train_df_pos, train_df_negsample)).sample(frac=1)
# Currently used: an unbalanced random subset; class balancing happens at
# batch-sampling time instead.
train_val_df = train_df_allsamples.sample(frac=SAMPLE_FRAC, random_state=SPLIT_SEED)

n_train_val = train_val_df.shape[0]
n_train = int(train_val_split*n_train_val)

# The frame is already shuffled by .sample(), so a positional cut is a random split.
train_df = train_val_df[:n_train]
val_df = train_val_df[n_train:]

# Validation image ids and labels, used to report the validation class balance.
image_names = val_df["image_id"].values
val_targets = val_df["target"].values

percent_tp = val_targets.sum()/len(val_targets) * 100
baseline = np.max([percent_tp, 100-percent_tp])

print("{} Training and {} Validation samples".format(n_train, n_train_val-n_train))
print("{:.3f} percent of validation data set is a positive.".format(percent_tp))
print("Baseline validation accuracy is {:.3f}".format(baseline))

54438 Training and 6049 Validation samples
8.613 percent of validation data set is a positive.
Baseline validation accuracy is 91.387


In [5]:
# Pick the compute device: first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [6]:
meta_features = ['sex', 'age_approx', 'anatom_site_general_challenge']

# Map every metadata value seen in the training split (as a string) to a float
# code: the position of that value among the sorted unique values of its column.
# Missing values (the string 'nan') map to NaN.
# NOTE(review): a single dict is shared by all features, so an identical string
# occurring in two columns would overwrite the earlier code - confirm this is intended.
encoder = {}
for feature in meta_features:
    # determine unique values of this feature, compared as strings
    categories = np.unique(np.array(train_df[feature].values, str))
    for i, category in enumerate(categories):
        if category != 'nan':
            # builtin float: the `np.float` alias no longer exists in current NumPy
            encoder[category] = float(i)
encoder['nan'] = np.nan

# Image pipelines. MyDataLoader uses transform_1 the first time an image is
# drawn and transform_2 the second time; transform_valid is used by ValidDataset.

_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]


def _jitter_and_crop():
    """Fresh random rotation / colour jitter / resized-crop steps shared by both training pipelines."""
    return [
        transforms.RandomRotation(degrees=5),
        transforms.ColorJitter(brightness=32. / 255., saturation=0.5),
        transforms.RandomResizedCrop(size=256, scale=(0.5, 1.0), ratio=(0.8, 1.2)),
    ]


def _to_normalized_tensor():
    """Fresh tensor-conversion and per-channel normalisation steps that close every pipeline."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]


# first draw: rotation, jitter and crop only
transform_1 = transforms.Compose(_jitter_and_crop() + _to_normalized_tensor())

# second draw: same augmentation followed by a horizontal flip applied with probability 1
transform_2 = transforms.Compose(
    _jitter_and_crop()
    + [transforms.RandomHorizontalFlip(1)]
    + _to_normalized_tensor()
)

# validation/test: no rotation, jitter or flip; crop scale and aspect ratio are pinned to 1
transform_valid = transforms.Compose(
    [transforms.RandomResizedCrop(size=256, scale=(1.0, 1.0), ratio=(1.0, 1.0))]
    + _to_normalized_tensor()
)

def make_weights_for_balanced_classes(df, nclasses=2):
    """Return one sampling weight per row, inversely proportional to class frequency.

    Each row receives ``N / count[label]`` where ``N`` is the number of rows and
    ``count[label]`` is the number of rows sharing that row's label, so every
    class carries the same total weight.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an integer ``"target"`` column with labels in
        ``range(nclasses)``.
    nclasses : int, default 2
        Number of distinct class labels.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(df)``. Empty when ``df`` has no rows.

    Raises
    ------
    IndexError
        If a label is ``>= nclasses``.
    """
    targets = np.asarray(df["target"].values)
    if targets.size == 0:
        # Nothing to weight; avoids dividing by a zero total.
        return np.array([], dtype=float)

    counts = np.bincount(targets, minlength=nclasses).astype(float)
    if counts.size > nclasses:
        raise IndexError(
            "label {} is out of range for nclasses={}".format(int(targets.max()), nclasses))

    # Classes that never occur keep weight 0: no row refers to them, and this
    # avoids a division by zero.
    weight_per_class = np.zeros(nclasses, dtype=float)
    present = counts > 0
    weight_per_class[present] = counts.sum() / counts[present]

    # Fancy indexing yields a new, writable array (callers update it in place).
    return weight_per_class[targets]

class ValidDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(image_tensor, target)`` pairs for evaluation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``"image_id"`` and ``"target"`` columns.
    path_to_files : str
        Prefix joined with ``image_id + ".jpg"`` by plain string concatenation,
        so it must end with a path separator.
    """

    def __init__(self, df, path_to_files):
        self.path = path_to_files
        self.df = df

    def __getitem__(self, index):
        """Load image ``index``, apply ``transform_valid`` and return it with its label.

        Returns
        -------
        tuple
            ``(img_processed, target)`` where ``target`` is a 0-dim float32 tensor.
        """
        # load X
        img_name = self.df['image_id'].values[index]
        img_path = self.path + img_name + ".jpg"
        img = Image.fromarray(plt.imread(img_path))

        # The patient metadata is deliberately not encoded here: it was never
        # returned, and looking it up per item could raise KeyError for values
        # missing from the training-derived encoder.

        # load y
        label = self.df["target"].values[index]
        target = torch.tensor(label, dtype=torch.float32)

        # deterministic preprocessing (no augmentation) for evaluation
        img_processed = transform_valid(img)
        return img_processed, target

    def __len__(self):
        # total number of rows in the dataframe
        return self.df.shape[0]

class MyDataLoader():
    """Weighted batch sampler that over-samples positives within one pass.

    Rows are drawn with class-balancing weights. A negative row is retired
    (weight 0) after its first draw; a positive row has its weight halved after
    the first draw and is retired after the second. The first draw of an image
    uses ``transform_1`` and the second uses ``transform_2``.

    ``balanced`` turns False once every positive row is retired, or, when
    ``min_balance`` is set, once the fraction of positives in a batch is at or
    below ``min_balance``.
    """

    def __init__(self, df, path, batchsize, min_balance=None):
        # data source and sampling configuration
        self.df = df
        self.path = path
        self.batchsize = batchsize
        self.min_balance = min_balance
        self.balanced = True

        # one sampling weight per row, updated in place as rows are drawn
        self.w = make_weights_for_balanced_classes(df)

        # lookups from image id to row position and to label
        image_ids = self.df['image_id'].values
        self.targets = self.df['target'].values
        self.imgID2Idx = dict(zip(image_ids, np.arange(len(image_ids))))
        self.imgID2Target = dict(zip(image_ids, self.targets))

        # number of times each row has been drawn so far
        self.counts = np.zeros(len(image_ids))

    def get_batch(self):
        """Draw one batch and return ``(X, y)`` as float32 tensors.

        ``X`` has shape ``[batchsize, 3, 256, 256]`` and ``y`` has shape
        ``[batchsize]``. The batch size shrinks permanently when fewer rows
        with positive weight remain than requested.
        """
        # shrink the batch when not enough drawable rows are left
        n_drawable = np.sum(self.w > 0)
        if n_drawable < self.batchsize:
            print("Updating batchsize, maximum dataset size reached")
            self.batchsize = n_drawable
        drawn_rows = self.df.sample(n=self.batchsize, weights=self.w)
        batch_image_ids = drawn_rows['image_id'].values

        # row positions and labels of the drawn images
        batch_sample_inds = [self.imgID2Idx[im_id] for im_id in batch_image_ids]
        batch_targets = [self.imgID2Target[im_id] for im_id in batch_image_ids]

        self.counts[batch_sample_inds] += 1

        # retire negatives and twice-drawn rows; halve the weight of other positives
        for ind, target in zip(batch_sample_inds, batch_targets):
            retired = target == 0 or self.counts[ind] == 2
            self.w[ind] = 0 if retired else self.w[ind] / 2

        # buffers laid out as [Batchsize, Channels, H, W]
        images = np.zeros((self.batchsize, 3, 256, 256))
        labels = np.zeros(self.batchsize)

        for i, (img_name, index) in enumerate(zip(batch_image_ids, batch_sample_inds)):
            img = Image.fromarray(plt.imread(self.path + img_name + ".jpg"))

            # the augmentation depends on how often this image has been drawn
            times_drawn = self.counts[index]
            if times_drawn == 1:
                images[i, :, :, :] = transform_1(img)
            elif times_drawn == 2:
                images[i, :, :, :] = transform_2(img)

            labels[i] = batch_targets[i]

        # stop once no positive row can be drawn any more
        if np.sum(self.w[self.targets==1]) == 0:
            self.balanced = False

        # stop once the batch holds too few positives
        if self.min_balance and sum(labels)/len(labels) <= self.min_balance:
            self.balanced = False

        X = torch.tensor(images, dtype = torch.float32)
        y = torch.tensor(labels, dtype = torch.float32)
        return X, y

In [7]:
# Load EfficientNet-B0 with its pre-trained parameters, then move it to the selected device.
ENet = EfficientNet.from_pretrained('efficientnet-b0')
ENet = ENet.to(device)

Loaded pretrained weights for efficientnet-b0


In [8]:
# Convolutional neural network
class MyENet(nn.Module):
    """Binary classifier built on a supplied backbone.

    The backbone's final ``_fc`` layer is replaced by a linear layer producing a
    256-dimensional embedding. A small head (batch norm, LeakyReLU, dropout,
    linear, sigmoid) maps that embedding to a single value in (0, 1).

    Parameters
    ----------
    ENet : nn.Module
        Backbone exposing a ``_fc`` attribute with ``in_features``; it is
        modified in place.
    """

    def __init__(self, ENet):
        super().__init__()

        # swap the backbone's output layer for a 256-d embedding layer
        self.ENet = ENet
        embedding_dim = 256
        backbone_width = self.ENet._fc.in_features
        self.ENet._fc = nn.Linear(in_features=backbone_width,
                                  out_features=embedding_dim,
                                  bias=True)

        # head mapping the embedding to the melanoma decision
        head_layers = [
            nn.BatchNorm1d(embedding_dim),
            nn.LeakyReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(embedding_dim, 1),
            nn.Sigmoid(),
        ]
        self.output = nn.Sequential(*head_layers)

    def embedding(self, x):
        """Return the 256-d backbone embedding of ``x``."""
        return self.ENet(x)

    def forward(self, x):
        """Return the sigmoid output of shape ``[batch, 1]`` for ``x``."""
        features = self.ENet(x)
        return self.output(features)

# Wrap the pre-trained backbone in the classifier and place it on the selected device.
model = MyENet(ENet)
model = model.to(device)
# To resume from a saved checkpoint instead:
#model.load_state_dict(torch.load('../Models/ENETmodel_all.ckpt'))

In [None]:
# Train the model with the custom balanced batch sampler, validate after every
# epoch, keep the checkpoint with the best validation ROC AUC and stop early
# when it has not improved for `set_patience` consecutive epochs.
path = "../../data-512/512x512-dataset-melanoma/512x512-dataset-melanoma/"
path_to_model = '../Models/ENETmodel.ckpt'

num_epochs = 50
batchsize  = 25

# Validation data is read in batches (the DataLoader default is one image per
# forward pass) and never shuffled.
valid_dataset = ValidDataset(val_df, path)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                           batch_size=batchsize,
                                           shuffle=False)

# per-batch training history and per-epoch validation history
train_roc = []
val_roc   = []
losses    = []
patience     = 3
set_patience = 3
best_val     = 0

# Loss and optimizer
criterion = nn.BCELoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# scheduler reduces learning rate by factor of 10 when val auc does not improve
scheduler = ReduceLROnPlateau(optimizer=optimizer, min_lr=3e-6, mode='max', patience=0, verbose=True)


for epoch in range(num_epochs):
    # A fresh sampler per epoch resets the draw counts and sampling weights.
    train_loader = MyDataLoader(train_df, path, batchsize=batchsize, min_balance=.25)

    # validation below switches to eval mode, so re-enable training mode each epoch
    model.train()
    while train_loader.balanced:
        images, labels = train_loader.get_batch()
        images = images.to(device)
        labels = labels.reshape(-1, 1).to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Store a plain float: keeping the tensor would hold device memory and
        # its autograd history for every batch, and could not be plotted.
        losses.append(loss.item())

        # Per-batch ROC AUC. Batch-local names are used so the notebook-level
        # `targets` array is not overwritten.
        predictions = outputs.detach().cpu().numpy().ravel()
        batch_targets = labels.cpu().numpy().ravel()

        fpr, tpr, _ = roc_curve(np.array(batch_targets, int), predictions)
        train_roc_auc = auc(fpr, tpr)
        train_roc.append(train_roc_auc)

        # fraction of positives in this batch
        balance = np.sum(batch_targets) / len(batch_targets)

        print ('Epoch [{}/{}], Balance {:.2f}, Loss: {:.4f}, Train ROC AUC: {:.4f}'
               .format(epoch+1, num_epochs, balance, loss.item(), train_roc_auc))

    # evaluate on the validation set
    valid_predictions = []
    valid_targets = []
    model.eval()
    with torch.no_grad():
        for images, labels in valid_loader:
            outputs = model(images.to(device))
            valid_predictions += list(outputs.cpu().numpy().ravel())
            valid_targets += list(labels.numpy().ravel())

    fpr, tpr, _ = roc_curve(np.array(valid_targets, int), np.array(valid_predictions).ravel())
    val_roc_auc = auc(fpr, tpr)
    val_roc.append(val_roc_auc)

    print ('\nEpoch [{}/{}], Val ROC AUC: {:.4f}\n'.format(epoch+1, num_epochs, val_roc_auc))

    # learning rate is reduced if val roc doesn't improve
    scheduler.step(val_roc_auc)

    if val_roc_auc >= best_val:
        best_val = val_roc_auc
        patience = set_patience
        torch.save(model.state_dict(), path_to_model)
    else:
        patience -= 1
        if patience == 0:
            print('Early stopping. Best validation roc_auc: {:.3f}'.format(best_val))
            # the best checkpoint is restored right after the loop
            break

# Load best model
model.load_state_dict(torch.load(path_to_model))

Epoch [1/50], Balance 0.52, Loss: 0.7220, Train ROC AUC: 0.5064
Epoch [1/50], Balance 0.44, Loss: 0.6622, Train ROC AUC: 0.6299
Epoch [1/50], Balance 0.64, Loss: 0.5262, Train ROC AUC: 0.8889
Epoch [1/50], Balance 0.56, Loss: 0.5924, Train ROC AUC: 0.7597
Epoch [1/50], Balance 0.44, Loss: 0.5089, Train ROC AUC: 0.8117
Epoch [1/50], Balance 0.56, Loss: 0.4337, Train ROC AUC: 0.9026
Epoch [1/50], Balance 0.44, Loss: 0.4404, Train ROC AUC: 0.8571
Epoch [1/50], Balance 0.48, Loss: 0.9762, Train ROC AUC: 0.6026
Epoch [1/50], Balance 0.52, Loss: 0.4085, Train ROC AUC: 0.9167
Epoch [1/50], Balance 0.40, Loss: 0.6750, Train ROC AUC: 0.7867
Epoch [1/50], Balance 0.56, Loss: 0.6298, Train ROC AUC: 0.7792
Epoch [1/50], Balance 0.60, Loss: 0.6113, Train ROC AUC: 0.7400
Epoch [1/50], Balance 0.52, Loss: 0.4318, Train ROC AUC: 0.8654
Epoch [1/50], Balance 0.44, Loss: 0.4678, Train ROC AUC: 0.8442
Epoch [1/50], Balance 0.56, Loss: 0.5445, Train ROC AUC: 0.8247
Epoch [1/50], Balance 0.56, Loss: 0.5809

Epoch [2/50], Balance 0.52, Loss: 0.4296, Train ROC AUC: 0.8910
Epoch [2/50], Balance 0.52, Loss: 0.4388, Train ROC AUC: 0.9103
Epoch [2/50], Balance 0.52, Loss: 0.4138, Train ROC AUC: 0.9231
Epoch [2/50], Balance 0.44, Loss: 0.3410, Train ROC AUC: 0.9351
Epoch [2/50], Balance 0.52, Loss: 0.4304, Train ROC AUC: 0.8718
Epoch [2/50], Balance 0.56, Loss: 0.5088, Train ROC AUC: 0.8571
Epoch [2/50], Balance 0.52, Loss: 0.4838, Train ROC AUC: 0.8269
Epoch [2/50], Balance 0.36, Loss: 0.4424, Train ROC AUC: 0.9097
Epoch [2/50], Balance 0.36, Loss: 0.3579, Train ROC AUC: 0.9514
Epoch [2/50], Balance 0.40, Loss: 0.5353, Train ROC AUC: 0.8333
Epoch [2/50], Balance 0.44, Loss: 0.3656, Train ROC AUC: 0.9156
Epoch [2/50], Balance 0.48, Loss: 0.4678, Train ROC AUC: 0.8462
Epoch [2/50], Balance 0.52, Loss: 0.4318, Train ROC AUC: 0.9231
Epoch [2/50], Balance 0.56, Loss: 0.4622, Train ROC AUC: 0.8766
Epoch [2/50], Balance 0.48, Loss: 0.3441, Train ROC AUC: 0.8974
Epoch [2/50], Balance 0.64, Loss: 0.5425

Epoch [4/50], Balance 0.44, Loss: 0.3412, Train ROC AUC: 0.9740
Epoch [4/50], Balance 0.48, Loss: 0.4379, Train ROC AUC: 0.8654
Epoch [4/50], Balance 0.52, Loss: 0.3603, Train ROC AUC: 0.9167
Epoch [4/50], Balance 0.52, Loss: 0.3197, Train ROC AUC: 0.9551
Epoch [4/50], Balance 0.56, Loss: 0.2715, Train ROC AUC: 0.9805
Epoch [4/50], Balance 0.24, Loss: 0.5248, Train ROC AUC: 0.9561

Epoch [4/50], Val ROC AUC: 0.8980

Epoch [5/50], Balance 0.56, Loss: 0.3427, Train ROC AUC: 0.9545
Epoch [5/50], Balance 0.64, Loss: 0.3756, Train ROC AUC: 0.9097
Epoch [5/50], Balance 0.72, Loss: 0.5912, Train ROC AUC: 0.7143
Epoch [5/50], Balance 0.60, Loss: 0.5555, Train ROC AUC: 0.8067
Epoch [5/50], Balance 0.68, Loss: 0.3636, Train ROC AUC: 0.9412
Epoch [5/50], Balance 0.40, Loss: 0.3014, Train ROC AUC: 0.9800
Epoch [5/50], Balance 0.60, Loss: 0.4651, Train ROC AUC: 0.8533
Epoch [5/50], Balance 0.72, Loss: 0.6137, Train ROC AUC: 0.8016
Epoch [5/50], Balance 0.44, Loss: 0.4262, Train ROC AUC: 0.9156
Epoc

Epoch [6/50], Balance 0.56, Loss: 0.3386, Train ROC AUC: 0.9221
Epoch [6/50], Balance 0.52, Loss: 0.4833, Train ROC AUC: 0.8590
Epoch [6/50], Balance 0.48, Loss: 0.4007, Train ROC AUC: 0.8782
Epoch [6/50], Balance 0.44, Loss: 0.3389, Train ROC AUC: 0.9156
Epoch [6/50], Balance 0.44, Loss: 0.4143, Train ROC AUC: 0.9286
Epoch [6/50], Balance 0.52, Loss: 0.4263, Train ROC AUC: 0.8974
Epoch [6/50], Balance 0.44, Loss: 0.3383, Train ROC AUC: 0.9351
Epoch [6/50], Balance 0.36, Loss: 0.5471, Train ROC AUC: 0.8403
Epoch [6/50], Balance 0.56, Loss: 0.5157, Train ROC AUC: 0.7792
Epoch [6/50], Balance 0.56, Loss: 0.5279, Train ROC AUC: 0.7857
Epoch [6/50], Balance 0.40, Loss: 0.5075, Train ROC AUC: 0.8533
Epoch [6/50], Balance 0.40, Loss: 0.4808, Train ROC AUC: 0.8333
Epoch [6/50], Balance 0.44, Loss: 0.4977, Train ROC AUC: 0.8571
Epoch [6/50], Balance 0.44, Loss: 0.2840, Train ROC AUC: 0.9740
Epoch [6/50], Balance 0.20, Loss: 0.5693, Train ROC AUC: 0.8900

Epoch [6/50], Val ROC AUC: 0.9151

Epoc

Epoch [7/50], Balance 0.64, Loss: 0.3734, Train ROC AUC: 0.9514
Epoch [7/50], Balance 0.56, Loss: 0.3389, Train ROC AUC: 0.9481
Epoch [7/50], Balance 0.52, Loss: 0.3320, Train ROC AUC: 0.9231
Epoch [7/50], Balance 0.56, Loss: 0.3831, Train ROC AUC: 0.9156
Epoch [7/50], Balance 0.44, Loss: 0.4752, Train ROC AUC: 0.8506
Epoch [7/50], Balance 0.28, Loss: 0.7121, Train ROC AUC: 0.7698
Epoch [7/50], Balance 0.64, Loss: 0.4222, Train ROC AUC: 0.9167
Epoch [7/50], Balance 0.40, Loss: 0.3283, Train ROC AUC: 0.9533
Epoch [7/50], Balance 0.48, Loss: 0.4073, Train ROC AUC: 0.8846
Epoch [7/50], Balance 0.44, Loss: 0.6569, Train ROC AUC: 0.7727
Epoch [7/50], Balance 0.44, Loss: 0.4080, Train ROC AUC: 0.8896
Epoch [7/50], Balance 0.24, Loss: 0.4762, Train ROC AUC: 0.9298

Epoch [7/50], Val ROC AUC: 0.9199

Epoch [8/50], Balance 0.64, Loss: 0.6519, Train ROC AUC: 0.7500
Epoch [8/50], Balance 0.60, Loss: 0.5422, Train ROC AUC: 0.8533
Epoch [8/50], Balance 0.60, Loss: 0.2482, Train ROC AUC: 0.9800
Epoc

Epoch [8/50], Balance 0.52, Loss: 0.3309, Train ROC AUC: 0.9231
Epoch [8/50], Balance 0.64, Loss: 0.4402, Train ROC AUC: 0.9236
Epoch [8/50], Balance 0.48, Loss: 0.3410, Train ROC AUC: 0.9295
Epoch [8/50], Balance 0.36, Loss: 0.3298, Train ROC AUC: 0.9444
Epoch [8/50], Balance 0.44, Loss: 0.5487, Train ROC AUC: 0.8377
Epoch [8/50], Balance 0.48, Loss: 0.4661, Train ROC AUC: 0.8333
Epoch [8/50], Balance 0.48, Loss: 0.2923, Train ROC AUC: 0.9487
Epoch [8/50], Balance 0.48, Loss: 0.4990, Train ROC AUC: 0.8590
Epoch [8/50], Balance 0.56, Loss: 0.6847, Train ROC AUC: 0.7987
Epoch [8/50], Balance 0.52, Loss: 0.3445, Train ROC AUC: 0.9487
Epoch [8/50], Balance 0.40, Loss: 0.3386, Train ROC AUC: 0.9267
Epoch [8/50], Balance 0.36, Loss: 0.2870, Train ROC AUC: 0.9722
Epoch [8/50], Balance 0.52, Loss: 0.3683, Train ROC AUC: 0.9167
Epoch [8/50], Balance 0.44, Loss: 0.2876, Train ROC AUC: 0.9351
Epoch [8/50], Balance 0.44, Loss: 0.4105, Train ROC AUC: 0.8831
Epoch [8/50], Balance 0.48, Loss: 0.5255

Epoch [9/50], Balance 0.60, Loss: 0.6633, Train ROC AUC: 0.7533
Epoch [9/50], Balance 0.32, Loss: 0.5530, Train ROC AUC: 0.8382
Epoch [9/50], Balance 0.48, Loss: 0.3111, Train ROC AUC: 0.9231
Epoch [9/50], Balance 0.48, Loss: 0.5060, Train ROC AUC: 0.7308
Epoch [9/50], Balance 0.64, Loss: 0.7196, Train ROC AUC: 0.7917
Epoch [9/50], Balance 0.68, Loss: 0.5896, Train ROC AUC: 0.8309
Epoch [9/50], Balance 0.52, Loss: 0.3254, Train ROC AUC: 0.9359
Epoch [9/50], Balance 0.40, Loss: 0.3078, Train ROC AUC: 0.9867
Epoch [9/50], Balance 0.60, Loss: 0.4397, Train ROC AUC: 0.8933
Epoch [9/50], Balance 0.40, Loss: 0.2488, Train ROC AUC: 1.0000
Epoch [9/50], Balance 0.48, Loss: 0.3739, Train ROC AUC: 0.9487
Epoch [9/50], Balance 0.36, Loss: 0.3366, Train ROC AUC: 0.9722
Epoch [9/50], Balance 0.56, Loss: 0.3179, Train ROC AUC: 0.9481
Epoch [9/50], Balance 0.36, Loss: 0.3878, Train ROC AUC: 0.9167
Epoch [9/50], Balance 0.48, Loss: 0.2516, Train ROC AUC: 0.9808
Epoch [9/50], Balance 0.60, Loss: 0.4401

Epoch [13/50], Balance 0.40, Loss: 0.3813, Train ROC AUC: 0.9200
Epoch [13/50], Balance 0.60, Loss: 0.3641, Train ROC AUC: 0.9333
Epoch [13/50], Balance 0.36, Loss: 0.2448, Train ROC AUC: 0.9931
Epoch [13/50], Balance 0.40, Loss: 0.3290, Train ROC AUC: 0.9667
Epoch [13/50], Balance 0.56, Loss: 0.2842, Train ROC AUC: 0.9740
Epoch [13/50], Balance 0.60, Loss: 0.3432, Train ROC AUC: 0.9600
Epoch [13/50], Balance 0.40, Loss: 0.4463, Train ROC AUC: 0.9000
Epoch [13/50], Balance 0.56, Loss: 0.2546, Train ROC AUC: 0.9870
Epoch [13/50], Balance 0.48, Loss: 0.2158, Train ROC AUC: 1.0000
Epoch [13/50], Balance 0.40, Loss: 0.3766, Train ROC AUC: 0.9200
Epoch [13/50], Balance 0.48, Loss: 0.3873, Train ROC AUC: 0.9038
Epoch [13/50], Balance 0.48, Loss: 0.4806, Train ROC AUC: 0.8333
Epoch [13/50], Balance 0.64, Loss: 0.4471, Train ROC AUC: 0.9375
Epoch [13/50], Balance 0.64, Loss: 0.5257, Train ROC AUC: 0.8542
Epoch [13/50], Balance 0.44, Loss: 0.3655, Train ROC AUC: 0.9286
Epoch [13/50], Balance 0.

Epoch [13/50], Balance 0.52, Loss: 0.3093, Train ROC AUC: 0.9423
Epoch [13/50], Balance 0.32, Loss: 0.4490, Train ROC AUC: 0.9191
Epoch [13/50], Balance 0.64, Loss: 0.6756, Train ROC AUC: 0.7778
Epoch [13/50], Balance 0.52, Loss: 0.2480, Train ROC AUC: 0.9808
Epoch [13/50], Balance 0.60, Loss: 0.4366, Train ROC AUC: 0.9200
Epoch [13/50], Balance 0.60, Loss: 0.2633, Train ROC AUC: 0.9733
Epoch [13/50], Balance 0.52, Loss: 0.2849, Train ROC AUC: 0.9487
Epoch [13/50], Balance 0.60, Loss: 0.3201, Train ROC AUC: 0.9333
Epoch [13/50], Balance 0.40, Loss: 0.4596, Train ROC AUC: 0.8867
Epoch [13/50], Balance 0.48, Loss: 0.3859, Train ROC AUC: 0.9103
Epoch [13/50], Balance 0.44, Loss: 0.4137, Train ROC AUC: 0.8831
Epoch [13/50], Balance 0.48, Loss: 0.2301, Train ROC AUC: 0.9936
Epoch [13/50], Balance 0.52, Loss: 0.2853, Train ROC AUC: 0.9615
Epoch [13/50], Balance 0.64, Loss: 0.3355, Train ROC AUC: 0.9444
Epoch [13/50], Balance 0.44, Loss: 0.4627, Train ROC AUC: 0.8506
Epoch [13/50], Balance 0.

In [None]:
# Plot the training loss recorded for every batch.
# Newer Matplotlib ships this style under a versioned name; fall back to the
# old name where the versioned one is not available.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
plt.style.use(colorblind_style)
plt.rcParams.update({'font.size': 16,
                     'legend.framealpha':1,
                     'legend.edgecolor':'inherit'})
plt.figure(figsize=(9, 6))

# float() accepts plain numbers as well as 0-dim tensors (including ones that
# live on the GPU or carry gradient history), so the history is always plottable.
loss_values = [float(loss_value) for loss_value in losses]
plt.plot(loss_values, label='Train loss')
plt.xlabel('Training batch')
plt.ylabel('BCE loss')
#plt.ylim([.4, .8])
plt.legend()
plt.show()

In [None]:
# Plot the ROC AUC computed on each training batch.
# Newer Matplotlib ships this style under a versioned name; fall back to the
# old name where the versioned one is not available.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
plt.style.use(colorblind_style)
plt.rcParams.update({'font.size': 16,
                     'legend.framealpha':1,
                     'legend.edgecolor':'inherit'})
plt.figure(figsize=(9, 6))

plt.plot(train_roc, label = 'Train ROC AUC')
plt.xlabel('Training batch')
plt.ylabel('ROC AUC')
plt.legend()
plt.show()

In [None]:
# Plot the validation ROC AUC recorded after each epoch.
# Newer Matplotlib ships this style under a versioned name; fall back to the
# old name where the versioned one is not available.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
plt.style.use(colorblind_style)
plt.rcParams.update({'font.size': 16,
                     'legend.framealpha':1,
                     'legend.edgecolor':'inherit'})
plt.figure(figsize=(9, 6))

plt.plot(val_roc, label = 'Validation ROC AUC')
plt.xlabel('Epoch')
plt.ylabel('ROC AUC')
plt.legend()
plt.show()

In [None]:
# Score the validation set with the current model, report accuracy against the
# majority-class baseline and draw the ROC curve.
valid_predictions = []
valid_targets = []

model.eval() # prep model for evaluation
with torch.no_grad():
    for images, labels in valid_loader:
        # Forward pass; labels are only collected, so they can stay on the CPU
        outputs = model(images.to(device))
        valid_predictions += list(outputs.detach().cpu().numpy().ravel())
        valid_targets += list(labels.cpu().numpy().ravel())

# builtin int: the `np.int` alias no longer exists in current NumPy
fpr, tpr, _ = roc_curve(np.array(valid_targets, int), np.array(valid_predictions).ravel())
roc_auc = auc(fpr, tpr)

# Baseline = share of the larger class; model accuracy rounds each prediction to 0 or 1.
percent_tp = sum(valid_targets)/len(valid_targets) * 100
baseline = np.max([percent_tp, 100-percent_tp])
acc = 100 * np.sum(np.round(valid_predictions) == np.array(valid_targets)) / len(valid_targets)

print('\nBaseline classification accuracy: {:.2f}'.format(baseline))
print('\nModel classification accuracy:    {:.2f}'.format(acc))

# Newer Matplotlib ships this style under a versioned name; fall back to the
# old name where the versioned one is not available.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
plt.style.use(colorblind_style)
plt.rcParams.update({'font.size': 16,
                     'legend.framealpha':1,
                     'legend.edgecolor':'inherit'})
plt.figure(figsize=(9, 6))

lw = 2
plt.plot(fpr, tpr,
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")

plt.tight_layout()
plt.show()

In [None]:
# Confusion-matrix summary, with predictions rounded to 0 or 1.
y_true = np.array(valid_targets, int)   # builtin int: `np.int` no longer exists in current NumPy
y_pred = np.round(np.array(valid_predictions).ravel()).astype(int)

# Compute the matrix once. Fixing labels=[0, 1] guarantees a 2x2 result, so the
# four-way unpacking also works when only one class is present.
conf_mat = confusion_matrix(y_true, y_pred, labels=[0, 1])
tn, fp, fn, tp = conf_mat.ravel()

accuracy = (tp + tn) / len(valid_targets)
# Precision/recall are undefined when their denominator is zero; report NaN.
precision = tp / (tp + fp) if (tp + fp) > 0 else float('nan')
recall = tp / (tp + fn) if (tp + fn) > 0 else float('nan')
print("Model accuracy: {:.2f}".format(accuracy))
print("Model precision: {:.2f}".format(precision))
print("Model recall: {:.2f}".format(recall))

print("\nConfusion Matrix: ")
print(conf_mat)

In [None]:
# Histogram of the predicted probabilities on the validation set.
# Newer Matplotlib ships this style under a versioned name; fall back to the
# old name where the versioned one is not available.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
plt.style.use(colorblind_style)
plt.rcParams.update({'font.size': 16,
                     'legend.framealpha':1,
                     'legend.edgecolor':'inherit'})
plt.figure(figsize=(9, 6))

plt.hist(valid_predictions)
plt.xlabel("P(y=malignant | x)")
plt.ylabel("Number of validation images")
plt.show()

In [None]:
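# Optional: uncomment to save the train/validation split to CSV so the same rows can be reused later.
#train_df.to_csv("ENET_train_df_all.csv", index=False)
#val_df.to_csv("ENET_val_df_all.csv", index=False)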