In [1]:
import sys
import os, os.path

sys.path.append(os.path.join(os.getcwd() ,'/modules'))
root_path = "C:/git/Springboard-Public/Capstone Project 2/"
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    root_path = "/content/drive/My Drive/Capstone Project 2/"

print('Current Working Dir: ', os.getcwd())
print('Root Path: ', root_path)

# We need to set the working directory since we are using relative paths from various locations
if os.getcwd() != root_path:
  os.chdir(root_path)

Current Working Dir:  C:\git\Springboard-Public\Capstone Project 2\notebooks\Support Notebooks for Modules
Root Path:  C:/git/Springboard-Public/Capstone Project 2/


In [2]:
import numpy as np
from datetime import datetime
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from modules.lib.ChextXRayImages import *
from modules.models.CustomPneumonia import CustomPneumoniaNN

from PIL import Image
import copy

import torch.optim as optim
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor, ToPILImage
import torchvision.models as models

from torchsummary import summary

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

%matplotlib inline

In [3]:
force_cpu = True
device = torch.device('cuda' if ~force_cpu and torch.cuda.is_available() else 'cpu')
# Assume that we are on a CUDA machine, then this should print a CUDA device:
print(f'Working on device={device}')

Working on device=cuda


In [4]:
loaders = Loaders()
batch_size=16
val_percent=0.15
number_images = 1000
train_loader, val_loader = loaders.getDataTrainValidateLoaders(batch_size=batch_size, 
                                                                        val_percent=val_percent, 
                                                                        n_random_rows=number_images)

target_columns = loaders.target_columns

train_actual = loaders.train_df
val_actual = loaders.val_df

print(f'Number of Training Batches: {len(train_loader):,}')
print(f'Number of Validation Batches: {len(val_loader):,}')
print(f'Number of Training Images: {len(train_loader) * batch_size:,}')
print(f'Number of Validation Images: {len(val_loader) * batch_size:,}')

Feature Imbalance Detected (train % - val %):
   Lung_Opacity: 2.91%
   Consolidation: 2.72%
   Fracture: 2.24%

  self.warnFeatureImbalance(train, value)


Number of Training Batches: 53
Number of Validation Batches: 10
Number of Training Images: 848
Number of Validation Images: 160


In [5]:
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.pool = nn.MaxPool2d(2, 2)
        self.softmax = nn.Softmax(dim=1)       
        self.flattened_length_ = 1*320*320
        self.fc1 = nn.Linear(self.flattened_length_, 12)
       
    def forward(self, x):    
        x = x.view(-1, self.flattened_length_)    
        x = self.fc1(x)
        return x

In [6]:
net = SimpleModel()

net = nn.DataParallel(net)
net.to(device)

summary(net, (1, 320, 320))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 12]       1,228,812
       SimpleModel-2                   [-1, 12]               0
Total params: 1,228,812
Trainable params: 1,228,812
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.39
Forward/backward pass size (MB): 0.00
Params size (MB): 4.69
Estimated Total Size (MB): 5.08
----------------------------------------------------------------


In [7]:
data = next(iter(train_loader))
ImageID, inputs, labels = data['id'], data['img'], data['labels']

print('Batch ImageIDs: ', ImageID.detach().numpy())

print(labels.shape)

# move data to device GPU OR CPU
inputs = inputs.to(device)
labels = labels.to(device)

outputs = net(inputs)
print(inputs.shape)
print(outputs.shape)

print('-' * 50)

predicted = outputs.data
predicted = torch.sigmoid(predicted) 

predicted[predicted >= 0.5] = 1 # assign 1 label to those with less than 0.5
predicted[predicted < 0.5] = 0 # assign 0 label to those with less than 0.5
print(predicted, '\n')
print(labels)


print('-' * 50)

train_batch_size, train_label_count = labels.shape

print('Accurate Predictions: ', (predicted == labels).sum())
print('Total Predictions: ', train_batch_size * train_label_count)
train_acc = float((predicted == labels).sum()) / float((train_batch_size * train_label_count))
print(train_acc)

Batch ImageIDs:  [136247 158078  13477 124284  14932 198577 180148 207185   1110  93754
 216109  87460  42903   1234 216159  42488]
torch.Size([16, 12])
torch.Size([16, 1, 320, 320])
torch.Size([16, 12])
--------------------------------------------------
tensor([[1., 0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1.],
        [0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 0.],
        [1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1.],
        [1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1.],
        [0., 0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0.],
        [0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 0.],
        [0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1.],
        [0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [1., 0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1.],
        [1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1.],
        [1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0.],
        [1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 

In [8]:
def parseLoaderData(data):
    """
    The data loaders output a dictionary with 3 keys
    The first 2 keys hold single values for the ImageID and the actual tensor of the image
    The last key holds a vector of the actual 12 lables
    """ 
    
    ids, inputs, labels = data['id'], data['img'], data['labels']
    # move data to device GPU OR CPU
    inputs, labels = inputs.to(device), labels.to(device)
    return ids, inputs, labels

In [9]:
def getPredictionsFromOutput(outputs):
    """
    We are using BCEWithLogitsLoss for out loss
    In this loss funciton, each label gets the sigmoid (inverse of Logit) before the CE loss
    So our model outputs the raw values on the last FC layer
    This means we have to apply sigmoid to our outputs to squash them between 0 and 1
    We then take values >= .5 as Positive and < .5 as Negative 
    """
    
    predictions = torch.sigmoid(outputs.data) 
    predictions[predictions >= 0.5] = 1 # assign 1 label to those with less than 0.5
    predictions[predictions < 0.5] = 0 # assign 0 label to those with less than 0.5   
    return predictions

In [10]:
def updatePredictions(dictionary, ids, predictions):
    """
    Keep track of predictions using the same index as our DataFrame
    This will allow us to compare to the actual labels
    
    We only are taking the last prediction for each x-ray, but we could extend this later if wanted.
    """
    
    for i in range(len(ids)):
        id = ids[i].item()    
        dictionary[id] = [int(f.item()) for f in predictions[i]]

In [11]:
def processBatch(net, data, optimizer=None):
    """
    Used for both training and validation.
    Validation will not pass in the optimizer.
    """

    # Convert output from loader
    ids, inputs, labels = parseLoaderData(data)
    
    if optimizer:
        # zero the parameter gradients
        optimizer.zero_grad()
        
    # Convert output to predicitons
    outputs = net(inputs)
    predictions = getPredictionsFromOutput(outputs)
    
    return ids, inputs, labels, outputs, predictions 

In [12]:
def backProp(criterion, outputs, labels, optimizer):
    """
    Get loss value from criterion
    run backprop on the loss
    update weights in optimizer
    update epoch loss
    """
    
    loss = criterion(outputs, labels)#.float())
    loss.backward()
    optimizer.step()
    return loss.item()

In [13]:
def getPredictionDataFrame(epoch_predictions):
    result = pd.DataFrame(epoch_predictions).transpose()
    result.columns = target_columns
    return result

In [14]:
epoch_loss = 0
losses_hx = {}

train_prediction_hx = {}
val_prediction_hx = {}

epoch_train_predictions = {}
epoch_val_predictions = {}

df_train_prediction = None
df_val_prediction = None

In [15]:
def closeTrainEpoch(i):
    global training_time_elapsed
    global epoch_loss
    global losses_hx
    global train_prediction_hx
    global last_train_predictions
    global df_train_prediction
    
    training_time_elapsed = datetime.now() - start_time
    epoch_loss = epoch_loss / len(train_loader)
    losses_hx[i] = epoch_loss    
    
    df_train_prediction = getPredictionDataFrame(epoch_train_predictions)
    train_prediction_hx[i] = df_train_prediction
    last_train_predictions = {}

In [16]:
def closeValEpoch(i):
    global validation_time_elapsed
    global val_prediction_hx
    global last_val_predictions
    global df_val_prediction
    
    validation_time_elapsed = datetime.now() - start_time
    
    df_val_prediction = getPredictionDataFrame(epoch_val_predictions)
    val_prediction_hx[i] = df_val_prediction
    last_val_predictions = {}

In [17]:
learning_rate = 1e-4
num_epochs = 2

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)#, weight_decay=0.9)

In [18]:
for epoch in range(num_epochs):  # loop over the dataset multiple times
    start_time = datetime.now()
    
    
    # Training
    net.train()
    for i, data in enumerate(train_loader, 0):
        ids, inputs, labels, outputs, predictions = processBatch(net, data, optimizer)
        updatePredictions(epoch_train_predictions, ids, predictions)
        epoch_loss += backProp(criterion, outputs, labels, optimizer)

    closeTrainEpoch(i)
    
    
    # Validation
    net.eval()
    with torch.no_grad():
      for data in val_loader:          
            ids, inputs, labels, _, predictions = processBatch(net, data)
            updatePredictions(epoch_val_predictions, ids, predictions)
   
    closeValEpoch(i)
    
    
    # stdout Results
    print(f'Epoch [{epoch+1}/{num_epochs}], \
\n          Epoch Loss: {epoch_loss:.4f} \
\n          Training Time: {training_time_elapsed})  \
\n          Validation Time: {validation_time_elapsed})')

Epoch [1/2], 
          Epoch Loss: 1.0135 
          Training Time: 0:00:02.997977)  
          Validation Time: 0:00:03.539528)
Epoch [2/2], 
          Epoch Loss: 0.8335 
          Training Time: 0:00:03.046847)  
          Validation Time: 0:00:03.578425)


In [19]:
def displayImageResults(actual, predicted, imageID):
    actual = actual[target_columns].transpose()
    predicted = predicted.transpose()
    result = pd.DataFrame()
    result['Actual'] = actual[imageID]
    result[result['Actual']==-1] = 0
    result['Predicted'] = predicted[imageID]
    result['Successful'] = result['Actual'] == result['Predicted']
    display(result)

In [20]:
displayImageResults(train_actual, 
                    df_train_prediction, 
                    45510)

Unnamed: 0,Actual,Predicted,Successful
Enlarged_Cardiomediastinum,0,0,True
Cardiomegaly,0,0,True
Lung_Opacity,0,1,False
Lung_Lesion,0,0,True
Edema,0,0,True
Consolidation,0,0,True
Pneumonia,0,0,True
Atelectasis,0,0,True
Pneumothorax,0,0,True
Pleural_Effusion,1,1,True


In [21]:
df_train_result = train_actual.join(df_train_prediction, lsuffix='_actual', rsuffix='_predicted')
df_val_result = val_actual.join(df_val_prediction, lsuffix='_actual', rsuffix='_predicted')

In [22]:
df_train_result 
df_val_result

Unnamed: 0_level_0,PatientID,StudyID,Age,Sex_Male,Sex_Unknown,Orientation_PA,Support Devices,Image_Path,Hierarchical_Path,Enlarged_Cardiomediastinum_actual,...,Lung_Opacity_predicted,Lung_Lesion_predicted,Edema_predicted,Consolidation_predicted,Pneumonia_predicted,Atelectasis_predicted,Pneumothorax_predicted,Pleural_Effusion_predicted,Pleural_Other_predicted,Fracture_predicted
ImageID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
198595,48952,1,18,1,0,0,1.0,data/raw/train/patient48952/study1/view1_front...,data/d45/d2/i198595.jpg,0,...,0,0,0,0,0,0,0,0,0,0
124396,29782,4,57,0,0,0,1.0,data/raw/train/patient29782/study4/view1_front...,data/d46/d32/i124396.jpg,0,...,0,0,0,1,1,0,0,0,0,0
196124,47919,2,81,1,0,0,0.0,data/raw/train/patient47919/study2/view1_front...,data/d24/d19/i196124.jpg,-1,...,0,0,0,1,1,0,0,0,0,0
76225,18302,3,55,0,0,0,1.0,data/raw/train/patient18302/study3/view1_front...,data/d25/d2/i76225.jpg,0,...,0,0,0,1,1,0,0,0,0,0
28706,6999,4,73,1,0,0,1.0,data/raw/train/patient06999/study4/view1_front...,data/d6/d49/i28706.jpg,0,...,0,0,0,1,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222821,64005,1,73,0,0,0,1.0,data/raw/train/patient64005/study1/view1_front...,data/d21/d5/i222821.jpg,0,...,0,0,0,1,1,0,0,0,0,0
190068,45536,1,90,1,0,0,1.0,data/raw/train/patient45536/study1/view1_front...,data/d18/d36/i190068.jpg,-1,...,0,0,0,1,1,0,0,0,0,0
37141,9097,4,62,1,0,1,1.0,data/raw/train/patient09097/study4/view1_front...,data/d41/d47/i37141.jpg,0,...,0,1,0,1,1,0,0,0,0,0
146833,35021,2,22,0,0,0,1.0,data/raw/train/patient35021/study2/view1_front...,data/d33/d21/i146833.jpg,0,...,0,0,0,0,0,0,0,1,0,0
