In [None]:
import sys
import os, os.path

# Make the `modules` directory under the launch directory importable.
# The second component has to be relative: os.path.join() throws away everything
# before an absolute component, so the former '/modules' silently dropped os.getcwd().
sys.path.append(os.path.join(os.getcwd(), 'modules'))

# Default project root for a local checkout; replaced below when running in Colab.
root_path = "C:/git/Springboard-Public/Capstone Project 2/"
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    # In Colab the project lives on Google Drive, which must be mounted first.
    from google.colab import drive
    drive.mount('/content/drive')
    root_path = "/content/drive/My Drive/Capstone Project 2/"

print('Current Working Dir: ', os.getcwd())
print('Root Path: ', root_path)

# We need to set the working directory since we are using relative paths from various locations.
# Normalise both sides first: os.getcwd() never carries a trailing slash and uses the
# platform's separators/case, so a raw string comparison with root_path could never match.
current_dir = os.path.normcase(os.path.normpath(os.getcwd()))
target_dir = os.path.normcase(os.path.normpath(root_path))
if current_dir != target_dir:
    os.chdir(root_path)

In [None]:
import numpy as np
from datetime import datetime
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from modules.lib.ChextXRayImages import *
from modules.models.CustomPneumonia import CustomPneumoniaNN

from PIL import Image
import copy

import torch.optim as optim
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor, ToPILImage
import torchvision.models as models

from torchsummary import summary

%matplotlib inline

In [None]:
# Pick the compute device: the GPU when PyTorch reports CUDA as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Report the selected device so the run log shows where tensors will live.
print(f'Working on device={device}')

In [None]:
# Smoke check: build a Dataset from a 100-row request and count how many items
# it yields when iterated directly.
df = CleanMetaData().getCleanDF(n_random_rows=100)
ds = Dataset(df)
# Only the positions are kept; the item payloads are discarded.
ds_items = [position for position, _ in enumerate(ds)]

print(len(ds_items))

In [None]:
# Smoke check: iterate a batch_size=1 loader built for a 100-row request and
# count the batches it produces.
l = Loaders().getDataLoader(batch_size=1, n_random_rows=100)
# Only the batch positions are kept; the batch contents are discarded.
dl_items = [position for position, _ in enumerate(l)]

print(len(dl_items))

In [None]:
# Settings for the train/validation split requested from the project's Loaders helper.
loaders = Loaders()
batch_size = 32
train_percent = 0.85
number_images = 1000

train_loader, val_loader = loaders.getDataTrainValidateLoaders(
    batch_size=batch_size,
    train_percent=train_percent,
    n_random_rows=number_images,
)

# Batch counts come straight from the loaders.
print(f'Number of Training Batches: {len(train_loader):,}')
print(f'Number of Validation Batches: {len(val_loader):,}')
# Image counts are derived as batches * batch_size.
# NOTE(review): this over-counts if a loader's last batch is not full - confirm
# whether an exact count is wanted here.
print(f'Number of Training Images: {len(train_loader) * batch_size:,}')
print(f'Number of Validation Images: {len(val_loader) * batch_size:,}')

In [None]:
# Instantiate the custom network, wrap it in DataParallel and move it to the
# selected device.
net = nn.DataParallel(CustomPneumoniaNN())
net.to(device)

# Print a per-layer summary for an input of size (1, 320, 320).
summary(net, (1, 320, 320))

In [None]:
# Optimisation settings for the training run.
learning_rate, num_epochs = 1e-4, 30
# Limit PyTorch to a single CPU thread.
torch.set_num_threads(1)

# Cross-entropy loss over the network outputs; AdamW over all of the network's
# parameters (weight_decay is left at the optimizer's default).
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=net.parameters(), lr=learning_rate)

In [None]:
def _unpack_batch(batch):
    """Return the ``(inputs, labels)`` pair of one loader batch, moved to ``device``.

    Batches are indexed as ``batch['img']`` and ``batch['labels']['Pneumonia']``;
    training and evaluation share this helper so both read batches the same way.
    """
    return batch['img'].to(device), batch['labels']['Pneumonia'].to(device)


def _evaluate(model, loader):
    """Run ``model`` over ``loader`` in eval mode without gradients.

    Returns:
        ``(n_correct, n_seen)``: number of predictions equal to the label and
        number of labels seen.
    """
    n_correct, n_seen = 0, 0
    model.eval()
    with torch.no_grad():
        for batch in loader:
            batch_inputs, batch_labels = _unpack_batch(batch)
            _, batch_predicted = torch.max(model(batch_inputs), 1)
            n_seen += batch_labels.size(0)
            n_correct += (batch_predicted == batch_labels).sum().item()
    return n_correct, n_seen


# Metric histories: training accuracy is appended once per batch, the others once per epoch.
train_accuracy_index = []
train_acc, train_total, train_correct = 0, 0, 0
val_accuracy_index = []
val_acc, val_total, val_correct = 0, 0, 0
test_accuracy_index = []
test_acc, test_total, test_correct = 0, 0, 0
losses_index = []
for epoch in range(num_epochs):  # loop over the dataset multiple times
    start_time = datetime.now()
    net.train()
    epoch_loss = 0
    # Reset the counters so the reported accuracy belongs to this epoch only,
    # matching epoch_loss, instead of accumulating over every epoch so far.
    train_total, train_correct = 0, 0
    for data in train_loader:
        # get the inputs, already moved to the GPU or CPU
        inputs, labels = _unpack_batch(data)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward pass
        outputs = net(inputs)

        # Running training accuracy for this epoch; detach() keeps the
        # bookkeeping out of the autograd graph.
        _, predicted = torch.max(outputs.detach(), 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        train_acc = train_correct / train_total
        train_accuracy_index.append(train_acc)

        # loss, back prop and update params
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Mean batch loss for the epoch.
    epoch_loss = epoch_loss / len(train_loader)
    time_elapsed = datetime.now() - start_time
    losses_index.append(epoch_loss)

    # Validation set: validation batches have the same dict layout as training
    # batches, so they go through the same unpacking helper.
    val_correct, val_total = _evaluate(net, val_loader)
    val_acc = val_correct / val_total if val_total else 0
    val_accuracy_index.append(val_acc)

    # NOTE(review): no separate test loader is visible in this notebook; the
    # "test" pass was a second identical pass over val_loader. The validation
    # result is reused so the test_* variables and the log format stay available.
    # Replace with _evaluate(net, <test loader>) once one exists.
    test_correct, test_total = val_correct, val_total
    test_acc = val_acc
    test_accuracy_index.append(test_acc)

    print(f'Epoch [{epoch+1}/{num_epochs}], \
          Epoch Loss: {epoch_loss:.4f} \
          Training Accuracy: {train_acc:.4f}  \
          Validation Accuracy: {val_acc:.4f} \
          Test Accuracy: {test_acc:.4f} - (time={time_elapsed})')