In [2]:
import os
import json
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
from tqdm import tqdm
import sys
sys.path.append('./')
from utils import distribute_over_GPUs, validate_arguments
from model import Model, Identity
from get_dataloader import get_dataloader
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve,average_precision_score, precision_recall_curve,auc
import h5py
from losses import edl_mse_loss, edl_digamma_loss, edl_log_loss, relu_evidence
from helpers import get_device, rotate_img, one_hot_embedding

In [21]:
class Record:
    """Plain container of run options for this notebook.

    Instances are passed as ``opt`` to ``Net`` and to the project helpers
    ``distribute_over_GPUs`` and ``get_dataloader``; the exact meaning of most
    fields is defined by those helpers, which are not shown in this notebook
    (TODO: confirm field semantics there).

    Args:
        **overrides: Optional ``name=value`` pairs replacing the defaults
            below, e.g. ``Record(batch_size=32)``. Only names that already
            exist as options are accepted.

    Raises:
        TypeError: If an override names an option that does not exist.
    """

    def __init__(self, **overrides):
        # NOTE: attribute order is preserved on purpose; vars(self) is dumped
        # to JSON by the setup cell and its key order follows this order.

        # Dataset, paths and output locations.
        self.dataset = "nct100k"
        self.save_dir = "new_result"
        self.data_input_dir = "/n/holyscratch01/wadduwage_lab/Nirho"
        self.data_input_dir_test = "/n/holyscratch01/wadduwage_lab/Nirho"
        self.log_path = "new_result"
        self.model_path = "distilled_model.pth"
        self.num_classes = 9
        self.training_data_csv = "nct100k.csv"
        self.test_data_csv = "crc7k.csv"
        self.validation_data_csv = "crc7k.csv"

        # Data-set composition and loader settings.
        self.trainingset_split = None
        self.balanced_training_set = False
        self.balanced_validation_set = False
        self.train_supervised = True
        self.batch_size = 128
        self.batch_size_multiGPU = 512
        self.num_workers = 40

        # Model / run settings.
        self.pretrained = False
        self.finetune = False
        self.grayscale = False
        self.seed = 44
        self.uncertainty = True
        self.epochs = 1
        self.image_size = 224
        self.use_album = False
        self.weight_decay = 1e-6
        self.lr = 1e-3
        # Placeholder string; the setup cell replaces it with a torch.device
        # after the options have been written to the metadata file.
        self.device = " "

        # Augmentation-related settings (presumably read by get_dataloader;
        # TODO confirm).
        self.scale = [0.2, 1.0]
        self.rgb_gaussian_blur_p = 0
        self.rgb_jitter_d = 1
        self.rgb_jitter_p = 0.8
        self.rgb_contrast = 0.2
        self.rgb_contrast_p = 0
        self.rgb_grid_distort_p = 0
        self.rgb_grid_shuffle_p = 0

        # Apply caller overrides last. Unknown names are rejected so a typo
        # cannot silently create an option nothing reads.
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError(f"Unknown Record option(s): {', '.join(unknown)}")
        for name, value in overrides.items():
            setattr(self, name, value)
        

In [30]:
# Run configuration shared by the cells below.
opt = Record()
# Input width of the linear classification head built in Net.
output_dims = 1024

# The metadata file is written under log_path, so that directory has to exist
# too; creating only save_dir breaks as soon as the two settings differ.
for directory in (opt.save_dir, opt.log_path):
    os.makedirs(directory, exist_ok=True)

# Dump the options before opt.device becomes a torch.device, which json cannot
# serialise.
with open(f'{opt.log_path}/metadata_train.txt', 'w') as metadata_file:
    metadata_file.write(json.dumps(vars(opt)))

opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device:', opt.device)

# Seed every random number generator used in this notebook.
random.seed(opt.seed)
np.random.seed(opt.seed)
torch.manual_seed(opt.seed)
torch.cuda.manual_seed(opt.seed)
torch.cuda.manual_seed_all(opt.seed)


Device: cuda:0


In [31]:
# Allow cuDNN to benchmark and pick its kernels for the input shapes it sees.
torch.backends.cudnn.benchmark = True


class Net(nn.Module):
    """Project ``Model`` sub-modules ``f`` and ``g1`` followed by a linear head."""

    def __init__(self, opt):
        super().__init__()

        # Borrow the two sub-modules from the project Model; whether they
        # start from pre-trained weights is decided by opt.pretrained.
        backbone = Model(pretrained=opt.pretrained)
        self.f, self.g1 = backbone.f, backbone.g1
        # Classification head: output_dims features -> opt.num_classes logits.
        self.fc = nn.Linear(output_dims, opt.num_classes, bias=True)

    def forward(self, x):
        """Return ``(out, feature)``: head output and the ``g1`` feature it was computed from."""
        encoded = self.f(x)
        feature = self.g1(torch.flatten(encoded, start_dim=1))
        return self.fc(feature), feature

In [35]:
def train_val(net, data_loader, train_optimizer,device,num_classes,uncertainty=False, criterion=None):
    """Run one pass of ``net`` over ``data_loader`` and collect per-sample results.

    Args:
        net: Module whose forward returns ``(out, feature)``; ``out`` holds one
            score per class for every sample.
        data_loader: Iterable yielding 5-tuples
            ``(data, <ignored>, target, patch_id, slide_id)``.
        train_optimizer: Optimizer to step after every batch, or ``None`` to
            run evaluation only (no gradients are tracked in that case).
        device: Device the input and target tensors are moved to.
        num_classes: Number of classes, used for the uncertainty
            ``num_classes / sum(alpha)``.
        uncertainty: When true, ``relu_evidence(out)`` is collected and an
            ``uncertainty`` column is added to the returned frame.
        criterion: Optional callable ``criterion(out, target)`` returning the
            scalar loss used when training. Defaults to cross-entropy for the
            plain (non-uncertainty) case.

    Returns:
        ``(accuracy_percent, df)`` where ``df`` has the columns ``label``,
        ``prediction``, ``slide_id``, ``patch_id`` and, with ``uncertainty``,
        ``uncertainty``. Accuracy is ``0.0`` when the loader yields nothing.

    Raises:
        ValueError: When training with ``uncertainty=True`` and no
            ``criterion``; the evidential loss needs arguments this function
            cannot supply on its own.
    """
    is_train = train_optimizer is not None
    if is_train and uncertainty and criterion is None:
        raise ValueError(
            "train_val: training with uncertainty=True requires a criterion(out, target) callable"
        )
    # Eval mode is kept even while training: train only the last layers.
    net.eval()

    total_correct, total_num = 0, 0
    data_bar = tqdm(data_loader)
    all_preds, all_labels, all_slides, all_patches = [], [], [], []
    # Per-batch evidence tensors, concatenated once after the loop instead of
    # re-allocating a growing tensor on every batch.
    evidence_chunks = []

    with (torch.enable_grad() if is_train else torch.no_grad()):
        for data, _, target, patch_id, slide_id in data_bar:
            # Honour the requested device instead of assuming CUDA.
            data = data.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # Always use the network that was passed in, never a global.
            out, _feature = net(data)
            if uncertainty:
                # Detach so stored evidence does not keep autograd graphs alive.
                evidence_chunks.append(relu_evidence(out).detach())

            if is_train:
                if criterion is None:
                    loss = torch.nn.functional.cross_entropy(out, target)
                else:
                    loss = criterion(out, target)
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            _, preds = torch.max(out.detach(), 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(target.cpu().numpy())
            all_patches.extend(patch_id)
            all_slides.extend(slide_id)

            total_num += data.size(0)
            # Same top-1 prediction that is stored in the frame.
            total_correct += (preds == target).sum().item()

            data_bar.set_description(f'{"Train" if is_train else "Test"} Epoch: [1] ACC: {total_correct / total_num * 100:.2f}')

    columns = {
        'label': all_labels,
        'prediction': all_preds,
        'slide_id': all_slides,
        'patch_id': all_patches,
    }
    if uncertainty:
        if evidence_chunks:
            all_evidence = torch.cat(evidence_chunks, dim=0)
        else:
            # Empty loader: keep a (0, num_classes) shape so the reductions work.
            all_evidence = torch.zeros((0, num_classes))
        alpha = all_evidence + 1
        u = num_classes / torch.sum(alpha, dim=1, keepdim=True)
        columns['uncertainty'] = u.cpu().numpy()[:, 0]
    df = pd.DataFrame(columns)

    accuracy = total_correct / total_num * 100 if total_num else 0.0
    return accuracy, df

In [36]:
# Build the network, let the project helper place it on the available GPUs,
# and create the data loaders described by opt.
model = Net(opt)
model, num_GPU = distribute_over_GPUs(opt, model)
train_loader, train_data, val_loader, val_data, test_loader, test_data = get_dataloader(opt)

if not opt.finetune:
    # Freeze the `f` sub-module. The helper normally returns a wrapper that
    # exposes the network as `.module`; fall back to the bare model otherwise.
    backbone = getattr(model, "module", model)
    for param in backbone.f.parameters():
        param.requires_grad = False

optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)

scheduler = CosineAnnealingLR(optimizer, opt.epochs)

criterion = edl_mse_loss
loss_criterion = nn.CrossEntropyLoss().to(opt.device)
results = {'train_acc': [],'val_acc': []}

# map_location lets a checkpoint saved on GPU load on a CPU-only host as well.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model.load_state_dict(torch.load(opt.model_path, map_location=opt.device))
model.eval()

# Passing None as the optimizer runs train_val in evaluation mode.
test_acc, df = train_val(model, test_loader, None,opt.device,opt.num_classes,uncertainty=opt.uncertainty)
df.to_csv(f"{opt.log_path}/inference_result.csv")



Let's use 4 GPUs!
opt.data_input_dir:  /n/holyscratch01/wadduwage_lab/Nirho
opt.data_input_dir_test:  /n/holyscratch01/wadduwage_lab/Nirho
reading csv file:  nct100k.csv
reading csv file:  crc7k.csv
reading csv file:  crc7k.csv
Removing non-existing file from dataset: /n/holyscratch01/wadduwage_lab/Nirho/NCT-CRC-HE-100K/ADI/.DS_Store
training patches:  label
ADI     10407
BACK    10566
DEB     11512
LYM     11557
MUC      8896
MUS     13536
NORM     8763
STR     10446
TUM     14317
dtype: int64
Validation patches:  label
ADI     1338
BACK     847
DEB      339
LYM      634
MUC     1035
MUS      592
NORM     741
STR      421
TUM     1233
dtype: int64
Test patches:  label
ADI     1338
BACK     847
DEB      339
LYM      634
MUC     1035
MUS      592
NORM     741
STR      421
TUM     1233
dtype: int64
Saving training/val set to file
{'ADI': 0, 'MUC': 1, 'BACK': 2, 'LYM': 3, 'NORM': 4, 'DEB': 5, 'MUS': 6, 'TUM': 7, 'STR': 8}
{'ADI': 0, 'MUC': 1, 'BACK': 2, 'LYM': 3, 'NORM': 4, 'DEB': 5, 'MUS

Test Epoch: [1] ACC: 96.44: 100%|██████████| 28/28 [00:51<00:00,  1.85s/it]
