In [1]:
from __future__ import print_function

import glob
import os
import sys
from collections import OrderedDict
from os import makedirs

import ipywidgets as widgets
import numpy as np
import pandas as pd
import torch
from ipywidgets import interact, interactive, fixed, interact_manual
from torch.utils.data import DataLoader
# from models import Models

In [2]:
def f(x):
    """Identity callback for the `interact` smoke test: hand back `x` unchanged."""
    value = x
    return value

In [3]:
# Render an interactive control for `f`; the boolean default `x=True` is shown as a checkbox.
i = interact(f, x=True)

interactive(children=(Checkbox(value=True, description='x'), Output()), _dom_classes=('widget-interact',))

In [23]:
# User-tunable settings; `select_model` overwrites these entries in place
# whenever the configuration widget changes.
model_config = {
    "model_type": "MIL",
    "label_type": "Whole slide labels",
    "backbone": "ResNet18",
    "output_class": 1,
    "learning_rate": 0.0001,
    "epochs": 100,
    "pretrained": True,
    "batch_size": 64,
    "freeze_weights": False
}

# Runtime settings that are not exposed through the widget.
hard_config = {
    "num_workers": 0,
    # Use CUDA float tensors when a GPU is present and fall back to CPU float
    # tensors otherwise, so the notebook also runs on CPU-only machines.
    "tensor_type": torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor,
    "optimizer": "Adam",
    "loss_function": "bce_loss",
    "pos_weights": 1.45,
}

In [18]:
# Quick check of the model type currently stored in the configuration.
print(model_config['model_type'])

MIL


In [20]:
def select_model(model_type='MIL', backbone='ResNet18', output_class=1, learning_rate=0.0001, epochs=100, pretrained=True, batch_size=64):
    """Write the widget selections into the global `model_config` and return it.

    The dict is updated in place (not replaced) so that every reference to
    `model_config` sees the latest selection.

    Args:
        model_type: 'MIL' or 'Supervised'. Also decides 'label_type' and
            'freeze_weights'; any other value leaves those two entries as is.
        backbone: backbone name. The dropdown string 'None' is stored as the
            Python value None.
        output_class: number of output classes.
        learning_rate: optimizer learning rate.
        epochs: number of training epochs.
        pretrained: stored under 'pretrained'.
        batch_size: stored under 'batch_size'.

    Returns:
        The updated global `model_config` dict.
    """
    model_config['model_type'] = model_type
    if model_type == 'MIL':
        model_config['label_type'] = 'Whole slide labels'
        model_config['freeze_weights'] = True
    elif model_type == 'Supervised':
        model_config['label_type'] = 'Annotations'
        model_config['freeze_weights'] = False
    # The dropdown can only offer strings; translate its 'None' entry into a
    # real None so that `backbone is None` / `== None` checks work.
    model_config['backbone'] = None if backbone == 'None' else backbone
    model_config['output_class'] = output_class
    model_config['learning_rate'] = learning_rate
    model_config['epochs'] = epochs
    model_config['pretrained'] = pretrained
    # Previously hard-coded to 64, which silently ignored the widget value.
    model_config['batch_size'] = batch_size
    return model_config
# Build the configuration form: list arguments are offered as option lists, the numeric
# settings use explicit IntText/FloatText widgets, and `pretrained=True` is a boolean toggle.
config_widget = interact(select_model, model_type=['MIL', 'Supervised'], backbone=['None', 'ResNet18', 'ResNet34'], output_class=widgets.IntText(value=1), learning_rate=widgets.FloatText(value=0.0001), epochs=widgets.IntText(value=100), pretrained=True, batch_size=widgets.IntText(value=64))
# Rebind `model_config` to the dict returned by the most recent `select_model` call.
model_config = config_widget.widget.result

interactive(children=(Dropdown(description='model_type', options=('MIL', 'Supervised'), value='MIL'), Dropdown…

In [21]:
def create_project():
    """Create the project's data folders relative to the current working directory.

    Folders that already exist are left untouched.
    """
    project_dirs = (
        'data/model training/whole slide labels',
        'data/model training/annotations',
        'data/acquisitions',
        'data/annotations/QuPath',
        'data/annotations/ImageScope',
    )
    for directory in project_dirs:
        makedirs(directory, exist_ok=True)

In [22]:
# Display the current configuration dict.
model_config

{'model_type': 'MIL',
 'label_type': 'Whole slide labels',
 'backbone': 'ResNet18',
 'output_class': 1,
 'learning_rate': 0.0001,
 'epochs': 100,
 'pretrained': True,
 'batch_size': 64}

In [9]:
# Create the data folder tree listed in `create_project`.
create_project()

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as pytorch_models
from collections import OrderedDict
# from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score

class IClassifier(nn.Module):
    """Instance-level classifier: a feature extractor followed by one linear layer.

    Args:
        feature_extractor: module applied to the input batch.
        feature_size: number of features per instance after flattening (K).
        output_class: number of output scores per instance (C).
    """
    def __init__(self, feature_extractor, feature_size, output_class):
        super(IClassifier, self).__init__()

        self.feature_extractor = feature_extractor
        self.fc = nn.Linear(feature_size, output_class)

    def forward(self, x):
        """Return `(feats, c)`: flattened features (N x K) and instance scores (N x C)."""
        feats = self.feature_extractor(x)
        # Flatten once and reuse it. `reshape` also accepts non-contiguous
        # extractor outputs, on which `view` raises.
        feats = feats.reshape(feats.shape[0], -1) # N x K
        c = self.fc(feats) # N x C
        return feats, c

class BClassifier(nn.Module):
    """Bag-level classifier that pools instance features with one attention vector per class.

    For every class the highest-scoring instance provides the query; all
    instances are weighted by the softmax of their scaled dot product with
    that query, and the weighted sum of the value vectors is the class's bag
    representation. A 1-D convolution spanning the whole representation turns
    the C representations into C bag scores.

    Args:
        input_size: feature size per instance (K).
        output_class: number of classes (C).
        dropout_v: dropout probability applied before the value projection.
    """
    def __init__(self, input_size, output_class, dropout_v=0.0): # K, L, N
        super(BClassifier, self).__init__()
        self.q = nn.Linear(input_size, 128)
        self.v = nn.Sequential(nn.Dropout(dropout_v),nn.Linear(input_size, input_size),)
        self.fcc = nn.Conv1d(output_class, output_class, kernel_size=input_size)

    def forward(self, feats, c, top_feat=None): # N x K, N x C
        """Aggregate a bag.

        Args:
            feats: instance features, N x K.
            c: instance scores, N x C.
            top_feat: optional per-class queries; `top_feat[i]` replaces the
                query computed from the top-scoring instance of class i.

        Returns:
            `(C, A, B)`: bag scores (1 x C), attention weights (N x C) and
            bag representations (1 x C x K).
        """
        V = self.v(feats) # N x K
        Q = self.q(feats).view(feats.shape[0], -1) # N x 128
        scale = torch.sqrt(torch.tensor(Q.shape[1], dtype=torch.float32, device=feats.device))
        attention_columns = []
        bag_rows = []
        for i in range(c.shape[1]):
            if top_feat is None:
                # Pick the top instance of class i from the ORIGINAL ordering.
                # Re-sorting `feats` in place on every pass made the indices
                # of classes i > 0 refer to an already permuted tensor.
                _, indices = torch.sort(c[:, i], 0, True)
                q_max = self.q(feats[indices[0]].view(1, -1)) # 1 x 128
            else:
                q_max = top_feat[i]
            attention = F.softmax(torch.mm(Q, q_max.view(-1, 1)) / scale, 0) # N x 1
            attention_columns.append(attention)
            bag_rows.append(torch.sum(torch.mul(attention, V), 0).view(1, -1)) # 1 x K
        A = torch.cat(attention_columns, 1) # N x C
        B = torch.cat(bag_rows, 0) # C x K
        B = B.view(1, B.shape[0], B.shape[1]) # 1 x C x K
        C = self.fcc(B) # 1 x C x 1
        C = C.view(1, -1)
        return C, A, B
    
class MILNet(nn.Module):
    """Chains an instance classifier and a bag classifier into one module."""
    def __init__(self, i_classifier, b_classifier):
        super(MILNet, self).__init__()
        self.i_classifier, self.b_classifier = i_classifier, b_classifier

    def forward(self, x, top_feat=None):
        """Return `(instance scores, bag prediction, A, B)` for the input `x`.

        `top_feat` is forwarded unchanged to the bag classifier.
        """
        instance_feats, instance_scores = self.i_classifier(x)
        bag_prediction, attention, bag_repr = self.b_classifier(instance_feats, instance_scores, top_feat)
        return instance_scores, bag_prediction, attention, bag_repr
        

class Models():
    """Builds, initialises and trains the notebook's classifiers.

    Args:
        model_config: dict of model settings. Keys read by this class:
            'model_type' ('MIL' or 'Supervised'), 'backbone', 'output_class',
            'learning_rate', 'epochs', 'pretrained', 'freeze_weights'.
        hard_config: dict of runtime settings. Keys read by this class:
            'tensor_type' and 'pos_weights'. Only the training and evaluation
            methods need it.
        dataloader: iterable consumed by `train_epoch` / `test_epoch`; the
            expected layout depends on the model type (see `train_epoch`).
    """
    def __init__(self, model_config, hard_config=None, dataloader=None):
        self.model_config = model_config
        self.hard_config = hard_config
        self.dataloader = dataloader

    # ------------------------------------------------------------------ helpers
    def _has_backbone(self):
        """True when a CNN backbone is configured (the widget passes the string 'None')."""
        return self.model_config['backbone'] not in (None, 'None')

    def _to_tensor(self, value):
        """Convert `value` to the tensor type configured in `hard_config['tensor_type']`.

        Unlike calling the legacy tensor class directly, this also accepts
        Python scalars and tensors whose dtype or device differs from the target.
        """
        return torch.as_tensor(value).type(self.hard_config['tensor_type'])

    def _device(self):
        """Device on which tensors of the configured tensor type live."""
        return torch.empty(0).type(self.hard_config['tensor_type']).device

    def _default_criterion(self):
        """BCE-with-logits loss weighted by `hard_config['pos_weights']`."""
        pos_weight = self._to_tensor(self.hard_config['pos_weights']).reshape(-1)
        return nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # ------------------------------------------------------------- construction
    def create_model(self):
        """Build the network described by `model_config`.

        Returns:
            A `MILNet` for model type 'MIL', an `IClassifier` for 'Supervised'.

        Raises:
            ValueError: for an unknown backbone or model type.
        """
        model_config = self.model_config
        backbone = model_config['backbone']
        feature_size = 512
        if not self._has_backbone():
            # No CNN: the inputs are expected to be feature vectors already.
            feature_extractor = nn.Identity()
        elif backbone == 'ResNet18':
            feature_extractor = pytorch_models.resnet18(pretrained=False, norm_layer=nn.InstanceNorm2d)
        elif backbone == 'ResNet34':
            feature_extractor = pytorch_models.resnet34(pretrained=False, norm_layer=nn.InstanceNorm2d)
        else:
            raise ValueError('Unsupported backbone: %r' % (backbone,))
        # Drop the extractor's own classification head; the classifiers below add theirs.
        feature_extractor.fc = nn.Identity()
        if model_config['freeze_weights'] and model_config["pretrained"]:
            for param in feature_extractor.parameters():
                param.requires_grad = False
        model_type = model_config['model_type']
        if model_type == 'MIL':
            i_classifier = IClassifier(feature_extractor, feature_size, model_config['output_class'])
            b_classifier = BClassifier(feature_size, model_config['output_class'])
            return MILNet(i_classifier, b_classifier)
        if model_type == 'Supervised':
            return IClassifier(feature_extractor, feature_size, model_config['output_class'])
        raise ValueError('Unsupported model_type: %r' % (model_type,))

    def load_weights(self, model, weights_path='encoder.pth'):
        """Copy encoder weights from a checkpoint into `model`.

        The checkpoint entries 'module.l1.*' and 'module.l2.*' are discarded
        when present. The remaining tensors are matched to the target's state
        dict BY POSITION, not by name, so the checkpoint must list its
        parameters in the same order as the target module.

        Args:
            model: network returned by `create_model`.
            weights_path: path of the checkpoint file.

        Returns:
            `model`. It is left unchanged when the model type is neither
            'MIL' nor 'Supervised'.
        """
        model_type = self.model_config['model_type']
        if model_type == 'MIL':
            target = model.i_classifier
        elif model_type == 'Supervised':
            target = model
        else:
            return model
        # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
        # map_location='cpu' lets GPU-saved checkpoints load on CPU-only machines;
        # load_state_dict copies the values onto the target's own device.
        checkpoint = torch.load(weights_path, map_location='cpu')
        for head_key in ('module.l1.weight', 'module.l1.bias', 'module.l2.weight', 'module.l2.bias'):
            checkpoint.pop(head_key, None)
        new_state_dict = OrderedDict(zip(target.state_dict().keys(), checkpoint.values()))
        target.load_state_dict(new_state_dict, strict=False)
        return model

    # ----------------------------------------------------------------- training
    def train_epoch(self, model, optimizer, criterion):
        """Run one training pass over `self.dataloader`.

        Expected dataloader layout:
            * 'Supervised': yields dicts with 'input' and 'label' batches.
            * 'MIL' with a backbone: yields bags, each an iterable of such
              dicts; the first label of a batch is used as the bag label.
            * 'MIL' without a backbone: yields CSV paths, one bag per file,
              with all columns but the last used as instance features.

        Returns:
            `(model, mean_loss)`, the mean being taken over optimisation
            steps (0.0 when no step was made).

        Raises:
            ValueError: for an unknown model type.
        """
        model_config = self.model_config
        dataloader = self.dataloader
        model_type = model_config['model_type']
        if model_type not in ('Supervised', 'MIL'):
            raise ValueError('Unsupported model_type: %r' % (model_type,))

        loss_total = 0.0
        steps = 0
        model.train()

        if model_type == 'Supervised':
            for iteration, batch in enumerate(dataloader):
                optimizer.zero_grad()
                patches = self._to_tensor(batch['input'])
                labels = self._to_tensor(batch['label'])
                feats, classes = model(patches)
                loss = criterion(classes, labels.reshape(classes.shape))
                loss.backward()
                optimizer.step()
                loss_total += loss.item()
                steps += 1
                if iteration % 100 == 0:
                    sys.stdout.write('\r[%d/%d] loss: %.4f' % (iteration, len(dataloader), loss.item()))

        elif self._has_backbone():
            for bag_index, bag in enumerate(dataloader):
                for batch in bag:
                    optimizer.zero_grad()
                    patches = self._to_tensor(batch['input'])
                    labels = self._to_tensor(batch['label'])
                    label = labels[0]
                    scores, bag_prediction, attentions, representation = model(patches)
                    max_prediction, _ = torch.max(scores, 0)
                    loss_top = criterion(max_prediction.view(1, -1), label.view(1, -1))
                    loss_bag = criterion(bag_prediction.view(1, -1), label.view(1, -1))
                    loss = 0.5*loss_top + 0.5*loss_bag
                    # Back-propagate the combined loss (the bag loss alone was
                    # used before, leaving the instance-level term without effect).
                    loss.backward()
                    optimizer.step()
                    loss_total += loss.item()
                    steps += 1
                sys.stdout.write('\r[%d/%d] loss: %.4f' % (bag_index, len(dataloader), loss_total))

        else:
            for bag_index, bag in enumerate(dataloader):
                optimizer.zero_grad()
                df = pd.read_csv(bag)
                feats = df.iloc[:, :-1].to_numpy()
                # NOTE(review): the label is read from the first column although the
                # features exclude only the LAST column -- confirm the CSV layout.
                label = df.iloc[0, 0]
                # Feed the bag as N x K; an extra leading dimension would be
                # flattened into a single instance by the instance classifier.
                bag_feats = self._to_tensor(feats)
                bag_label = self._to_tensor([label])
                scores, bag_prediction, attentions, representation = model(bag_feats)
                max_prediction, _ = torch.max(scores, 0)
                bag_loss = criterion(bag_prediction.view(1, -1), bag_label.view(1, -1))
                max_loss = criterion(max_prediction.view(1, -1), bag_label.view(1, -1))
                loss = 0.5*bag_loss + 0.5*max_loss
                loss.backward()
                optimizer.step()
                loss_total += loss.item()
                steps += 1
                sys.stdout.write('\r[%d/%d] bag loss: %.4f, %.4f' % (bag_index, len(dataloader), max_loss.item(), bag_loss.item()))

        return model, loss_total / max(steps, 1)

    def test_epoch(self, model, criterion=None):
        """Evaluate a 'Supervised' model on `self.dataloader` without gradient tracking.

        Args:
            model: network returning `(features, logits)`.
            criterion: loss to report; when None, the weighted BCE loss built
                from `hard_config['pos_weights']` is used.

        Returns:
            `(labels, predictions, mean_loss)`, where `labels` and
            `predictions` are lists with one length-C array per sample
            (predictions are raw logits).

        Raises:
            NotImplementedError: for model types other than 'Supervised'.
        """
        if self.model_config['model_type'] != 'Supervised':
            raise NotImplementedError("test_epoch only supports model_type 'Supervised'")
        if criterion is None:
            criterion = self._default_criterion()
        dataloader = self.dataloader
        loss_total = 0.0
        steps = 0
        all_labels = []
        all_predictions = []
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(dataloader):
                patches = self._to_tensor(batch['input'])
                feats, classes = model(patches)
                labels = self._to_tensor(batch['label']).reshape(classes.shape)
                loss = criterion(classes, labels)
                loss_total += loss.item()
                steps += 1
                if iteration % 100 == 0:
                    sys.stdout.write('\r[%d/%d] loss: %.4f' % (iteration, len(dataloader), loss.item()))
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(classes.cpu().numpy())
        return all_labels, all_predictions, loss_total / max(steps, 1)

    def train(self, model, optimizer=None):
        """Train for `model_config['epochs']` epochs, evaluating after each one.

        Args:
            model: network to train; it is moved to the device of the
                configured tensor type.
            optimizer: optimizer to use; when None an Adam optimizer is built
                from `model_config['learning_rate']`.

        Returns:
            `(model, thresholds_optimal, accuracy, auc, train_loss, test_loss)`
            from the last epoch; every metric is None when 'epochs' is 0.
        """
        # scikit-learn is imported here because the notebook's import cell has
        # this import commented out and only `train` needs these functions.
        from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

        def optimal_thresh(fpr, tpr, thresholds, p=0):
            """Pick the ROC point minimising `(fpr - tpr) - p * tpr / (fpr + tpr + 1)`."""
            loss = (fpr - tpr) - p * tpr / (fpr + tpr + 1)
            idx = np.argmin(loss, axis=0)
            return fpr[idx], tpr[idx], thresholds[idx]

        def multi_label_roc(labels, predictions, num_classes, pos_label=1):
            """Per-class AUC, ROC thresholds and optimal threshold."""
            aucs = []
            thresholds = []
            thresholds_optimal = []
            for c in range(0, num_classes):
                label = labels[:, c]
                prediction = predictions[:, c]
                fpr, tpr, threshold = roc_curve(label, prediction, pos_label=pos_label)
                _, _, threshold_optimal = optimal_thresh(fpr, tpr, threshold)
                aucs.append(roc_auc_score(label, prediction))
                thresholds.append(threshold)
                thresholds_optimal.append(threshold_optimal)
            return aucs, thresholds, thresholds_optimal

        model_config = self.model_config
        num_classes = model_config['output_class']
        # Inputs are created with the configured tensor type, so the model has
        # to live on the same device.
        model = model.to(self._device())
        if optimizer is None:
            optimizer = torch.optim.Adam(model.parameters(), lr=model_config['learning_rate'], betas=(0.5, 0.9), weight_decay=5e-3)
        criterion = self._default_criterion()

        thresholds_optimal = accuracy = auc = train_loss = test_loss = None
        for _ in range(model_config['epochs']):
            model, train_loss = self.train_epoch(model, optimizer, criterion)
            labels, predictions, test_loss = self.test_epoch(model, criterion)
            test_labels = np.array(labels).reshape(-1, num_classes)
            test_predictions = np.array(predictions).reshape(-1, num_classes)
            auc_value, _, thresholds_optimal = multi_label_roc(test_labels, test_predictions, num_classes, pos_label=1)
            # Threshold into a separate array: overwriting the logits in place
            # with 1 and then comparing again against a threshold above 1
            # would reset every positive prediction to 0.
            binary_predictions = np.zeros_like(test_predictions)
            for k in range(num_classes):
                binary_predictions[:, k] = test_predictions[:, k] >= thresholds_optimal[k]
            accuracy = accuracy_score(test_labels, binary_predictions)
            auc = np.mean(np.array(auc_value))
        return model, thresholds_optimal, accuracy, auc, train_loss, test_loss

In [34]:
# Instantiate the model helper without a dataloader and start training.
# NOTE(review): the `Models` class defined in this notebook has no `train_model`
# method (its training entry point is `train`) -- confirm this cell is still current.
models = Models(model_config, hard_config, None)
models.train_model()

called


0

In [None]:
class Dataset():
    """Image dataset indexed by a CSV file.

    Column 0 of the CSV is read as the image path and column 1 as the label.
    Each item is a dict `{'input': image, 'label': label}`, passed through
    `transform` when one is given.
    """
    def __init__(self, csv_file, transform=None):
        self.files_list = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.files_list)

    def __getitem__(self, idx):
        """Open the image of row `idx` and return the (optionally transformed) sample."""
        table = self.files_list
        image_path = table.iloc[idx, 0]
        target = table.iloc[idx, 1]
        sample = {'input': Image.open(image_path), 'label': target}
        return self.transform(sample) if self.transform else sample
    
class Compose(object):
    """Apply a sequence of callables one after another."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        """Feed `img` through every transform in order and return the final result."""
        result = img
        for step in self.transforms:
            result = step(result)
        return result

    def __repr__(self):
        """Class name followed by one indented line per transform."""
        lines = [self.__class__.__name__ + '(']
        lines.extend('    {0}'.format(step) for step in self.transforms)
        lines.append(')')
        return '\n'.join(lines)
    
class ToTensor(object):
    """Sample transform: convert the 'input' entry with `VF.to_tensor`, keep the label."""
    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        return {'input': VF.to_tensor(image), 'label': target}
    
class Crop(object):
    """Sample transform: centre-crop the 'input' image to a fixed size.

    The default size is taken from `args.patch_size` when this class
    statement runs, so `args` has to be defined before this cell executes.
    """
    def __init__(self, output_size=(args.patch_size, args.patch_size)):
        self.size = output_size

    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        center_crop = transforms.CenterCrop(self.size)
        return {'input': center_crop(image), 'label': target}

class ResizedCrop(object):
    """Sample transform: random resized crop half of the time, plain resize otherwise.

    The default size is taken from `args.patch_size` when this class
    statement runs, so `args` has to be defined before this cell executes.
    """
    def __init__(self, output_size=(args.patch_size, args.patch_size ), scale=(0.8, 1.25), ratio=(0.8, 1.25), resample=Image.BILINEAR):
        self.size, self.scale = output_size, scale
        self.ratio, self.resample = ratio, resample

    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        # One random draw decides between the random crop and the plain resize.
        if random.random() > 0.5:
            operation = transforms.RandomResizedCrop(self.size, self.scale, self.ratio, self.resample)
        else:
            operation = transforms.Resize(self.size)
        return {'input': operation(image), 'label': target}
    
class Affine(object):
    """Sample transform: apply a random affine warp to the image half of the time."""
    def __init__(self, degrees=(-180, 180), translate=None, scale=(0.8, 1.25), shear=(-15, 15, -15, 15), resample=Image.BILINEAR):
        self.degrees, self.translate = degrees, translate
        self.scale, self.shear = scale, shear
        self.resample = resample

    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        # One random draw decides whether the warp is applied at all.
        if random.random() > 0.5:
            warp = transforms.RandomAffine(self.degrees, self.translate, self.scale, self.shear, self.resample)
            image = warp(image)
        return {'input': image, 'label': target}
    
class Perspective(object):
    """Sample transform: pass the image through `transforms.RandomPerspective`."""
    def __init__(self, distortion=0.1, p=0.2, resample=Image.BILINEAR):
        self.distortion, self.p, self.resample = distortion, p, resample

    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        warp = transforms.RandomPerspective(self.distortion, self.p, self.resample)
        return {'input': warp(image), 'label': target}
    
class Flip(object):
    """Sample transform: random horizontal flip followed by random vertical flip, each with probability `p`."""
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        flips = (
            transforms.RandomHorizontalFlip(self.p),
            transforms.RandomVerticalFlip(self.p),
        )
        for flip in flips:
            image = flip(image)
        return {'input': image, 'label': target}
    
class Jitter(object):
    """Sample transform: apply `transforms.ColorJitter` when a random draw exceeds 0.8.

    `value` is passed as each of the three ColorJitter arguments.
    """

    def __init__(self, value=0.2):
        self.value = value

    def __call__(self, sample):
        image, target = sample['input'], sample['label']
        apply_jitter = random.random() > 0.8
        if apply_jitter:
            strength = self.value
            image = transforms.ColorJitter(strength, strength, strength)(image)
        return {'input': image, 'label': target}
    
class DataSets():
    """Writes the label CSVs for the training folders and builds `DataLoader`s from them.

    Args:
        model_config: dict; 'model_type' and 'batch_size' are read here.
        hard_config: dict; 'num_workers' is read here.
    """
    def __init__(self, model_config, hard_config):
        self.model_config = model_config
        self.hard_config = hard_config

    @staticmethod
    def _write_label_csv(files, label, csv_path):
        """Write a two-column CSV (file path, constant `label`) for `files`."""
        frame = pd.DataFrame(files).assign(label=np.full(len(files), float(label)))
        frame.to_csv(csv_path, index=False)

    def generate_csv(self):
        """Create the CSV index files for the configured model type.

        * 'MIL': every bag folder below
          'data/model training/whole slide labels/{negative,positive}/'
          receives a 'patches.csv' listing its '*.jpg' files with label 0
          (negative) or 1 (positive).
        * 'Supervised': 'negative_files.csv' and 'positive_files.csv' are
          written to 'data/model training/annotations/', listing the '*.jpg'
          files of the 'negative' and 'positive' sub-folders.
        """
        model_config = self.model_config
        class_labels = (('negative', 0), ('positive', 1))
        if model_config['model_type'] == 'MIL':
            for class_dir, label in class_labels:
                # One sub-folder per bag; this matches the '*/*/*.csv' pattern
                # that get_data_loader searches for.
                bag_pattern = 'data/model training/whole slide labels/%s/*/' % class_dir
                for bag_dir in sorted(glob.glob(bag_pattern)):
                    patches = sorted(glob.glob(os.path.join(bag_dir, '*.jpg')))
                    self._write_label_csv(patches, label, os.path.join(bag_dir, 'patches.csv'))
        if model_config['model_type'] == 'Supervised':
            for class_dir, label in class_labels:
                files = sorted(glob.glob('data/model training/annotations/%s/*.jpg' % class_dir))
                self._write_label_csv(files, label, 'data/model training/annotations/%s_files.csv' % class_dir)

    def get_data_loader(self):
        """Return one training `DataLoader` per bag CSV for model type 'MIL'.

        Returns:
            A list of dataloaders (one per CSV found under
            'data/model training/whole slide labels/*/*/'), or None for any
            other model type.
        """
        model_config = self.model_config
        if model_config['model_type'] == 'MIL':
            csv_paths = sorted(glob.glob('data/model training/whole slide labels/*/*/*.csv'))
            train_dataloaders = []
            for csv_path in csv_paths:
                bag_loader, _ = self.train_dataloader(csv_path)
                # append, not extend: extend would iterate the loader and
                # store its batches instead of the loader itself.
                train_dataloaders.append(bag_loader)
            return train_dataloaders
        return None

    @staticmethod
    def _train_transform():
        """Augmentation pipeline used by the training loaders."""
        return Compose([
            ResizedCrop(),
            Jitter(),
            Flip(),
            Affine(),
            Perspective(),
            ToTensor()
        ])

    @staticmethod
    def _test_transform():
        """Deterministic pipeline used by the test loaders."""
        return Compose([
            Crop(),
            ToTensor()
        ])

    def _make_loader(self, csv_file_path, transform, batch_size, shuffle):
        """Build `(DataLoader, dataset length)` for one CSV file."""
        transformed_dataset = Dataset(csv_file=csv_file_path, transform=transform)
        dataloader = DataLoader(transformed_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=self.hard_config['num_workers'], drop_last=False)
        return dataloader, len(transformed_dataset)

    def train_dataloader(self, csv_file_path):
        """Shuffled, augmented loader using the configured batch size."""
        return self._make_loader(csv_file_path, self._train_transform(), self.model_config['batch_size'], True)

    def test_dataloader(self, csv_file_path):
        """Cropped-only loader using the configured batch size (shuffled, as before)."""
        return self._make_loader(csv_file_path, self._test_transform(), self.model_config['batch_size'], True)

    def train_bag_dataloader(self, csv_file_path, bag_size):
        """Shuffled, augmented loader yielding batches of `bag_size` patches."""
        return self._make_loader(csv_file_path, self._train_transform(), bag_size, True)

    def test_bag_dataloader(self, csv_file_path, bag_size):
        """Unshuffled, cropped-only loader yielding batches of `bag_size` patches."""
        return self._make_loader(csv_file_path, self._test_transform(), bag_size, False)

In [13]:
# Instantiate the model helper from the current configuration and build the network.
models = Models(model_config)
model = models.create_model()

In [14]:
# Display the module tree of the freshly built model.
model

MILNet(
  (i_classifier): IClassifier(
    (feature_extractor): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=

In [15]:
# Load encoder weights into the model from the default checkpoint path ('encoder.pth').
model = models.load_weights(model)

In [16]:
# Inspect the model parameters after the weights were loaded.
model.state_dict()

OrderedDict([('i_classifier.feature_extractor.conv1.weight',
              tensor([[[[ 4.3476e-02,  9.9544e-02,  1.3139e-01,  ...,  3.7277e-02,
                         -1.3815e-02, -7.1204e-02],
                        [ 5.1369e-02,  1.1462e-01,  1.0128e-01,  ..., -7.2028e-02,
                         -6.0969e-02, -5.0834e-02],
                        [-5.9027e-03,  6.1231e-02,  4.6382e-02,  ..., -8.4130e-02,
                         -1.2275e-01, -1.0006e-01],
                        ...,
                        [-8.3053e-03, -1.7293e-02, -1.1493e-02,  ...,  1.0514e-02,
                         -2.8126e-02, -4.1263e-02],
                        [ 2.7841e-02, -3.1016e-02, -1.0077e-02,  ..., -3.1726e-03,
                         -1.0925e-02, -2.6644e-02],
                        [ 3.4669e-02, -1.0229e-02, -2.7153e-02,  ..., -1.8071e-02,
                         -1.4659e-02, -2.9459e-02]],
              
                       [[-2.1105e-02, -1.0271e-01, -1.7805e-01,  ..., -1.3346e-01,
 

In [42]:
# Show the dict returned by the most recent `select_model` call of the widget.
config_widget.widget.result

{'model_type': 'Supervised',
 'label_type': 'Whole slide label',
 'backbone': 'ResNet18',
 'image_type': 'RGB',
 'output_class': 1}