In [1]:
import os
import nibabel as nib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import pickle
import random

from scipy import ndimage

from skimage.filters import threshold_otsu
from skimage.measure import label, regionprops
from skimage import transform
from skimage.transform import resize
import skimage.exposure as skie

import ot

import torch
from torch import manual_seed
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
import torchvision.models as models
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from torchsummary import summary
import torchvision
from torchvision import datasets, models, transforms

from torchmetrics import Accuracy

import numpy as np
import matplotlib.pyplot as plt
import time
import os
import copy

from PIL import Image 

torch.cuda.is_available()

torch.set_num_threads(3)

def compactness(blob_labels):
    import math
    compactness=[]
    region=regionprops(blob_labels)
    for rp in region:
        area=rp.area
        #perimeter=rp.perimeter
        perimeter=rp.perimeter_crofton
        c=(4*math.pi*area)/(perimeter**2)
        compactness.append(c)
    return compactness

def get_blobs(image,lesion_size):
    """
    gets the noise image after pre processing and returns blobs with the size equal to lesion size
    image is the noise image 
    lesion_size can be an int
    """
    
    labeled, nr_objects = ndimage.label(image)
    sizes = ndimage.sum_labels(image,labeled,range(nr_objects+1))    
    mask_size=sizes!=lesion_size
    small_blobs=labeled.copy()
    remove_pixel = mask_size[small_blobs]
    small_blobs[remove_pixel] = 0 
    
    return small_blobs


def list_lesions(image,lesion_size):
    round_lesion_c=0.8 #lesions with compactness above this value are considered round
    not_round_lesion_c=0.4 #lesions with compactness bellow this value are considered not round
    
    border=2
    blur=0.75
    
    less_size=lesion_size*0.05*blur #the lesion is smaller after the blur
    
    small_blobs=get_blobs(image,lesion_size)
    rb=regionprops(small_blobs)
    round_lesions=[]
    not_round_lesions=[]
    c_list=compactness(small_blobs)
    
    for blob in range(len(c_list)):
        
        if c_list[blob]>round_lesion_c:#round lesions

            blob_img=rb[blob].image.astype(float)
            #the image is padded because when it is smoothed it increases a bit
            pad_img=np.pad(array=blob_img, pad_width=border, mode='constant', constant_values=0)
            blur_image=ndimage.gaussian_filter(pad_img, blur)
            blur_image[blur_image<0.2]=0
            energy=round(np.sum(blur_image.astype(float)),2)
            if energy<=lesion_size-less_size+1 and energy>=lesion_size-less_size-1:
                round_lesions.append([blur_image,round(c_list[blob],3)])
        
        
        elif c_list[blob]<not_round_lesion_c:# not round lesions
            blob_img=rb[blob].image.astype(float)
            #the image is padded because when it is smoothed it increases a bit
            pad_img=np.pad(array=blob_img, pad_width=border, mode='constant', constant_values=0)
            blur_image=ndimage.gaussian_filter(pad_img, blur)
            blur_image[blur_image<0.15]=0
            energy=round(np.sum(blur_image.astype(float)),2)
            if energy<=lesion_size-less_size+1 and energy>=lesion_size-less_size-1:
                not_round_lesions.append([blur_image,round(c_list[blob],3)])
            
    return round_lesions, not_round_lesions

def create_lesions(lesion_number,lesion_size,factor=1):
    #factor is the value for which we multiply the sides of the lesion
    round_lesions=[]
    not_round_lesions=[]
    s=0
    size_noise=256
    blur_radius=2

    while len(not_round_lesions)<=lesion_number or len(round_lesions)<=lesion_number:
        #create noise
        np.random.seed(s)
        noise_img=np.random.rand(size_noise,size_noise)

        #smooth noise
        imgf=ndimage.gaussian_filter(noise_img, blur_radius)

        #create binary image
        thr=threshold_otsu(imgf)
        imgf_bin=imgf>thr
        
        #morphologic changes
        erosion_image=ndimage.binary_erosion(imgf_bin)
        open_er_img=ndimage.binary_opening(erosion_image)
        erosion_image2=ndimage.binary_erosion(open_er_img)

        #images
        

        #as a result from one noise image we create several images that can be used to create the lesions
        round_lesions_open, not_round_lesions_open=list_lesions(open_er_img,lesion_size)
        round_lesions_er, not_round_lesions_er=list_lesions(erosion_image2,lesion_size)
        
        round_lesions=round_lesions+round_lesions_open+round_lesions_er
        not_round_lesions=not_round_lesions+not_round_lesions_open+not_round_lesions_er
        
        
        #print('len lists:',len(round_lesions),len(not_round_lesions))
        '''        
        plt.figure(figsize=(15,7))
        plt.subplot(2,4,1)
        plt.imshow(noise_img)
        plt.title('noise')
        plt.subplot(2,4,2)
        plt.imshow(imgf)
        plt.title('blur')
        plt.subplot(2,4,3)
        plt.imshow(imgf_bin)
        plt.title('binary img')
        plt.subplot(2,4,4)
        plt.imshow(erosion_image)
        plt.title('erosion')
        plt.subplot(2,4,5)
        plt.imshow(open_er_img)
        plt.title('open')
        plt.subplot(2,4,6)
        plt.imshow(erosion_image2)
        plt.title('erosion2')
        plt.subplot(2,4,7)
        plt.imshow(dilated_image)
        plt.title('dilation')
        break
        '''
        
        s+=1
    print(f'round lesions: {len(round_lesions)}/{lesion_number} === not round lesions {len(not_round_lesions)}/{lesion_number}')
    print(f'number of seeds used: {s-1}')
    print(f'the lesions have size between {lesion_size-lesion_size*0.05*0.75-1} and {lesion_size-lesion_size*0.05*0.75+1}')
    
    lesions_r=round_lesions[:lesion_number]
    lesions_nr=not_round_lesions[:lesion_number]
    round_lesions=[[np.array(resize(round_lesions[i][0],(round_lesions[i][0].shape[0]*factor,round_lesions[i][0].shape[1]*factor))),round_lesions[i][1]] for i in range(len(lesions_r))]
    not_round_lesions=[[np.array(resize(not_round_lesions[i][0],(not_round_lesions[i][0].shape[0]*factor,not_round_lesions[i][0].shape[1]*factor))),not_round_lesions[i][1]] for i in range(len(lesions_nr))]
    
    return round_lesions, not_round_lesions

def rescale_values(image,max_val,min_val):
    '''
    image - numpy array
    max_val/min_val - float
    '''
    return (image-image.min())/(image.max()-image.min())*(max_val-min_val)+min_val

def select_coordinates(slice_image, lesions,white_constant,seed):
    '''
    slice_image is the brain slice to use
    lesions is a list of the lesions (with len=number_of_lesions) to use
    colour_lesion is either 'black' or 'white'
    white_constant is the constant that is multiplied with the lesion mask to create lighter or darker lesions
    '''

    np.random.seed(seed)
    
    brain_mask=np.array(slice_image)>0
    brain_mask=brain_mask.astype(float)
    x,y = np.where(brain_mask == 1.)
    
    lesion_brain=slice_image.copy().astype(float)
    lesion_mask=brain_mask.copy() 
    lesion_added=0
    ground_truth=np.zeros(slice_image.shape)
    min_value=0.1 #min value for the lesion intensity 
    max_value=0.9 #max value for the lesion intensity
    brain_image=slice_image.copy()
    
    while lesion_added<len(lesions):
        i=np.random.choice(np.arange(len(x)))
        coordinate=[x[i],y[i]]
        lesion=lesions[lesion_added]
        lesion_rescale = rescale_values(lesion,max_value,min_value)
        lesion_rescale=rescale_values(lesion,white_constant,min_value)
        
        #creating the lesion mask and ground truth
        if (brain_mask[coordinate[0]: coordinate[0] + lesion.shape[0], coordinate[1]: coordinate[1] + lesion.shape[1]] ==1).all():
            #checks if the lesion that will be added is completly in a white space of the lesion mask 
            #(this means that the new lesion is not overlaping an existing one and is completly in the brain area)
            lesion_mask[coordinate[0]: coordinate[0] + lesion.shape[0], coordinate[1]: coordinate[1] + lesion.shape[1]] -= lesion_rescale
            lesion_added+=1
            ground_truth[coordinate[0]: coordinate[0] + lesion.shape[0], coordinate[1]: coordinate[1] + lesion.shape[1]]+=lesion
            brain_mask=lesion_mask

    
    brain_image=slice_image.copy()
    brain_mask=np.array(slice_image)>0
    #creating the white lesions
       
    brain_image[brain_image>0]=1-brain_image[brain_image>0]        
    brain_image[lesion_mask!=0]*=lesion_mask[lesion_mask!=0]
    brain_image[brain_mask]=1-brain_image[brain_mask]
    

    
    return lesion_mask,brain_image,ground_truth
    
def add_lesions(slice_image,round_lesions,not_round_lesions,min_lesion,max_lesion,white_constant,seed,max_brain):
    #for each slice we chose: random number of lesions, random lesions, random coordinates
    
    np.random.seed(seed)
    number_of_lesions=np.random.randint(min_lesion,max_lesion+1)
    lesion_type=np.random.randint(0,2)
    #lesion_type=0 - round
    #lesion_type=1 - not round
    
    #get lesions from type of lesions (and target)
    added=rescale_values(slice_image.copy(),max_brain,0)
    if lesion_type==0: #round
        target=0
        with open('round_lesions.pkl', 'rb') as f:
            lesion_list=pickle.load(f)
        np.random.shuffle(round_lesions)
        
    elif lesion_type==1: #not round
        target=1
        with open('not_round_lesions.pkl', 'rb') as f:
            lesion_list=pickle.load(f) 
        np.random.shuffle(lesion_list)
        
    lesions=[i[0] for i in lesion_list[:number_of_lesions]]
    #add the lesions
    
    lesion_mask,lesion_brain_white,ground_truth=select_coordinates(added, lesions,white_constant,seed)
    
    
    return lesion_mask,lesion_brain_white,ground_truth,target,number_of_lesions

def change_images(image):
    image=np.repeat(image[..., np.newaxis], 3, axis=2)
    image=resize(image, (224, 224))
    image=image.transpose(2,0,1)
    return image

def create_dataset(slices,round_lesions,not_round_lesions,min_lesion=3,max_lesion=5,white_constant=0.85,seed=0,max_brain=1):

    dataset_white=[]
    number_lesions=[]
    lesion_mask_list=[]
    ground_truths=[]
    for slice_idx in range(len(slices)):
        lesion_mask,lesion_brain_white,ground_truth,target,number_of_lesions=add_lesions(slices[slice_idx],
                                                                                         round_lesions,
                                                                                         not_round_lesions,
                                                                                         min_lesion=min_lesion,
                                                                                         max_lesion=max_lesion,
                                                                                         white_constant=white_constant,
                                                                                        seed=seed,
                                                                                        max_brain=max_brain)
        dataset_white.append([change_images(lesion_brain_white),target])
        number_lesions.append(number_of_lesions)
        lesion_mask_list.append(lesion_mask)
        ground_truths.append(ground_truth)
        seed+=1
        
        if slice_idx%1500==0:
            print(f'slice {slice_idx}/{len(slices)} = {round(100*slice_idx/len(slices),2)}%')
        
    return dataset_white,number_lesions,lesion_mask_list,ground_truths
        
    


In [2]:
# creating dataset

seed=0
np.random.seed(seed)
random.seed(seed)
plt.rc('image',cmap='gray')  

start = time.time()

# creating lesions
number_of_lesions=50 #amount of lesions in each lesion list
size_of_lesions=70 #size of all the lesions
factor=2

round_lesions, not_round_lesions=create_lesions(number_of_lesions,size_of_lesions,factor=factor)

done = time.time()
elapsed = done - start
print(f'took {round(elapsed,2)}s')

# loading slices 
with open('slices_validation.pkl', 'rb') as f:
    validation_slices,target_valid_gender,target_valid_age = pickle.load(f)
    
    
# adding lesions to slices
lesion_max_intensity=0.5
max_brain_intensity=0.7

start = time.time()
print(' ====== holdout ====== ')

dataset,_,_,ground_truths=create_dataset(validation_slices,
                                          round_lesions,
                                          not_round_lesions,
                                          min_lesion=3,
                                          max_lesion=5,
                                          white_constant=lesion_max_intensity,
                                          seed=50000,
                                          max_brain=max_brain_intensity)

done = time.time()
elapsed = done - start
print()
print(f'took {round(elapsed,2)}s')
target_w=[i[1] for i in dataset]
print(f'{len([i for i in target_w if i==1])} slices of target 1 out of {len(target_w)} slices: {round(100*len([i for i in target_w if i==1])/len(target_w),2)} %')
print(f' number of slices: {len(dataset)}')


round lesions: 51/50 === not round lesions 207/50
number of seeds used: 886
the lesions have size between 66.375 and 68.375
took 10.5s
slice 0/8539 = 0.0%
slice 1500/8539 = 17.57%
slice 3000/8539 = 35.13%
slice 4500/8539 = 52.7%
slice 6000/8539 = 70.27%
slice 7500/8539 = 87.83%

took 154.7s
4277 slices of target 1 out of 8539 slices: 50.09 %
 number of slices: 8539


In [3]:
def boolList2BinString(lst):
    # lst is a binary list that corresponds to the comparison between the predicted labels and the real labels
    # returns a binary string version of the list
    
    return '0b' + ''.join(['1' if x else '0' for x in lst])

In [4]:
def correctly_classified_intersection(to_save_list):
    # to_save_list is a dictionary with the keys: 
    #    model: name of the file where the model is saved
    #    AUROC: AUROC of the correspondig model
    #    ACC: Accuracy of the correspondig model
    #    AUPRC: AUPRC of the correspondig model
    #    real_labels: tensor with the real labels
    #    pred_labels: list of the predicted labels
    #    logits: tensor with the logits predicted by the model
    #    block: degree of finetuning (1, 2, 3, 4 or a)
    #    seed: the seed used to train the model
    #
    # returns the indexes of the images that are correctly classified by every model
    # (the dataset of these images should be the one used to obtain the labels and logits of to_save_list)

    bools = []
    for i in to_save_list:
        compare_list = np.array(i['real_labels'])==np.array(i['pred_labels'])
        bins = int(boolList2BinString(compare_list),2)
        bools.append(bins)

    #obtain the intersection of the correctly classified values using bitwize and between the bins and a list of ones 
    value = '1'
    l=[str(value) for _ in range(len(dataset))]
    res=int('0b'+''.join(l),2)
    for i in bools:
        res = res & i #bitwise and

    # calculate the number of correct 
   
    r='{0:08319b}'.format(res)
    n=0
    for i in r:
        if i=='1':
            n+=1
    
    print(f"Obtained {n} images correctly classified by all the models in {len('{0:08319b}'.format(res))} total images")

    idx=[i for i,x in enumerate(r) if x=='1']
    
        
    return idx
    

In [5]:

def incorrectly_classified_intersection(to_save_list):
    # to_save_list is a dictionary with the keys: 
    #    model: name of the file where the model is saved
    #    AUROC: AUROC of the correspondig model
    #    ACC: Accuracy of the correspondig model
    #    AUPRC: AUPRC of the correspondig model
    #    real_labels: tensor with the real labels
    #    pred_labels: list of the predicted labels
    #    logits: tensor with the logits predicted by the model
    #    block: degree of finetuning (1, 2, 3, 4 or a)
    #    seed: the seed used to train the model
    #
    # returns the indexes of the images that are correctly classified by every model
    # (the dataset of these images should be the one used to obtain the labels and logits of to_save_list)

    bools = []
    for i in to_save_list:
        compare_list = np.array(i['real_labels'])==np.array(i['pred_labels'])
        bins = int(boolList2BinString(compare_list),2)
        bools.append(bins)

    #obtain the intersection of the correctly classified values using bitwize or between the bins and a list of zeros 
    value = '0'
    l=[str(value) for _ in range(len(dataset))]
    res=int('0b'+''.join(l),2)
    for i in bools:
        res = res | i #bitwise or

    # calculate the number of correct 
    r='{0:08319b}'.format(res)
    n=0
    for i in r:
        if i=='0':
            n+=1
            
    print(f"Obtained {n} images incorrectly classified by all the models in {len('{0:08319b}'.format(res))} total images")

    idx=[i for i,x in enumerate(r) if x=='0']

    return idx

In [6]:
# functions to obtain the explanations and ratios 

import torch
from torch import manual_seed
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
import torchvision.models as models
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from torchsummary import summary
import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import matplotlib.pyplot as plt
import time
import os
import copy

from PIL import Image 

torch.cuda.is_available()

torch.set_num_threads(3)

from captum.attr import IntegratedGradients, Saliency, DeepLift, DeepLiftShap, GradientShap, GuidedBackprop,  GuidedGradCam 
from captum.attr import Deconvolution, ShapleyValueSampling, Lime, KernelShap, LRP, InputXGradient, FeatureAblation
from scipy.ndimage import sobel, laplace


def high_intensity_ratio(explanation,gt,EMD,ratio,plots=True):
    #explanation is the absolute value of the attributions from the captum methods (1, 3, 224, 224)
    # ground truth is a 1 channel image ()
    
    rgb_weights = [0.2989, 0.5870, 0.1140]
    
    att=explanation.squeeze(0).transpose(1,2,0)    
    grayscale_att = np.dot(att[...,:3], rgb_weights)
    

    p = len(gt[gt>0]) #number of pixels in ground truth 
    
    if ratio:
        #finds the p most intense values in the attribution
        sorted_=np.sort(grayscale_att, axis=None)
        sorted_=sorted_[::-1]
        min_att_val=sorted_[p]

        mask=grayscale_att>min_att_val

        multi=mask*gt #this deletes the high intensity points that are not in the ground truth
        ratio=round(len(multi[multi>0])/p,2)

    if EMD:
            #EMD
        #obtaining the coordinates of the images
        x_mask,y_mask = np.where(mask !=0.)
        x_gt,y_gt = np.where(gt !=0.)

        points_mask=np.array([[y_mask[i],x_mask[i]] for i in range(len(y_mask))])
        points_gt=np.array([[y_gt[i],x_gt[i]] for i in range(len(y_gt))])

        a, b = np.ones((p,)) / p, np.ones((p,)) / p  # uniform distribution on samples

        M = ot.dist(points_mask, points_gt) #cost matrix

        G0 = ot.emd(a, b, M) # uniform dist - every point has the same importance (we could consider the intensities)

        EMD = np.sum(G0 * M)

    if plots:
        plt.figure(figsize=(10, 12))
        plt.subplot(2,2,1)
        plt.imshow(grayscale_att,cmap='inferno')
        plt.title('Explanation')
        plt.subplot(2,2,2)
        plt.imshow(gt,cmap='gray')
        plt.title('Ground Truth')
        plt.subplot(2,2,3)
        plt.imshow(mask,cmap='gray')  
        plt.title('Highest intensity points')
        plt.subplot(2,2,4)
        plt.imshow(multi,cmap='gray')
        plt.title('Highest intensity points\n in the ground truth')
    
    return ratio,EMD

def get_ratios_list(model,holdout_dataset,gt_dataset,ratio,emd):
    t0=time.time()
    ratios={'gradient':[],'gradientshap':[],'deeplift':[],'saliency':[],'InputXGradient':[],'backprop':[],
            'deconv':[],'LRP':[]}
                   
    EMDs={'gradient':[],'gradientshap':[],'deeplift':[],'saliency':[],'InputXGradient':[],'backprop':[],
            'deconv':[],'LRP':[]}
                   
    for n in range(len(holdout_dataset)):
        target=holdout_dataset[n][1]
        test_img = holdout_dataset[n][0]
        transformed_img=torch.tensor(test_img)
        input_image = transformed_img.unsqueeze(0)
        data=input_image.to(DEVICE,dtype=torch.float)
        
       
        attribution_gradient = abs(IntegratedGradients(model).attribute(data, target=target).cpu().detach().numpy())
        attribution_gradientshap = abs(GradientShap(model).attribute(data, target=target, baselines=torch.zeros(data.shape).to(DEVICE)).cpu().detach().numpy())
        attribution_deeplift = abs(DeepLift(model).attribute(data, target=target).cpu().detach().numpy())
        attribution_saliency = abs(np.array(torch.Tensor.cpu(Saliency(model).attribute(data, target=target))))
        attribution_InputXGradient=abs(InputXGradient(model).attribute(data, target=target).cpu().detach().numpy())
        attribution_backprop = abs(np.array(torch.Tensor.cpu(GuidedBackprop(model).attribute(data, target=target))))
        attribution_deconv = abs(Deconvolution(model).attribute(data, target=target).cpu().detach().numpy())
        attribution_LRP = abs(LRP(model).attribute(data, target=target).cpu().detach().numpy())
       

        rgb_weights = [0.2989, 0.5870, 0.1140]
        gt=np.dot(change_images(gt_dataset[n]).transpose(1,2,0)[...,:3],rgb_weights) 

        if ratio:
            ratios['gradient'].append(high_intensity_ratio(attribution_gradient,gt,0,1,plots=False)[0])
            ratios['gradientshap'].append(high_intensity_ratio(attribution_gradientshap,gt,0,1,plots=False)[0])
            ratios['deeplift'].append(high_intensity_ratio(attribution_deeplift,gt,0,1,plots=False)[0])
            ratios['saliency'].append(high_intensity_ratio(attribution_saliency,gt,0,1,plots=False)[0])
            ratios['InputXGradient'].append(high_intensity_ratio(attribution_InputXGradient,gt,0,1,plots=False)[0])
            ratios['backprop'].append(high_intensity_ratio(attribution_backprop,gt,0,1,plots=False)[0])
            ratios['deconv'].append(high_intensity_ratio(attribution_deconv,gt,0,1,plots=False)[0])
            ratios['LRP'].append(high_intensity_ratio(attribution_LRP,gt,0,1,plots=False)[0])

        
        if emd:
            EMDs['gradient'].append(high_intensity_ratio(attribution_gradient,gt,1,0,plots=False)[1])
            EMDs['gradientshap'].append(high_intensity_ratio(attribution_gradientshap,gt,1,0,plots=False)[1])
            EMDs['deeplift'].append(high_intensity_ratio(attribution_deeplift,gt,1,0,plots=False)[1])
            EMDs['saliency'].append(high_intensity_ratio(attribution_saliency,gt,1,0,plots=False)[1])
            EMDs['InputXGradient'].append(high_intensity_ratio(attribution_InputXGradient,gt,1,0,plots=False)[1])
            EMDs['backprop'].append(high_intensity_ratio(attribution_backprop,gt,1,0,plots=False)[1])
            EMDs['deconv'].append(high_intensity_ratio(attribution_deconv,gt,1,0,plots=False)[1])
            EMDs['LRP'].append(high_intensity_ratio(attribution_LRP,gt,1,0,plots=False)[1])
     
        if n%500==0:
            print(f'{n}/{len(holdout_dataset)} : {round(n/len(holdout_dataset)*100,2)} % ')
            print(f'time elapsed: {(time.time()-t0) // 60:.0f}m {(time.time()-t0) % 60:.0f}s')

   
    print(f'total time: {(time.time()-t0) // 60:.0f}m {(time.time()-t0) % 60:.0f}s')
    return ratios,EMDs

################################ 


def load_VGG_model(path,device):
    model = models.vgg16(pretrained=True)
    model.classifier=model.classifier[:-1]
    last_layers=[nn.Linear(4096,2)]
    model.classifier = nn.Sequential(*list(model.classifier)+last_layers) 

    model.load_state_dict(torch.load(path,map_location=device))
    
    model.features[1]=nn.ReLU(inplace=False)
    model.features[3]=nn.ReLU(inplace=False)
    model.features[6]=nn.ReLU(inplace=False)
    model.features[8]=nn.ReLU(inplace=False)
    model.features[11]=nn.ReLU(inplace=False)
    model.features[13]=nn.ReLU(inplace=False)
    model.features[15]=nn.ReLU(inplace=False)
    model.features[18]=nn.ReLU(inplace=False)
    model.features[20]=nn.ReLU(inplace=False)
    model.features[22]=nn.ReLU(inplace=False)
    model.features[25]=nn.ReLU(inplace=False)
    model.features[27]=nn.ReLU(inplace=False)
    model.features[29]=nn.ReLU(inplace=False)
    model.classifier[1]=nn.ReLU(inplace=False)
    model.classifier[4]=nn.ReLU(inplace=False)
    
    model.to(device)
    return model

## Best performing models - IMAGENET

In [7]:
with open('/home/martao/MRI_dataset/2ndTry/Models/VGG/2500/best_acc/saves/to_save.pkl', 'rb') as f:
    to_save_best_img = pickle.load(f)
    
folder = '/home/martao/MRI_dataset/2ndTry/Models/VGG/2500/best_acc/done/'

In [8]:
# correctly classified intersection
idx = correctly_classified_intersection(to_save_best_img)

correct=[dataset[i] for i in idx]
correct_gt=[resize(ground_truths[i],(224,224)) for i in idx]

Obtained 4574 images correctly classified by all the models in 8539 total images


In [9]:
# incorrectly classified intersection
idx = incorrectly_classified_intersection(to_save_best_img)

incorrect=[dataset[i] for i in idx]
incorrect_gt=[resize(ground_truths[i],(224,224)) for i in idx]

Obtained 7 images incorrectly classified by all the models in 8539 total images


In [10]:
# incorrectly classified by one of the models:
incorrectly_classified = {}

print('number correct || number total || number incorrect ')
for i in to_save_best_img:
    compare_list = np.array(i['real_labels'])==np.array(i['pred_labels'])
    print(sum(compare_list),'\t\t',len(compare_list),'\t\t',len(compare_list)-sum(compare_list))
    incorrect_indexes = np.nonzero(~compare_list)[0]
    incorrectly_classified[i['model']] = incorrect_indexes 
    

number correct || number total || number incorrect 
6344 		 8539 		 2195
8409 		 8539 		 130
6337 		 8539 		 2202
8219 		 8539 		 320
8439 		 8539 		 100
8446 		 8539 		 93
8491 		 8539 		 48
8445 		 8539 		 94
6367 		 8539 		 2172
8240 		 8539 		 299
8480 		 8539 		 59
8130 		 8539 		 409
8185 		 8539 		 354
6357 		 8539 		 2182
8150 		 8539 		 389
8448 		 8539 		 91
8408 		 8539 		 131
8481 		 8539 		 58
8442 		 8539 		 97
6191 		 8539 		 2348
8177 		 8539 		 362
8377 		 8539 		 162
8462 		 8539 		 77
8473 		 8539 		 66
8489 		 8539 		 50


In [12]:
folder

'/home/martao/MRI_dataset/2ndTry/Models/VGG/2500/best_acc/done/'

In [13]:
with open(folder[:-5]+'/saves/incorrectly_classified.pkl', 'wb') as f:
     pickle.dump(incorrectly_classified,f)

In [15]:

models_names = os.listdir(folder)
models_names = [i for i in models_names if (i[0]!='.' and i.find('MRI')==-1)]
ratios_dict = {}

DEVICE = 'cuda:1'

for m in models_names:
    print(m)
    model=load_VGG_model(folder+m,DEVICE)
    ratios=get_ratios_list(model,[dataset[i] for i in incorrectly_classified[m[:-3]]],[ground_truths[i] for i in incorrectly_classified[m[:-3]]],1,0)
    
    ratios_dict[m] = ratios
    
    print()
    
    with open(folder[:-5]+'saves/+ratios_incorrectly.pkl', 'wb') as f:
         pickle.dump(ratios_dict,f)
    

new_finetuning_1conv_0.5_img_2500_0.02_18464.pt
0/2195 : 0.0 % 
time elapsed: 0m 1s
500/2195 : 22.78 % 
time elapsed: 5m 15s
1000/2195 : 45.56 % 
time elapsed: 10m 31s
1500/2195 : 68.34 % 
time elapsed: 15m 48s
2000/2195 : 91.12 % 
time elapsed: 21m 5s
total time: 23m 7s

new_finetuning_all_0.5_img_2500_0.004_58461.pt
0/130 : 0.0 % 
time elapsed: 0m 1s
total time: 1m 22s

new_finetuning_1conv_0.5_img_2500_0.02_646976.pt
0/2202 : 0.0 % 
time elapsed: 0m 1s
500/2202 : 22.71 % 
time elapsed: 5m 17s
1000/2202 : 45.41 % 
time elapsed: 10m 32s
1500/2202 : 68.12 % 
time elapsed: 15m 49s
2000/2202 : 90.83 % 
time elapsed: 21m 5s
total time: 23m 12s

new_finetuning_2conv_0.5_img_2500_0.008_18464876.pt
0/320 : 0.0 % 
time elapsed: 0m 1s
total time: 3m 22s

new_finetuning_4conv_0.5_img_2500_0.008_32323548.pt
0/100 : 0.0 % 
time elapsed: 0m 1s
total time: 1m 3s

new_finetuning_4conv_0.5_img_2500_0.008_116560000.pt
0/93 : 0.0 % 
time elapsed: 0m 1s
total time: 0m 59s

new_finetuning_4conv_0.5_img_2

In [16]:
 with open(folder[:-5]+'saves/ratios_incorrectly.pkl', 'wb') as f:
         pickle.dump(ratios_dict,f)

In [17]:
incorrectly_classified

{'new_finetuning_1conv_0.5_img_2500_0.02_18464': array([   3,    6,    7, ..., 8529, 8534, 8535]),
 'new_finetuning_all_0.5_img_2500_0.004_58461': array([ 252,  269,  296,  306,  336,  431,  456,  526,  587,  589,  727,
         750,  879, 1129, 1237, 1333, 1380, 1391, 1399, 1741, 1832, 1845,
        1877, 1916, 2041, 2088, 2184, 2189, 2335, 2360, 2387, 2412, 2496,
        2581, 2682, 2739, 2744, 2805, 2809, 2868, 2953, 2977, 3031, 3052,
        3074, 3083, 3116, 3320, 3411, 3576, 3780, 3792, 3828, 3924, 4071,
        4085, 4138, 4199, 4460, 4554, 4606, 4618, 4623, 4664, 4715, 4772,
        4824, 5080, 5084, 5146, 5246, 5274, 5313, 5314, 5342, 5390, 5474,
        5478, 5481, 5506, 5515, 5640, 5664, 5693, 5785, 5940, 5994, 6013,
        6122, 6176, 6244, 6345, 6351, 6414, 6449, 6497, 6512, 6514, 6556,
        6576, 6597, 6657, 6667, 6802, 6890, 7175, 7179, 7271, 7273, 7303,
        7470, 7501, 7545, 7546, 7562, 7578, 7673, 7691, 7758, 7796, 7801,
        7934, 7939, 7960, 8091, 8228, 82

## Same performing models - IMAGENET

In [18]:
with open('/home/martao/MRI_dataset/2ndTry/Models/VGG/2500/done_same_acc/saves/to_save.pkl', 'rb') as f:
    to_save_same_img = pickle.load(f)
    

In [19]:
# correctly classified intersection
idx = correctly_classified_intersection(to_save_best_img)

correct=[dataset[i] for i in idx]
correct_gt=[resize(ground_truths[i],(224,224)) for i in idx]

Obtained 4574 images correctly classified by all the models in 8539 total images


In [20]:
# incorrectly classified intersection
idx = incorrectly_classified_intersection(to_save_best_img)

incorrect=[dataset[i] for i in idx]
incorrect_gt=[resize(ground_truths[i],(224,224)) for i in idx]

Obtained 7 images incorrectly classified by all the models in 8539 total images


In [21]:
# incorrectly classified by one of the models:
incorrectly_classified = {}

print('number correct || number total || number incorrect ')
for i in to_save_best_img:
    compare_list = np.array(i['real_labels'])==np.array(i['pred_labels'])
    print(sum(compare_list),'\t\t',len(compare_list),'\t\t',len(compare_list)-sum(compare_list))
    incorrect_indexes = np.nonzero(~compare_list)[0]
    incorrectly_classified[i['model']] = incorrect_indexes 
    

number correct || number total || number incorrect 
6344 		 8539 		 2195
8409 		 8539 		 130
6337 		 8539 		 2202
8219 		 8539 		 320
8439 		 8539 		 100
8446 		 8539 		 93
8491 		 8539 		 48
8445 		 8539 		 94
6367 		 8539 		 2172
8240 		 8539 		 299
8480 		 8539 		 59
8130 		 8539 		 409
8185 		 8539 		 354
6357 		 8539 		 2182
8150 		 8539 		 389
8448 		 8539 		 91
8408 		 8539 		 131
8481 		 8539 		 58
8442 		 8539 		 97
6191 		 8539 		 2348
8177 		 8539 		 362
8377 		 8539 		 162
8462 		 8539 		 77
8473 		 8539 		 66
8489 		 8539 		 50


In [23]:
folder = '/home/martao/MRI_dataset/2ndTry/Models/VGG/2500/done_same_acc/'

with open(folder+'/saves/incorrectly_classified.pkl', 'wb') as f:
     pickle.dump(incorrectly_classified,f)
        
incorrectly_classified

{'new_finetuning_1conv_0.5_img_2500_0.02_18464': array([   3,    6,    7, ..., 8529, 8534, 8535]),
 'new_finetuning_all_0.5_img_2500_0.004_58461': array([ 252,  269,  296,  306,  336,  431,  456,  526,  587,  589,  727,
         750,  879, 1129, 1237, 1333, 1380, 1391, 1399, 1741, 1832, 1845,
        1877, 1916, 2041, 2088, 2184, 2189, 2335, 2360, 2387, 2412, 2496,
        2581, 2682, 2739, 2744, 2805, 2809, 2868, 2953, 2977, 3031, 3052,
        3074, 3083, 3116, 3320, 3411, 3576, 3780, 3792, 3828, 3924, 4071,
        4085, 4138, 4199, 4460, 4554, 4606, 4618, 4623, 4664, 4715, 4772,
        4824, 5080, 5084, 5146, 5246, 5274, 5313, 5314, 5342, 5390, 5474,
        5478, 5481, 5506, 5515, 5640, 5664, 5693, 5785, 5940, 5994, 6013,
        6122, 6176, 6244, 6345, 6351, 6414, 6449, 6497, 6512, 6514, 6556,
        6576, 6597, 6657, 6667, 6802, 6890, 7175, 7179, 7271, 7273, 7303,
        7470, 7501, 7545, 7546, 7562, 7578, 7673, 7691, 7758, 7796, 7801,
        7934, 7939, 7960, 8091, 8228, 82

In [24]:

models_names = os.listdir(folder)
models_names = [i for i in models_names if (i[0]!='.' and i.find('MRI')==-1)]
ratios_dict = {}

DEVICE = 'cuda:1'

for m in models_names:
    print(m)
    model=load_VGG_model(folder+m,DEVICE)
    ratios=get_ratios_list(model,[dataset[i] for i in incorrectly_classified[m[:-3]]],[ground_truths[i] for i in incorrectly_classified[m[:-3]]],1,0)
    
    ratios_dict[m] = ratios
    
    print()
    
    with open(folder+'saves/ratios_incorrectly.pkl', 'wb') as f:
         pickle.dump(ratios_dict,f)
    

new_finetuning_all_0.5_img_2500_0.0006_32323548.pt


KeyError: 'new_finetuning_all_0.5_img_2500_0.0006_32323548'

In [None]:
 with open(folder+'saves/ratios_incorrectly.pkl', 'wb') as f:
         pickle.dump(ratios_dict,f)