In this script we implement LFs for Mitral Regurgitation derived using the training set and validate it using the train and dev set 

In [1]:
%matplotlib inline
#%matplotlib
import cv2 
import imageio
import pickle
import pydicom
import random
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
from skimage.segmentation import find_boundaries
from scipy.sparse import csr_matrix

import sys
sys.path.append('../metal')

In [2]:
def OCIndex2Label(index_open, index_close):
    """
    Converts from open and close index to frame-wise labels   
    
    Params
    -------
    index_open: int
    index_close: int
    
    Return
    -------
    frame_labels: numpy array of shape (1,50)
    
    """
    index_open = int(index_open)
    index_close = int(index_close)
    
    frame_labels = np.zeros([1,50]);
    if(index_close >= index_open):
        frame_labels[0,index_open:index_close] = 1
    else:
        frame_labels[0,index_open:50] = 1
        frame_labels[0,0:index_close] = 1
        
    return frame_labels   

In [3]:
def findAreaLabel(mask):
    '''
    function to find labels based on area of left atrium
    
    Input
    -----
    mask: float (50,208,174)
    
    Output
    -----
    label: float (1,50)
    
    '''
    area_la = np.sum(np.sum(mask==1.0,axis=2),axis=1) # area of left atrium
    
    # finding when mitral valve opens - when area is largest
    index_open = np.argmax(area_la)

    # finding when mitral valve closes - when area is smallest
    index_close = np.argmin(area_la)
    
    label = OCIndex2Label(index_open, index_close)
    
    # changing to 1-indexing from 0-indexing
    label = label + 1
    
    # adding abstain votes - exact location of open / close is uncertain
    label[:,index_open-3:index_open+2] = 0
    index_low = (index_close-3)%50
    index_high = (index_close+2)%50
    if(index_close+2 < 50 and index_close-3 >=0):
        label[:,index_low:index_high] = 0
    else:
        label[:,index_low:50] = 0
        label[:,0:index_high] = 0
    
    return label

In [4]:
def findPerimeterLabel(mask_original):
    '''
    function to find labels based on perimeter of left atrium
    
    Input
    -----
    mask_original: float (50,208,174)
    
    Output
    -----
    label: float (1,50)
    
    '''
    
    # finding perimeter of left atrium 
    mask = np.copy(mask_original)
    mask[mask!=1.0] = 0.0
    mask = mask.astype(int)
    perimeter = np.zeros((50,1))
    for frame in range(50):
        regions = regionprops(mask[frame])
        perimeter[frame] = regions[0].perimeter
        
    # mitral valve opens - when perimeter is largest
    index_open = np.argmax(perimeter)

    # mitral valve closes - when perimeter is smallest
    index_close = np.argmin(perimeter)
    
    label = OCIndex2Label(index_open, index_close)
    
    # changing to 1-indexing from 0-indexing
    label = label + 1
    
    # adding abstain votes - exact location of open / close is uncertain
    label[:,index_open-3:index_open+2] = 0
    index_low = (index_close-3)%50
    index_high = (index_close+2)%50
        
    if(index_close+2 < 50 and index_close-3 >=0):
        label[:,index_low:index_high] = 0
    else:
        label[:,index_low:50] = 0
        label[:,0:index_high] = 0
    
    return label

In [5]:
def findIntensityLabel(data, mask_original):
    '''
    function to define labels based on intensity within left atrium
    
    Input
    -----
    data: float (50,208,174)
    mask_original: float (50,208,174)
    
    Output
    -----
    label: float (1,50)
    
    '''
    # avg intensity within segment
    #mask = [mask_original == 1.0] 
    mask = np.copy(mask_original)
    mask[mask!=1.0] = 0.0
    mask = mask.astype(float)
    area_intensity = np.mean(np.mean(np.multiply(mask,data),axis=2),axis=1)
        
    # finding when mitral valve opens - when area is largest
    index_open = np.argmax(area_intensity)

    # finding when mitral valve closes - when area is smallest
    index_close = np.argmin(area_intensity)
    
    label = OCIndex2Label(index_open, index_close)
    
    # changing to 1-indexing from 0-indexing
    label = label + 1
    
    # adding abstain votes - exact location of open / close is uncertain
    label[:,index_open-3:index_open+2] = 0
    index_low = (index_close-4)%50
    index_high = (index_close+3)%50
    if(index_close+2 < 50 and index_close-3 >=0):
        label[:,index_low:index_high] = 0
    else:
        label[:,index_low:50] = 0
        label[:,0:index_high] = 0
        
    return label

In [6]:
def findIntensityLabel2(data, mask_original):
    '''
    function to define labels based on intensity along perimeter of left atrium
    
    Input
    -----
    data: float (50,208,174)
    mask: float (50,208,174)
    
    Output
    -----
    label: float (1,50)
    
    '''
    # avg intensity within segment
    #mask = [mask_original == 1.0] 
    mask = np.copy(mask_original)
    mask[mask!=1.0] = 0.0
    mask = mask.astype(int)
    boundary = find_boundaries(mask,mode='outer')
    intensity = np.mean(np.mean(np.multiply(boundary,data),axis=2),axis=1)
        
    # finding when mitral valve opens - when area is largest
    index_open = np.argmax(intensity)

    # finding when mitral valve closes - when area is smallest
    index_close = np.argmin(intensity)
    
    label = OCIndex2Label(index_open, index_close)
    
    # changing to 1-indexing from 0-indexing
    label = label + 1
    
    # adding abstain votes - exact location of open / close is uncertain
    label[:,index_open-3:index_open+2] = 0
    index_low = (index_close-4)%50
    index_high = (index_close+3)%50
    if(index_close+2 < 50 and index_close-3 >=0):
        label[:,index_low:index_high] = 0
    else:
        label[:,index_low:50] = 0
        label[:,0:index_high] = 0
    
    return label

In [7]:
def findLabelPrior():
    index_open = 20
    index_close = 1
    
    label = OCIndex2Label(index_open,index_close)
    
    # changing to 1-indexing from 0-indexing
    label = label + 1
    
    # adding abstain votes - exact location of open / close is uncertain
    label[:,index_open-3:index_open+2] = 0
    index_low = (index_close-4)%50
    index_high = (index_close+3)%50
    if(index_close+2 < 50 and index_close-3 >=0):
        label[:,index_low:index_high] = 0
    else:
        label[:,index_low:50] = 0
        label[:,0:index_high] = 0
        
    return label

In [8]:
# define 80% coverage labelling functions
#import import_ipynb
#import labelling_functions_3

def findOCLFs(data,mask):
    '''
    Function computes the LFs for the given data
    Saves the numpy array for each individual PID in the folder specified
    '''
    L = np.zeros((50,5))
    L[:,0] = np.squeeze(np.transpose(findAreaLabel(mask)))
    L[:,1] = np.squeeze(np.transpose(findPerimeterLabel(mask)))
    L[:,2] = np.squeeze(np.transpose(findIntensityLabel(data,mask)))
    L[:,3] = np.squeeze(np.transpose(findIntensityLabel2(data,mask)))
    L[:,4] = np.squeeze(np.transpose(findLabelPrior()))
    #L = L + 1
    
    return L

In [9]:
# call labelling functions, define majority vote classifier 
# return prediction probabilities
import sys
sys.path.append('../heart_mri/metal')
from metal.label_model.baselines import MajorityLabelVoter

def findMajorityPred(data,mask):
    L = findOCLFs(data,mask)
    
    mv = MajorityLabelVoter(seed=123)
    prob = mv.predict_proba(L)
    
    return prob

In [10]:
# define which frames are picked using prediction probabilities 
def findClosedFrames(data,mask):
    prob = findMajorityPred(data,mask)
    
    prob = prob[:,0]
    closed_frames = [i for i,x in enumerate(prob) if x == 1.0]
    
    # returning the first 10 closed frames
    if len(closed_frames) > 10:
        closed_frames = closed_frames[:10]
        
    return closed_frames

In [11]:
def findLeftArea(mask):
    area = np.sum(np.sum(mask==1.0,axis=2),axis=1) # area of left atrium
    return area

In [12]:
def findRightArea(mask):
    area = np.sum(np.sum(mask==2.0,axis=2),axis=1) # area of left atrium
    return area

In [13]:
def findLeftPerimeter(mask_original):
    #mask = [mask_original == 1.0] 
    mask = np.copy(mask_original)
    mask[mask!=1.0] = 0.0
    mask = mask.astype(int)
    perimeter = np.zeros((50,1))
    for frame in range(50):
        regions = regionprops(mask[frame])
        perimeter[frame] = regions[0].perimeter
    
    return perimeter

In [14]:
def findRightPerimeter(mask_original):
    #mask = [mask_original == 2.0] 
    mask = np.copy(mask_original)
    mask[mask!=2.0] = 0.0
    mask = mask.astype(int)
    perimeter = np.zeros((50,1))
    for frame in range(50):
        regions = regionprops(mask[frame])
        perimeter[frame] = regions[0].perimeter
    
    return perimeter

In [15]:
def findLeftEFLabel(data,mask):
    area = findLeftArea(mask)
    ejection_fraction = min(area)/max(area)
    
    if (ejection_fraction < 0.4 ):
        label = 2 # nonMR 
    else:
        label = 0 # abstain 
    
    return label

In [16]:
def findRightEFLabel(data,mask):
    area = findRightArea(mask)
    ejection_fraction = min(area)/max(area)
    
    if ( ejection_fraction < 0.4 ):
        label = 2 # nonMR
    else: 
        label = 0 # abstain 
    
    return label

In [17]:
def findLeftAreaVarLabel(data,mask):
    area = findLeftArea(mask)
    frames = findClosedFrames(data,mask)
    area = area[frames]
    area_var = np.var(area)
    
    if ( area_var > 15000 ):
        label = 2 # nonMR
    else: 
        label = 0 # abstain 
        
    return label

In [18]:
def findRightAreaVarLabel(data,mask):
    area = findRightArea(mask)
    area_var = np.var(area)
    
    if ( area_var > 15000 ):
        label = 2 # nonMR
    else: 
        label = 0 # abstain 
        
    return label

In [19]:
def findFrameAreaRatioLabel(data,mask):
    left_area = findLeftArea(mask)
    right_area = findRightArea(mask)
    area_ratio = np.mean(np.divide(left_area,right_area))      
    
    if( area_ratio < 0.6 or area_ratio > 1.6 ):
        label = 1 # MR 
    else : 
        label = 0 # abstain 
    
    return label

In [20]:
def findAPRatioLabel(data,mask):
    area=  findLeftArea(mask)
    perimeter = findLeftPerimeter(mask)
    ratio = np.nanmean(np.divide(area, perimeter))

    if ( ratio < 4.3 ):
        label = 2 # nonMR
    else :
        label = 0 # abstain
    
    return label

In [21]:
def findAPRatioVarLabel(data,mask):
    area=  findLeftArea(mask)
    perimeter = findLeftPerimeter(mask)
    ratio = np.nanvar(np.divide(area, perimeter))

    if ( ratio > 2 ):
        label = 2 # nonMR
    else :
        label = 0 # abstain
        
    return label

In [22]:
# given centroids and mask for a single frame - 
# defines a half plane based on centroids
# multiplies with mask 
def findSubregionFrame(c_left,c_right,mask):
    '''
    c_left: (x1, y1) float
    c_right: (x2, y2) float
    mask: (208, 180) float
    '''
    x1 = c_left[0]
    y1 = c_left[1]
    
    x2 = c_right[0]
    y2 = c_right[1]
    
    slope = ( x2 - x1 ) / ( y2 - y1 ) # flipped because of row/col convention in indexing
    new_mask = np.zeros(mask.shape)
    
    for x in range(mask.shape[0]):
        for y in range(mask.shape[1]):
            if(x < slope*(y-y1) + x1):
                new_mask[x,y] = 1
                
    new_mask = new_mask*mask
    new_mask[new_mask!=1.0] = 0
    
    return new_mask

In [23]:
def findSubregion(mask_original):
    '''
    Input
    ------
    mask_original: (50, 208, 180) float
    
    
    Output
    ------
    new_mask: (50, 208, 180) float
    mask defining a smaller region
    '''
    
    # finding left and right atrium masks
    mask_left = np.copy(mask_original)
    mask_left[mask_left!=1.0] = 0.0
    mask_left = mask_left.astype(int)

    mask_right = np.copy(mask_original)
    mask_right[mask_right!=2.0] = 0.0
    mask_right = mask_right.astype(int)
    
    # finding centroids
    centroid_left = np.zeros((50,2))
    centroid_right = np.zeros((50,2))
    for frame in range(50):
        left_region = regionprops(mask_left[frame])
        centroid_left[frame] = left_region[0].centroid

        right_region = regionprops(mask_right[frame])
        centroid_right[frame] = right_region[0].centroid
        
    # finding subregions
    new_mask = np.zeros(mask_original.shape)
    for frame in range(mask_original.shape[0]):
        new_mask[frame,:,:] = findSubregionFrame(centroid_left[frame,:],centroid_right[frame,:],
                         mask_original[frame,:,:])
    
    return new_mask

In [24]:
def findIntSubregionLabel(data,mask):
    # finds average intensity defined for the left atrium subregion close to the mitral valve
    new_mask = findSubregion(mask)
    masked_data = np.multiply(data,new_mask)
    
    frames = findClosedFrames(data,mask)
    masked_data = masked_data[frames,:,:]
    masked_data = np.reshape(masked_data,(len(frames),-1))
    
    area_intensity = np.mean(masked_data[masked_data!=0.0])
    
    if(area_intensity < 2.6):
        label = 1 # MR
    elif (area_intensity < 2.8):
        label = 0 # abstain
    else:
        label = 2 # non MR
        
    return label

In [25]:
def findIntSubregionVarLabel(data,mask):
    new_mask = findSubregion(mask)
    masked_data = np.multiply(data,new_mask)
    
    frames = findClosedFrames(data,mask)
    masked_data = masked_data[frames,:,:]
    masked_data = np.reshape(masked_data,(len(frames),-1))
    
    area_intensity_var = np.var(masked_data[masked_data!=0.0])
    
    if(area_intensity_var < 1000):
        label = 1 # MR
    else:
        label = 2 # non MR 
        
    return label

In [26]:
def findMRLFs(data,mask): # ,PID,path,folder_name
    '''
    Function computes the LFs for each of the PIDs
    Saves the numpy array for each individual PID in the folder specified
    
    data: (208,180)
    mask: (208,180)
    '''
    #print(PID)
    L = np.zeros((8,))
    L[0] = findLeftEFLabel(data,mask)
    L[1] = findRightEFLabel(data,mask)
    L[2] = findLeftAreaVarLabel(data,mask)
    L[3] = findRightAreaVarLabel(data,mask)
    #L[4] = findFrameAreaRatioLabel(data,mask)
    L[4] = findAPRatioLabel(data,mask)
    L[5] = findAPRatioVarLabel(data,mask)
    L[6] = findIntSubregionLabel(data,mask)
    L[7] = findIntSubregionVarLabel(data,mask)
    
    #np.save(path+folder_name+PID+'.npy',L)
    return L 

In [27]:
# reading training data 
train_path = '../data/mr/train/'
labels = pd.read_csv(train_path+'labels.csv')

train_PIDs = labels.ID
train_labels = labels.LABEL 

MR_indices = [i for i,x in enumerate(train_labels) if x == 1.0]
nonMR_indices = [i for i,x in enumerate(train_labels) if x == 0.0]

train_MR_PIDs = train_PIDs[MR_indices[:]]
train_nonMR_PIDs = train_PIDs[nonMR_indices[:]]

In [80]:
# loading data and mask
train_data = {}
train_mask = {} 
train_Y = {}
for PID in train_MR_PIDs:
    print(PID)
    train_data[PID] = np.load(train_path+'la_4ch/'+str(PID)+'.npy')
    train_mask[PID] = np.load(train_path+'la_4ch_mask/'+str(PID)+'.npy')
    train_Y[PID] = 1  # MR  
    
for PID in train_nonMR_PIDs:
    print(PID)
    train_data[PID] = np.load(train_path+'la_4ch/'+str(PID)+'.npy')
    train_mask[PID] = np.load(train_path+'la_4ch_mask/'+str(PID)+'.npy')
    train_Y[PID] = 2 # non-MR  

3085128
5144603
3081388
1298891
5786505
3732743
3731562
2002329
4730312
3995191
3200115
3844741
4254850
2417922
4348411
4585988
3749507
3340464
1433516
5936415
1631970
2370818
1582477
5786533
1617662
2340116
2705213
5763523
4313102
3753065
3251632
1370027
2250584
4311498
5700281
1435741
1350437
2568581
3734841
4325127
3940729
4952970
2106029
1030923
4358938
2711126
2046732
3462299
3181580
1196573
4545229
1429283
5179406
2531664
1708007
1478393
4056607
4244727
4019183
2034606
5165760
4427699
1730457
3588464
2058577
5192367
1116409
1777810
2004622
3110680
4682829
1153521
5936118
1700356
2521956
4491598
4398876
2907944
3796328
1429339
2859646
3827005
2297974
5926710
1974956
2062304
4053952
1055258
5374563
3047016
5156063
5142661
4517061
3844591
4971674
2714044
1573135
2261024
2891002
1231639
3771468
1977256
5151591
2898000
5014270
5137076
2915312
3908654
5181584
5409965
2352132
4963163
3377733
2275055
2285395
3683918
2435409
4346110
2047467
1869465
4417273
4605844
1111826
3263717
6026141


In [124]:
# finding labels
train_L = np.zeros((len(train_PIDs),8))

count = 0
PID_list = train_MR_PIDs.append(train_nonMR_PIDs)
for PID in PID_list:
    print(PID)
    train_L[count,:] = findMRLFs(train_data[PID],train_mask[PID])
    count = count + 1
    


3085128
5144603
3081388
1298891
5786505
3732743
3731562
2002329
4730312
3995191
3200115
3844741
4254850
2417922
4348411
4585988
3749507
3340464
1433516
5936415
1631970
2370818
1582477
5786533
1617662
2340116
2705213
5763523
4313102
3753065
3251632
1370027
2250584
4311498
5700281
1435741
1350437
2568581
3734841
4325127
3940729
4952970
2106029
1030923
4358938
2711126
2046732
3462299
3181580
1196573
4545229
1429283
5179406
2531664
1708007
1478393
4056607
4244727
4019183
2034606
5165760
4427699
1730457
3588464
2058577
5192367
1116409
1777810
2004622
3110680
4682829
1153521
5936118
1700356
2521956
4491598
4398876
2907944
3796328
1429339
2859646
3827005
2297974
5926710
1974956
2062304
4053952
1055258
5374563
3047016
5156063
5142661
4517061
3844591
4971674
2714044
1573135
2261024
2891002
1231639
3771468
1977256
5151591
2898000
5014270
5137076
2915312
3908654
5181584
5409965
2352132
4963163
3377733
2275055
2285395
3683918
2435409
4346110
2047467
1869465
4417273
4605844
1111826
3263717
6026141


In [125]:
# generating summary
true_labels = np.array(list(train_Y.values()))

from metal.analysis import lf_summary
lf_summary(csr_matrix(train_L),csr_matrix(true_labels))

Unnamed: 0,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.
0,1,0.044,0.044,0.044,5,6,0.454545
1,1,0.044,0.044,0.044,6,5,0.545455
2,2,0.036,0.036,0.012,9,0,1.0
3,1,0.064,0.064,0.064,2,14,0.125
4,1,0.048,0.048,0.048,6,6,0.5
5,2,0.256,0.256,0.064,63,1,0.984375
6,2,1.0,1.0,0.268,225,25,0.9
7,"[1.0, 2.0]",1.0,1.0,0.268,195,55,0.78


In [83]:
# reading dev data 
dev_path = '../data/mr/train/'
labels = pd.read_csv(dev_path+'labels.csv')

dev_PIDs = labels.ID
dev_labels = labels.LABEL 

MR_indices = [i for i,x in enumerate(dev_labels) if x == 1.0]
nonMR_indices = [i for i,x in enumerate(dev_labels) if x == 0.0]

dev_MR_PIDs = dev_PIDs[MR_indices[:]]
dev_nonMR_PIDs = dev_PIDs[nonMR_indices[:]]

In [84]:
# loading data and mask
dev_data = {}
dev_mask = {} 
dev_Y = {}
for PID in dev_MR_PIDs:
    print(PID)
    dev_data[PID] = np.load(dev_path+'la_4ch/'+str(PID)+'.npy')
    dev_mask[PID] = np.load(dev_path+'la_4ch_mask/'+str(PID)+'.npy')
    dev_Y[PID] = 1  # MR  
    
for PID in dev_nonMR_PIDs:
    print(PID)
    dev_data[PID] = np.load(dev_path+'la_4ch/'+str(PID)+'.npy')
    dev_mask[PID] = np.load(dev_path+'la_4ch_mask/'+str(PID)+'.npy')
    dev_Y[PID] = 2 # non-MR  

3085128
5144603
3081388
1298891
5786505
3732743
3731562
2002329
4730312
3995191
3200115
3844741
4254850
2417922
4348411
4585988
3749507
3340464
1433516
5936415
1631970
2370818
1582477
5786533
1617662
2340116
2705213
5763523
4313102
3753065
3251632
1370027
2250584
4311498
5700281
1435741
1350437
2568581
3734841
4325127
3940729
4952970
2106029
1030923
4358938
2711126
2046732
3462299
3181580
1196573
4545229
1429283
5179406
2531664
1708007
1478393
4056607
4244727
4019183
2034606
5165760
4427699
1730457
3588464
2058577
5192367
1116409
1777810
2004622
3110680
4682829
1153521
5936118
1700356
2521956
4491598
4398876
2907944
3796328
1429339
2859646
3827005
2297974
5926710
1974956
2062304
4053952
1055258
5374563
3047016
5156063
5142661
4517061
3844591
4971674
2714044
1573135
2261024
2891002
1231639
3771468
1977256
5151591
2898000
5014270
5137076
2915312
3908654
5181584
5409965
2352132
4963163
3377733
2275055
2285395
3683918
2435409
4346110
2047467
1869465
4417273
4605844
1111826
3263717
6026141


In [126]:
# finding labels
dev_L = np.zeros((len(dev_PIDs),8))

count = 0
PID_list = dev_MR_PIDs.append(dev_nonMR_PIDs)
for PID in PID_list:
    print(PID)
    dev_L[count,:] = findMRLFs(dev_data[PID],dev_mask[PID])
    count = count + 1
    
    

3085128
5144603
3081388
1298891
5786505
3732743
3731562
2002329
4730312
3995191
3200115
3844741
4254850
2417922
4348411
4585988
3749507
3340464
1433516
5936415
1631970
2370818
1582477
5786533
1617662
2340116
2705213
5763523
4313102
3753065
3251632
1370027
2250584
4311498
5700281
1435741
1350437
2568581
3734841
4325127
3940729
4952970
2106029
1030923
4358938
2711126
2046732
3462299
3181580
1196573
4545229
1429283
5179406
2531664
1708007
1478393
4056607
4244727
4019183
2034606
5165760
4427699
1730457
3588464
2058577
5192367
1116409
1777810
2004622
3110680
4682829
1153521
5936118
1700356
2521956
4491598
4398876
2907944
3796328
1429339
2859646
3827005
2297974
5926710
1974956
2062304
4053952
1055258
5374563
3047016
5156063
5142661
4517061
3844591
4971674
2714044
1573135
2261024
2891002
1231639
3771468
1977256
5151591
2898000
5014270
5137076
2915312
3908654
5181584
5409965
2352132
4963163
3377733
2275055
2285395
3683918
2435409
4346110
2047467
1869465
4417273
4605844
1111826
3263717
6026141


In [127]:
# generating summary
true_labels = np.array(list(dev_Y.values()))

from metal.analysis import lf_summary
lf_summary(csr_matrix(dev_L),csr_matrix(true_labels))

Unnamed: 0,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.
0,1,0.044,0.044,0.044,5,6,0.454545
1,1,0.044,0.044,0.044,6,5,0.545455
2,2,0.036,0.036,0.012,9,0,1.0
3,1,0.064,0.064,0.064,2,14,0.125
4,1,0.048,0.048,0.048,6,6,0.5
5,2,0.256,0.256,0.064,63,1,0.984375
6,2,1.0,1.0,0.268,225,25,0.9
7,"[1.0, 2.0]",1.0,1.0,0.268,195,55,0.78
