In [1]:
from utils.imports import *

Using TensorFlow backend.


In [2]:
def getRegionFromMap(slice_npy):
    """Return the region properties of the labelled regions in one slice.

    The slice is passed to ``measure.label`` exactly as given (no thresholding
    is applied here); the resulting label image is cast to ``int`` and handed
    to ``measure.regionprops``, whose result is returned unchanged.
    """
    labelled = measure.label(slice_npy).astype(int)
    return measure.regionprops(labelled)

def _regionMetricsFromVolume(seg, maxAllowedArea):
    """Compute the 9 region features of a stack of slices, or None if no region qualifies.

    seg            -- array indexed as seg[slice]; every seg[slice] is passed to
                      getRegionFromMap.
    maxAllowedArea -- regions with a larger area are ignored.

    Returns np.array([avgArea, maxArea, avgEcc, avgEquivlentDiameter,
    stdEquivlentDiameter, weightedX, weightedY, numNodes, numNodesperSlice]),
    or None when no region was kept.
    """
    nslices = seg.shape[0]

    totalArea = 0.
    eccSum = 0.
    eqDiameterSum = 0.
    weightedX = 0.
    weightedY = 0.
    numNodes = 0.
    areas = []
    eqDiameters = []

    for slicen in range(nslices):
        # seg[slicen] is what the former expand_dims(seg, 1)[slicen, 0, :, :]
        # selected for any input with at least three dimensions.
        for region in getRegionFromMap(seg[slicen]):
            if region.area > maxAllowedArea:
                continue
            totalArea += region.area
            areas.append(region.area)
            eccSum += region.eccentricity
            eqDiameterSum += region.equivalent_diameter
            eqDiameters.append(region.equivalent_diameter)
            # area-weighted centroid: centroid[0] feeds "X", centroid[1] feeds "Y"
            weightedX += region.centroid[0]*region.area
            weightedY += region.centroid[1]*region.area
            numNodes += 1

    if totalArea == 0 or numNodes == 0:
        return None

    return np.array([totalArea / numNodes,            # avgArea
                     max(areas),                      # maxArea
                     eccSum / numNodes,               # avgEcc
                     eqDiameterSum / numNodes,        # avgEquivlentDiameter
                     np.std(eqDiameters),             # stdEquivlentDiameter
                     weightedX / totalArea,
                     weightedY / totalArea,
                     numNodes,
                     numNodes*1. / nslices])          # numNodesperSlice


def getRegionMetricRow(fname):
    """Load a saved segmentation stack and return its 9-element region feature row.

    fname -- path of a .npy file readable by np.load. The array is indexed by
             slice along axis 0 (the previous implementation inserted the
             singleton axis itself, so the file is presumably
             [#slices, height, width] -- TODO confirm against the producer).

    Returns np.array([avgArea, maxArea, avgEcc, avgEquivlentDiameter,
    stdEquivlentDiameter, weightedX, weightedY, numNodes, numNodesperSlice]).

    WARNING: when no region is kept (nothing found, or everything exceeded the
    area limit) the file `fname` is DELETED from disk and None is returned.
    Callers must handle the None result.
    """
    seg = np.load(fname)

    # Crude heuristic to filter some bad segmentations: do not allow any node
    # to be larger than 10% of the pixels of a 512x512 slice, to eliminate
    # background regions.
    maxAllowedArea = 0.10 * 512 * 512

    metrics = _regionMetricsFromVolume(seg, maxAllowedArea)
    if metrics is None:
        os.remove(fname)
        return None
    return metrics

def createFeatureDataset(nodfiles_true,nodfiles_false):
    """Build and save the feature matrix and label vector for the classifier.

    nodfiles_true  -- paths of segmentation files labelled 1
    nodfiles_false -- paths of segmentation files labelled 0

    Each file is turned into a 9-element feature row by getRegionMetricRow.
    Files for which getRegionMetricRow returns None (no usable region; that
    function also deletes such files) are left out of the dataset. Previously
    the None was written into the feature matrix, which NumPy stores as a row
    of NaN that still carried a label.

    Side effects: writes "dataY.npy" (labels, float) and "dataX.npy"
    (features, shape [n_kept_files, 9]) to the current working directory.
    Returns None.
    """
    numfeatures = 9
    feature_rows = []
    labels = []

    for label, nodfiles in ((1, nodfiles_true), (0, nodfiles_false)):
        for nodfile in tqdm(nodfiles):
            row = getRegionMetricRow(nodfile)
            if row is None:
                # empty segmentation: no features to learn from
                continue
            feature_rows.append(row)
            labels.append(label)

    # reshape keeps the (0, numfeatures) shape when every file was skipped
    feature_array = np.array(feature_rows, dtype=float).reshape(-1, numfeatures)
    truth_metric = np.array(labels, dtype=float)

    np.save("dataY.npy", truth_metric)
    np.save("dataX.npy", feature_array)



In [3]:
# Locations for the test-set run, all looked up in the shared PATH mapping.
pred_csv_path = PATH["model_test_pred"]  # prediction CSV is read from here; its directory listing gives the image files
src = PATH["model_test_pred"]            # same entry; prefix used when loading volumes and annotation CSVs
csv_path = PATH["annotations_test"]      # prefix under which the final result CSV is written
data_path = PATH["src_test"]             # not referenced by the cells visible in this notebook

In [4]:
# Model locations from the shared PATH mapping (not referenced by the cells visible here).
model_paths, model_final = PATH["model_paths"], PATH["model_final"]

In [5]:
# Load the serialized classifier from the current working directory; it is used
# further down through clf.predict_proba on 9-element region feature rows.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code --
# only load 'classifymodel.pkl' from a trusted source.
clf = joblib.load('classifymodel.pkl')

In [6]:
# Read the table of candidates to score. Later cells read its columns
# seriesuid, coordX, coordY, coordZ and diameter_mm.
# The path is built by plain string concatenation, so pred_csv_path presumably
# already ends with a path separator -- verify against the PATH configuration.
test_pred_0 = pd.read_csv(pred_csv_path + "1final_test_result.csv")


In [8]:
# Names of all entries in the prediction directory whose file name contains 'orig'.
patients = list(filter(lambda entry: 'orig' in entry, os.listdir(pred_csv_path)))

In [9]:
# Attach to every candidate row the image file that get_filename resolves for its seriesuid.
test_pred_0["file"] = test_pred_0["seriesuid"].map(
    lambda series_uid: get_filename(patients, series_uid)
)
# NOTE(review): dropna() without `subset` removes rows with a missing value in ANY
# column, not only rows without a matching file -- confirm that this is intended.
test_pred_0 = test_pred_0.dropna()

In [10]:
# Score every candidate row of test_pred_0 with the region-feature classifier `clf`.
#
# Each image volume is loaded and normalized once; for every candidate in that
# volume a cube of 2*cube_half voxels per side is cut out around the candidate
# centre, the same 9 region features as in getRegionMetricRow are computed on
# it, and clf.predict_proba gives the probability of class 1.
# Candidates too close to the volume border, or whose cube yields no region,
# get probability 0.

cube_half = 18  # half edge length of the cube, in voxels

# Crude heuristic to filter some bad segmentations: do not allow any node to be
# larger than 10% of the pixels of a 512x512 slice.
# NOTE(review): a cube slice has only (2*cube_half)**2 = 1296 pixels, so this limit
# can never trigger here; it is kept unchanged so the features stay computed the
# same way as in getRegionMetricRow.
maxAllowedArea = 0.10 * 512 * 512

average = []  # kept from the original cell; nothing is appended to it here

# Probabilities are collected per row label of test_pred_0. The loop visits rows
# grouped by sorted file name, which is generally NOT the row order of
# test_pred_0, so a plain append-list would be misaligned when it is later
# assigned positionally as a column.
prob_by_row = {}

for img_file in tqdm(sorted(patients)):
    mini_df_anno = test_pred_0[test_pred_0["file"] == img_file]  # all candidates of this file
    if mini_df_anno.shape[0] == 0:
        # some files may not have a candidate -- skip those
        continue

    # load the data once per file
    patient_id = img_file[:-9]  # drops the last 9 characters (presumably the '_orig.npy' suffix)
    img_array = np.load(src + img_file)
    pos_annos = pd.read_csv(src + patient_id + '_annos_pos.csv')
    first_anno = pos_annos.loc[0]
    origin = np.array([first_anno['origin_x'], first_anno['origin_y'], first_anno['origin_z']])
    spacing = np.array([first_anno['spacing_x'], first_anno['spacing_y'], first_anno['spacing_z']])
    img_array = normalize(img_array)

    for node_idx1, cur_row1 in mini_df_anno.iterrows():
        center = np.array([cur_row1["coordX"], cur_row1["coordY"], cur_row1["coordZ"]])  # nodule center
        # candidate coordinates -> voxel indices (x, y, z)
        v_center = np.rint(np.absolute(center - origin) / spacing)
        new_x = int(v_center[0])
        new_y = int(v_center[1])
        new_z = int(v_center[2])

        cls_result_cube_30 = 0  # default: cube does not fit, or no region found

        # img_array is indexed [z, y, x]; the full cube must lie inside the volume
        cube_fits = (cube_half <= new_z <= img_array.shape[0] - cube_half and
                     cube_half <= new_y <= img_array.shape[1] - cube_half and
                     cube_half <= new_x <= img_array.shape[2] - cube_half)
        if cube_fits:
            seg = img_array[new_z - cube_half : new_z + cube_half,
                            new_y - cube_half : new_y + cube_half,
                            new_x - cube_half : new_x + cube_half]
            nslices = seg.shape[0]

            # running sums over all kept regions of all slices
            totalArea = 0.
            avgEcc = 0.
            avgEquivlentDiameter = 0.
            weightedX = 0.
            weightedY = 0.
            numNodes = 0.
            areas = []
            eqDiameters = []

            for slicen in range(nslices):
                for region in getRegionFromMap(seg[slicen]):
                    if region.area > maxAllowedArea:
                        continue
                    totalArea += region.area
                    areas.append(region.area)
                    avgEcc += region.eccentricity
                    avgEquivlentDiameter += region.equivalent_diameter
                    eqDiameters.append(region.equivalent_diameter)
                    weightedX += region.centroid[0]*region.area
                    weightedY += region.centroid[1]*region.area
                    numNodes += 1

            if totalArea != 0 and numNodes != 0:
                # turn the sums into the final metrics
                weightedX = weightedX / totalArea
                weightedY = weightedY / totalArea
                avgArea = totalArea / numNodes
                avgEcc = avgEcc / numNodes
                avgEquivlentDiameter = avgEquivlentDiameter / numNodes
                stdEquivlentDiameter = np.std(eqDiameters)
                maxArea = max(areas)
                numNodesperSlice = numNodes*1. / nslices

                # feature order must match the one the classifier was fitted on
                feature = np.array([avgArea, maxArea, avgEcc, avgEquivlentDiameter,
                                    stdEquivlentDiameter, weightedX, weightedY,
                                    numNodes, numNodesperSlice])
                cls_result_cube_30 = clf.predict_proba(np.expand_dims(feature, 0))[0][1]

        prob_by_row[node_idx1] = cls_result_cube_30

# One probability per row, in the row order of test_pred_0, so that a positional
# column assignment lines up. A KeyError here means a row's "file" value is not
# one of `patients`, i.e. the row was never scored.
probability_30_30_30_cube = [prob_by_row[row_label] for row_label in test_pred_0.index]


100%|██████████| 800/800 [50:09<00:00,  3.76s/it]  


In [11]:
# Clamp the probabilities to [0.005, 0.995], then round them to 3 decimals.
probability_30_30_30_cube = (
    np.array(probability_30_30_30_cube)
    .clip(0.005, 0.995)
    .round(3)
)
# Assigning an array is positional: element k goes to the k-th row of test_pred_0,
# so the values must be in the same order as the rows of test_pred_0.
test_pred_0['probability'] = probability_30_30_30_cube

In [12]:
# Write the scored candidates (including the new 'probability' column) without the
# DataFrame index. The path is csv_path with the file name appended by plain string
# concatenation, so csv_path presumably already ends with a path separator.
test_pred_0.to_csv(csv_path + "0625final.csv", index=False)