# Part 6 - Generate Prediction File

This notebook creates the prediction (submission) file. First a dataframe of predictions is generated, and then the output file is created from that dataframe.

The steps are as follows:
1. Parse through the dataframe and get the patient ids
2. Generate a prediction for each patient id
3. Create bounding boxes from the prediction
4. Generate box confidences (average of the prediction over the box region)
5. Encode the bounding boxes in the correct format
6. Generate the output file


In [1]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
import numpy as np
import warnings
import pydicom

#Import previously created functions
from rnsa_funcs import *

import os
import gc
import itertools

#Remove GPUs
#os.environ["CUDA_VISIBLE_DEVICES"]="-1"    

#Import Tensorflow
import tensorflow as tf
from tensorflow import set_random_seed


#Import Keras
import keras
from keras import backend as K
from keras import layers
from keras.layers import Activation, ZeroPadding2D, AveragePooling2D, Add, Concatenate, SeparableConv2D
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, Input, DepthwiseConv2D
from keras.layers import GlobalAveragePooling2D, multiply, ZeroPadding2D, MaxPooling2D, Reshape, add, Conv2DTranspose
#from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.models import Sequential, Model
from keras import regularizers
from keras.optimizers import Adam, SGD
from keras.models import load_model
from keras.initializers import glorot_uniform
#from keras.models import load_model
from keras_applications.imagenet_utils import _obtain_input_shape 
from keras.callbacks import ModelCheckpoint
#import sklearn
import sklearn as sk
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from skimage import measure

import pickle

%matplotlib inline

warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [2]:
from Unet_DS5 import *

In [3]:
from skimage.transform import resize

In [4]:
#Configuring to try to prevent OOM errors: let TensorFlow grow GPU memory on demand
#instead of reserving it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
#The option only takes effect once a session built from this config is handed to Keras;
#without this call the config object above is never used
K.set_session(tf.Session(config=config))

In [7]:
#Setup path locations
#Raw strings keep every Windows backslash literal, so the paths no longer depend on
#invalid escape sequences such as "\K" or "\s" being passed through unchanged
BASE_PATH = r'G:\Kaggle\RSNA_Comp'
TRAIN_PATH_RAW = r'G:\Kaggle\RSNA_Comp\stage_1_train_images'
#TEST_PATH_RAW = r'G:\Kaggle\RSNA_Comp\stage_1_test_images'
TRAIN_PATH_CLEAN = r'G:\Kaggle\RSNA_Comp\train_image_clean'
TEST_PATH_CLEAN = r'G:\Kaggle\RSNA_Comp\test_image_clean'
MODEL_PATH = r'G:\Kaggle\RSNA_Comp\models'

#Stage 2 raw test images (replaces the stage 1 location commented out above)
TEST_PATH_RAW = r'G:\Kaggle\RSNA_Comp\stage_2_test_images'

In [6]:
#Import dataframes from csvs
#The commented-out reads below are earlier inputs kept for reference; they are not executed
#train_frame = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'train_frame.csv'))
#val_1_frame = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'val_1_frame.csv'))
#val_2_frame = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'val_2_frame.csv'))
#test_frame = pd.read_csv(os.path.join(TEST_PATH_CLEAN, 'test_frame.csv'))
#train_labels = pd.read_csv(os.path.join(BASE_PATH, 'stage_1_train_labels.csv'))
#Frame that drives prediction: the stage 2 sample submission under BASE_PATH.
#It is later processed with inverse = 1, so its first column is read as the patient id.
test_frame = pd.read_csv(os.path.join(BASE_PATH, 'stage_2_sample_submission.csv'))



#Import new binary frames
#train_frame_new = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'train_frame_bin.csv'))
#val_1_frame_new = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'val_1_frame_bin.csv'))

#Import train labels for drawing bounding boxes
#generate_pred_batch reads this module-level name and forwards it to Input_img_batch_BB_DS
full_labels = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'train_lab_df.csv'))
#val_1_lab_df = pd.read_csv(os.path.join(TRAIN_PATH_CLEAN, 'val_1_labels_tf.csv'))

## Load Model

In [8]:
#Build the network. test_block is not defined in this notebook; presumably it comes from
#one of the star imports (Unet_DS5 / rnsa_funcs) - TODO confirm
test_mod = test_block()

In [9]:
#File name of the saved weights; joined with MODEL_PATH when the weights are loaded
model_name="unetv6.hdf5"

In [12]:
#Earlier IoU-based configuration, kept for reference:
#test_mod.compile(optimizer = Adam(0.0001), loss = iou_bce_loss, metrics = [mean_iou])
#Compile with Adam (learning rate 1e-4). dice_bce_loss and mean_dice are not defined in
#this notebook; presumably they come from the star imports - TODO confirm
test_mod.compile(optimizer = Adam(0.0001), loss = dice_bce_loss, metrics = [mean_dice])

In [13]:
#Load the weights stored at MODEL_PATH/model_name into the compiled model
test_mod.load_weights(os.path.join(MODEL_PATH, model_name))

# Load Functions

In [14]:
def Input_img_batch_BB_DS(df, label_df, pred = 0, frm_type = 0):
    """Build a batch of 256x256 model inputs and, optionally, bounding-box masks.

    For every row of ``df`` the image ``<patient id>.npy`` is loaded from
    TEST_PATH_CLEAN and resized to 256x256.

    Parameters
    ----------
    df : DataFrame
        One row per image. The patient id is read positionally: column 6 when
        ``frm_type == 0``, column 0 otherwise.
    label_df : DataFrame
        Box labels with columns 'patientId', 'x', 'y', 'width', 'height'.
        Only read when ``pred == 0``.
    pred : int
        0 -> also build target masks; any other value -> images only.
    frm_type : int
        Selects which column of ``df`` holds the patient id (see ``df``).

    Returns
    -------
    ndarray of shape (m, 256, 256, 1) when ``pred != 0``; otherwise a tuple
    ``(images, masks)`` of two arrays with that shape.
    """
    #Setup Variables
    m = df.shape[0]
    want_masks = (pred == 0)
    id_col = 6 if frm_type == 0 else 0

    out_img_batch = np.zeros((m, 256, 256))
    #The mask buffer is only needed when labels were requested
    out_img_y = np.zeros((m, 256, 256)) if want_masks else None

    for i in range(m) :
        patient_id = df.iat[i, id_col]

        #NOTE(review): images are read from TEST_PATH_CLEAN even when label masks are
        #built from label_df - confirm this is intended for training/validation frames
        img = np.load(os.path.join(TEST_PATH_CLEAN, patient_id + '.npy'))

        #Resize the image to the network input resolution
        out_img_batch[i] = resize(img, (256, 256), mode='reflect')

        #Getting labels
        if want_masks :
            pat_labels = label_df[label_df['patientId'] == patient_id]
            #Boxes are drawn on a 1024x1024 canvas and then downsized with the image
            out_lab = np.zeros((1024, 1024))

            for _, dat in pat_labels.iterrows():
                x_min = np.around(dat['x']).astype(int)
                y_min = np.around(dat['y']).astype(int)

                w = np.around(dat['width']).astype(int)
                h = np.around(dat['height']).astype(int)

                #An empty extent marks no pixels (same as looping over an empty range)
                if w <= 0 or h <= 0 :
                    continue

                #Clipping both ends of each pixel range to the canvas and filling the
                #slice marks exactly the pixels a per-pixel clip-and-set loop would
                x_lo, x_hi = np.clip([x_min, x_min + w - 1], 0, 1023)
                y_lo, y_hi = np.clip([y_min, y_min + h - 1], 0, 1023)
                out_lab[y_lo:y_hi + 1, x_lo:x_hi + 1] = 1.0

            out_img_y[i] = resize(out_lab, (256, 256), mode='reflect')

    #Add the trailing channel axis expected by the model
    out_img_batch = out_img_batch.reshape(m, 256, 256, 1)
    if not want_masks :
        return out_img_batch

    out_y = out_img_y.reshape(m, 256, 256, 1)
    return out_img_batch, out_y

In [17]:
def generate_pred_batch(df_in, model, msk_thresh = 0.6, area_thresh = 2500, inverse = 0, inf_dict = None):
    """Predict masks for a batch of patients and turn them into bounding boxes.

    The batch is loaded with Input_img_batch_BB_DS, passed through
    ``model.predict``, and the output is resized to 1024x1024. Each mask is
    thresholded at ``msk_thresh``, split into connected regions, and every
    region whose bounding-box area is at least ``area_thresh`` becomes one row.

    Parameters
    ----------
    df_in : DataFrame
        One row per patient. The patient id is read positionally: column 6
        when ``inverse == 0``, column 0 otherwise.
    model : object exposing ``predict``
    msk_thresh : float
        Pixel threshold applied to the predicted mask.
    area_thresh : int
        Minimum bounding-box area (width * height, measured on the 1024x1024
        prediction) for a region to be kept.
    inverse : int
        0 -> report boxes as found on the prediction; otherwise each box is
        passed through ``invert_bb_convert`` with ``inf_dict[patientid]``.
    inf_dict : mapping or None
        Per-patient information for ``invert_bb_convert``; indexed by patient
        id, and only when ``inverse != 0``.

    Returns
    -------
    DataFrame with columns patientId, x, y, width, height, area, confidence
    (one row per kept box; empty when nothing passes the thresholds).
    """
    #Setup parameters + variables
    pred_columns = ['patientId', 'x', 'y', 'width', 'height', 'area', 'confidence']
    m = df_in.shape[0]
    id_col = 6 if inverse == 0 else 0

    #Generate predictions
    pred_img = Input_img_batch_BB_DS(df_in, full_labels, pred = 1, frm_type = inverse)

    predict_small = model.predict(pred_img)
    predict = resize(predict_small, (m, 1024, 1024, 1), mode='reflect')

    #Rows are collected in a list and turned into a frame once at the end; appending to
    #a DataFrame inside the loop re-copies the frame on every box
    records = []

    #For every patient generate prediction boxes
    for i in range(m):
        patientid = df_in.iat[i, id_col]
        pred_new = predict[i, :, :, 0]

        #Nothing above the threshold means no regions, so skip the labelling work
        if not pred_new.max() > msk_thresh :
            continue

        # threshold predicted mask and split it into connected regions
        comp = measure.label(pred_new > msk_thresh)

        for region in measure.regionprops(comp):
            y, x, y2, x2 = region.bbox
            height = y2 - y
            width = x2 - x

            #Generate stats about prediction - area and avg pool
            area = height * width
            if area < area_thresh :
                continue
            conf = np.mean(pred_new[y:y2, x:x2])

            if inverse == 0 :
                #No inverse flag: report the box as found on the prediction
                box = (x, y, width, height)
            else :
                box_arr = np.array([x, y, width, height])

                #Perform inverse BB transform and report its result
                box_out = invert_bb_convert(box_arr, inf_dict[patientid])
                #NOTE(review): invert_bb_convert is defined outside this notebook. The
                #transformed box is assumed to come back as [x, y, width, height]; if
                #the helper instead edits box_arr in place and returns None, fall back
                #to box_arr - confirm against rnsa_funcs
                box = box_arr if box_out is None else box_out

            #'area' stays the area measured on the prediction (used for thresholding)
            records.append({'patientId' : patientid,
                            'x' : box[0],
                            'y' : box[1],
                            'width' : box[2],
                            'height' : box[3],
                            'area' : area,
                            'confidence' : conf})

    return pd.DataFrame(records, columns=pred_columns)

In [18]:
def get_minibatch(df, batch_num, batch_size):
    """Return the rows of ``df`` that make up minibatch number ``batch_num``.

    Batches are consecutive runs of ``batch_size`` rows. ``batch_num`` wraps
    around modulo ``m // batch_size + 1`` (m = number of rows), so when m is an
    exact multiple of ``batch_size`` the last batch of a cycle is empty.

    Parameters
    ----------
    df : DataFrame
    batch_num : int
        Batch index; values past the end wrap around.
    batch_size : int
        Number of rows per batch (must be positive).

    Returns
    -------
    DataFrame with at most ``batch_size`` rows (fewer for the trailing batch).
    """
    #Get batch info - integer arithmetic so positional indices stay ints, not floats
    m = df.shape[0]
    max_batchno = m // batch_size + 1
    batch_num_act = batch_num % max_batchno

    #First row of the batch and one past its last row, capped at the frame length
    start = batch_num_act * batch_size
    stop = min(start + batch_size, m)

    #Select indices from dataframe
    return df.iloc[list(range(start, stop))]

In [19]:
def Generate_full_predframe(df, pred_mod, t_dict, batch_size = 2) :
    """Run generate_pred_batch over all of ``df`` in small batches and merge the results.

    Parameters
    ----------
    df : DataFrame
        One row per patient, patient id in the first column (batches are
        processed with ``inverse = 1``).
    pred_mod : model passed through to generate_pred_batch.
    t_dict : mapping passed to generate_pred_batch as ``inf_dict``.
    batch_size : int
        Rows predicted per call; defaults to 2.

    Returns
    -------
    DataFrame of predicted boxes for every row of ``df``, with a fresh 0..n-1
    index. Empty (with the prediction columns) when there are no boxes.
    """
    if batch_size < 1 :
        raise ValueError('batch_size must be a positive integer')

    m = df.shape[0]

    #Ceiling division so a trailing partial batch is still predicted
    #(m // batch_size would drop the last rows whenever m is not a multiple)
    num = -(-m // batch_size)

    batch_frames = []
    for i in range(num):
        pd_df = get_minibatch(df, i, batch_size)
        batch_frames.append(generate_pred_batch(pd_df, pred_mod, inverse = 1, inf_dict = t_dict ))

    #Concatenate once instead of appending inside the loop; empty batches are skipped
    #so they cannot affect the column dtypes of the merged frame
    filled_frames = [frame for frame in batch_frames if len(frame) > 0]
    if filled_frames :
        return pd.concat(filled_frames, ignore_index=True)

    #No boxes at all: hand back an empty frame with the prediction columns
    if batch_frames :
        return batch_frames[0]
    return pd.DataFrame(columns=['patientId', 'x', 'y', 'width', 'height', 'area', 'confidence'])

In [20]:
def Output_pred_file(df_patients, df_preds_in, conf_thresh = 0.65) :
    """Build the submission frame: one row per patient with its encoded boxes.

    Parameters
    ----------
    df_patients : DataFrame
        Must contain a 'patientId' column; one output row is produced per row,
        in the same order.
    df_preds_in : DataFrame
        Predicted boxes with columns 'patientId', 'x', 'y', 'width', 'height'
        and 'confidence'.
    conf_thresh : float
        Boxes with confidence below this value are dropped.

    Returns
    -------
    DataFrame with columns 'patientId' and 'PredictionString'. Each box is
    written as "confidence x y width height " (note the trailing space) and a
    patient's boxes are concatenated; patients without boxes get ''.
    """
    #Filtering rows by confidence threshold
    df_preds = df_preds_in[df_preds_in['confidence'] >= conf_thresh]

    #Walk the kept boxes once, growing each patient's prediction string. Columns are
    #read by name, and .values yields the stored scalars so str() formats them exactly
    #as element-wise access would
    box_cols = [df_preds[col].values
                for col in ('patientId', 'confidence', 'x', 'y', 'width', 'height')]
    pred_strs = {}
    for pid, conf, x, y, width, height in zip(*box_cols):
        box_str = str(conf) + ' ' + str(x) + ' ' + str(y) + ' ' + str(width) + ' ' + str(height) + ' '
        pred_strs[pid] = pred_strs.get(pid, '') + box_str

    #Iterate the ids by position so a non-default index on df_patients cannot break lookups
    patient_ids = df_patients['patientId'].tolist()

    return pd.DataFrame({'patientId' : patient_ids,
                         'PredictionString' : [pred_strs.get(pid, '') for pid in patient_ids]},
                        columns=['patientId', 'PredictionString'])

# Load everything else and generate predictions

In [18]:
#v2pick = open(os.path.join(TRAIN_PATH_CLEAN, 'val_2_tf.p'),'rb')
#v2_tfm = pickle.load(v2pick)
#v2pick.close()

In [19]:
#tstpick = open(os.path.join(TEST_PATH_CLEAN, 'test_set_tfs.p'),'rb')
#tst_tfm = pickle.load(tstpick)
#tstpick.close()

In [21]:
#Load the pickled object that is later indexed by patient id for the inverse
#bounding-box transform.
#NOTE: pickle.load can execute arbitrary code - only open files produced by this project.
#The with-block closes the file even if unpickling fails.
with open(os.path.join(TEST_PATH_CLEAN, 'test_set2_tfs.p'),'rb') as tstpick:
    tst_tfm = pickle.load(tstpick)

In [22]:
#Predict boxes for every patient in test_frame; tst_tfm supplies the per-patient
#information used for the inverse bounding-box transform
test_FULL = Generate_full_predframe(test_frame, test_mod, tst_tfm)

In [23]:
#Encode the predicted boxes into one PredictionString per patient (default confidence threshold 0.65)
out_csv_df = Output_pred_file(test_frame, test_FULL)

In [20]:
#out_csv_df = Output_pred_file(tst_inv, test_again)

In [25]:
#Sanity check: one row per patient and two columns (patientId, PredictionString)
out_csv_df.shape

(3000, 2)

In [26]:
#Write the submission file to the working directory, without the index column
out_csv_df.to_csv('unet_m6_stg2.csv', index=False)