## Generate Dataset: ##

Generate bounding-box ground truth for object detection. Each image contains 17 vertebrae, and each vertebra is treated as a separate object. Using the corner landmark coordinates, each object is enclosed in a rectangular box, so every image yields 17 objects with 4 bounding-box coordinates each. The intended scheme is one class per vertebra (17 classes in total); note that `make_csv_bounding_box` below currently writes label 1 for the first 12 vertebrae and label 2 for the remaining ones. This script outputs a CSV file with the column headers: <br/>``` [ image_id, xmin, ymin, xmax, ymax, label ] ```

In [1]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import os

In [None]:
### visualize the bounding boxes of a single training image

filename = "sunhl-1th-10-Jan-2017-254 A AP.jpg"
image_directory= "C:/Users/Brinda Khanal/Downloads/scoliosis xray Single View/boostnet_labeldata/data/training/"
label_directory= "C:/Users/Brinda Khanal/Downloads/scoliosis xray Single View/boostnet_labeldata/labels/training/"
image = image_directory+filename

PAD = 10  # pixels added on every side of each tight bounding box

img = cv2.imread(image)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read image: " + image)

data= pd.read_csv(label_directory+"landmarks.csv",header= None)
filename_labels= pd.read_csv(label_directory+"filenames.csv",header= None)

print ("image shape:",img.shape)

# the row that holds this file name in filenames.csv selects the landmark row
indx= filename_labels[filename_labels.iloc[:,0]== filename].index.tolist()
if not indx:
    raise ValueError(filename + " is not listed in filenames.csv")
landmark= data.iloc[indx[0]].values

# values 0-67 are scaled by the image width (x), values 68-135 by the image
# height (y); the same rounded pixel positions are used for dots and boxes
landmark = [[int(round(img.shape[1]*landmark[m])),int(round(img.shape[0]*landmark[m+68]))] for m in range (0,68)]
for point_x, point_y in landmark:
    cv2.circle(img,(point_x,point_y), 10, (255,255,255), -1)
#print (landmark)


# group the landmarks four at a time: each group is the corner set of one box
N=4
corners = [landmark[n:n+N] for n in range(0, len(landmark), N)]
# cv2.boundingRect only accepts 32-bit int/float points, so pin the dtype
corners= np.array(corners, dtype=np.int32)

avg=[]
for box in corners:
    x,y,w,h = cv2.boundingRect(box)
    cv2.rectangle(img,(x-PAD,y-PAD),(x+w+PAD,y+h+PAD),(0,255,0),5)
    # the drawn box is padded on both sides, so it is 2*PAD larger than the tight box
    padded_w, padded_h = w+2*PAD, h+2*PAD
    avg.append((padded_w,padded_h))
    print (x-PAD,y-PAD,padded_w,padded_h)

# mean (width, height) of the padded boxes in this image
print (np.mean(avg,axis=0))

plt.figure(1, figsize=(25,25))


plt.subplot(211)
# reverse the channel axis: OpenCV loads images as BGR, matplotlib expects RGB
plt.imshow(img[:,:,::-1])


### Save all the bounding boxes to visualize

In [11]:
def visualize_all_bounding_box(image_directory, filenames_csv, landmarks_csv, save_path, split_type= 'train'):
    """Draw the padded vertebra bounding boxes on every listed image and save the results.

    Args:
        image_directory: Prefix concatenated directly with each file name, so it
            must end with a path separator.
        filenames_csv: Header-less CSV whose first column holds the image file names.
        landmarks_csv: Header-less CSV; row i belongs to the i-th file name and
            holds 68 x values followed by 68 y values, expressed as fractions of
            the image width and height.
        save_path: Output prefix; each image is written to
            ``save_path + split_type + '/' + <file name>``.
        split_type: Name of the sub-directory under ``save_path``.

    Raises:
        FileNotFoundError: If one of the listed images cannot be read.
    """
    landmarks_data = pd.read_csv(landmarks_csv, header=None)
    filename_labels = pd.read_csv(filenames_csv, header=None)

    pad = 10  # pixels added on every side of the tight bounding box
    points_per_box = 4

    output_directory = save_path + split_type
    if output_directory:
        # cv2.imwrite does not create missing directories, it only reports failure
        os.makedirs(output_directory, exist_ok=True)

    for i, names in enumerate(filename_labels.iloc[:, 0]):
        image_path = image_directory + names
        print (image_path)
        print (names)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising when the file cannot be read
            raise FileNotFoundError("could not read image: " + image_path)

        height, width = img.shape[:2]
        values = landmarks_data.iloc[i].values
        landmarks = [[int(round(width * values[m])), int(round(height * values[m + 68]))]
                     for m in range(68)]

        # group landmark coordinates, each group has 4 points that represents a vertebra;
        # cv2.boundingRect only accepts 32-bit int/float points, so pin the dtype
        box = np.array([landmarks[n:n + points_per_box]
                        for n in range(0, len(landmarks), points_per_box)],
                       dtype=np.int32)

        for box_coordinates in box:
            x, y, w, h = cv2.boundingRect(box_coordinates)
            cv2.rectangle(img, (x - pad, y - pad), (x + w + pad, y + h + pad), (0, 255, 0), 5)

        output_file = output_directory + '/' + names
        if not cv2.imwrite(output_file, img):
            print ("WARNING: could not write", output_file)
           

In [None]:
ROOT_PATH = "C:/Users/Brinda Khanal/Documents/Bidur Git Repo/Spine_Challenge/all landmark estimation/"
train_data_directory = os.path.join(ROOT_PATH, "groundtruth for 68 landmarks detection/train/")
#val_data_directory = os.path.join(ROOT_PATH, "data/val/")
train_label_directory=os.path.join(ROOT_PATH, "groundtruth for 68 landmarks detection/")
#val_label_directory =os.path.join(ROOT_PATH, "data/labels/val/")
save_path="C:/Users/Brinda Khanal/Documents/Bidur Git Repo/Spine_Challenge/all landmark estimation/visualize boxes/"

### call visualize_all_bounding_box to draw and save the boxes built from the
### landmark file of the training split

train_filenames_csv = os.path.join(train_label_directory,'train_filenames.csv')
train_landmarks_csv = os.path.join(train_label_directory,'predicted_train_landmarks.csv')
visualize_all_bounding_box(train_data_directory, train_filenames_csv, train_landmarks_csv,
                           save_path, 'train')


#visualize_all_bounding_box(val_data_directory,os.path.join(val_label_directory,'filenames.csv'),
#                           os.path.join(val_label_directory,'landmarks.csv'),save_path, 'val')


In [None]:
def make_csv_bounding_box(image_directory, filenames_csv, landmarks_csv, split_type= 'train',
                          pad_w=50, pad_h=10):
    """Write ``<split_type>.csv`` with one padded bounding box per vertebra per image.

    Args:
        image_directory: Prefix concatenated directly with each file name, so it
            must end with a path separator.
        filenames_csv: Header-less CSV whose first column holds the image file names.
        landmarks_csv: Header-less CSV; row i belongs to the i-th file name and
            holds 68 x values followed by 68 y values, expressed as fractions of
            the image width and height.
        split_type: Stem of the CSV written to the current working directory.
        pad_w: Pixels added to the left and right of each tight box.
        pad_h: Pixels added above and below each tight box.

    Raises:
        FileNotFoundError: If one of the listed images cannot be read.

    The CSV columns are image_id, xmin, ymin, xmax, ymax, label. The first 12
    boxes of an image get label 1, the remaining ones label 2.
    """
    landmarks_data = pd.read_csv(landmarks_csv, header=None)
    filename_labels = pd.read_csv(filenames_csv, header=None)

    columns = ['image_id', 'xmin','ymin','xmax','ymax','label']
    points_per_box = 4

    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    rows = []

    for i, names in enumerate(filename_labels.iloc[:, 0]):
        image_path = image_directory + names
        print (names)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising when the file cannot be read
            raise FileNotFoundError("could not read image: " + image_path)

        height, width = img.shape[:2]
        values = landmarks_data.iloc[i].values
        landmarks = [[int(round(width * values[m])), int(round(height * values[m + 68]))]
                     for m in range(68)]

        # group landmark coordinates, each group has 4 points that represents a vertebra;
        # cv2.boundingRect only accepts 32-bit int/float points, so pin the dtype
        box = np.array([landmarks[n:n + points_per_box]
                        for n in range(0, len(landmarks), points_per_box)],
                       dtype=np.int32)

        for c, box_coordinates in enumerate(box):
            x, y, w, h = cv2.boundingRect(box_coordinates)
            # NOTE(review): the padded box is not clipped to the image, so xmin/ymin
            # can be negative and xmax/ymax can exceed the image size - confirm the
            # consumer of this CSV accepts that.
            rows.append({'image_id': names,
                         'xmin': x - pad_w, 'ymin': y - pad_h,
                         'xmax': x + w + pad_w, 'ymax': y + h + pad_h,
                         'label': 1 if c < 12 else 2})

    df = pd.DataFrame(rows, columns=columns)
    csv_file = split_type + ".csv"
    df.to_csv(csv_file, index=False)

        
        

In [None]:
ROOT_PATH = "C:/Users/Brinda Khanal/Documents/Bidur Git Repo/Spine_Challenge/Object detection/"
train_data_directory = os.path.join(ROOT_PATH, "data/train/")
val_data_directory = os.path.join(ROOT_PATH, "data/val/")
train_label_directory=os.path.join(ROOT_PATH, "data/labels/train/")
val_label_directory =os.path.join(ROOT_PATH, "data/labels/val/")

### build train.csv and then val.csv with make_csv_bounding_box
### (dataset in the format supported by the luminoth library)

for _image_dir, _label_dir, _split in (
        (train_data_directory, train_label_directory, 'train'),
        (val_data_directory, val_label_directory, 'val')):
    make_csv_bounding_box(_image_dir,
                          os.path.join(_label_dir,'filenames.csv'),
                          os.path.join(_label_dir,'landmarks.csv'),
                          _split)



## Generate GroundTruth for Landmark Prediction from Patch ##
Using each vertebra's bounding box, crop a patch image (one image generates 17 patch images). Compute the ground-truth landmark coordinates (between 0 and 1) relative to each patch. Save the ground truth and the patches.

In [2]:
def make_csv_landmark(image_directory, filenames_csv, landmarks_csv, split_type= 'train',
                      increase_w=50, increase_h=10):
    """Crop one patch per vertebra and write the patch-relative landmark ground truth.

    For every listed image the 68 landmarks are grouped four at a time. Each
    group's bounding box, enlarged by the padding, is cropped and saved as
    ``<split_type>_patch_images/<image stem>_<box index>.jpg``. The four
    landmarks of the group are re-expressed relative to the patch (divided by
    the patch width/height) and written to ``<split_type>_patches_gnd.csv``.

    Args:
        image_directory: Prefix concatenated directly with each file name, so it
            must end with a path separator.
        filenames_csv: Header-less CSV whose first column holds the image file names.
        landmarks_csv: Header-less CSV; row i belongs to the i-th file name and
            holds 68 x values followed by 68 y values, expressed as fractions of
            the image width and height.
        split_type: Prefix of the patch directory and of the CSV file, both
            relative to the current working directory.
        increase_w: Pixels added to the left and right of each tight box.
        increase_h: Pixels added above and below each tight box.

    Raises:
        FileNotFoundError: If one of the listed images cannot be read.
    """
    landmarks_data = pd.read_csv(landmarks_csv, header=None)
    filename_labels = pd.read_csv(filenames_csv, header=None)

    keywords = ['x1','y1','x2','y2','x3','y3','x4','y4']
    points_per_box = 4

    patch_directory = split_type + '_patch_images/'
    # cv2.imwrite does not create missing directories, it only reports failure
    os.makedirs(patch_directory, exist_ok=True)

    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    rows = []

    for i, names in enumerate(filename_labels.iloc[:, 0]):
        image_path = image_directory + names
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising when the file cannot be read
            raise FileNotFoundError("could not read image: " + image_path)

        height, width = img.shape[:2]
        values = landmarks_data.iloc[i].values
        landmarks = [[int(round(width * values[m])), int(round(height * values[m + 68]))]
                     for m in range(68)]

        # group landmark coordinates, each group has 4 points that represents a vertebra;
        # cv2.boundingRect only accepts 32-bit int/float points, so pin the dtype
        box = np.array([landmarks[n:n + points_per_box]
                        for n in range(0, len(landmarks), points_per_box)],
                       dtype=np.int32)

        for c, box_coordinates in enumerate(box):
            print (box_coordinates)
            x_, y_, w_, h_ = cv2.boundingRect(box_coordinates)

            # if padding would start the patch left of / above the image, drop the
            # padding on that axis so the crop origin stays inside the image
            _increase_w = increase_w if (x_ - increase_w) >= 0 else 0
            _increase_h = increase_h if (y_ - increase_h) >= 0 else 0

            patch_image = crop_patch (img, x_, y_, w_, h_,
                                      increase_w=_increase_w, increase_h=_increase_h)

            adjusted_landmarks = adjust_landmarks_position(patch_image, box_coordinates,
                                                           increase_w=_increase_w,
                                                           increase_h=_increase_h,
                                                           x=x_, y=y_, w=w_, h=h_)

            patch_name = names.replace('.jpg', '')+'_'+str(c)+'.jpg' # filename for each patch
            print ("For Patch: ",patch_name)

            # keep the coordinates as floats; only the image id is a string
            adjusted_landmarks_dict = dict(zip(keywords, adjusted_landmarks.ravel().tolist()))
            adjusted_landmarks_dict['image_id'] = patch_name
            print (adjusted_landmarks_dict)

            save_path = patch_directory + patch_name
            if not cv2.imwrite(save_path, patch_image):
                print ("WARNING: could not write", save_path)

            rows.append(adjusted_landmarks_dict)

    df = pd.DataFrame(rows, columns=['image_id'] + keywords)
    csv_file = split_type+ '_patches_gnd'+ ".csv"
    df.to_csv(csv_file, index=False)

In [3]:
def adjust_landmarks_position (patch_image, box_coordinates,x,y,w,h, increase_w =10, increase_h= 10):
    """Express landmark pixel coordinates relative to a cropped patch.

    The patch origin is taken to be ``(x - increase_w, y - increase_h)``. Column 0
    of the result is the x offset from that origin divided by the patch width
    (``patch_image.shape[1]``); column 1 is the y offset divided by the patch
    height (``patch_image.shape[0]``). ``w`` and ``h`` are accepted but not used.

    Returns a new float array; ``box_coordinates`` itself is left untouched.
    """
    patch_height = patch_image.shape[0]
    patch_width = patch_image.shape[1]

    origin_x = x - increase_w
    origin_y = y - increase_h

    # astype makes a float copy, so the caller's array is not modified
    relative = box_coordinates.astype(float)

    shifted_x = relative[:, 0] - origin_x
    shifted_y = relative[:, 1] - origin_y
    relative[:, 0] = shifted_x / patch_width
    relative[:, 1] = shifted_y / patch_height

    return relative
        
    

In [4]:
def crop_patch(image, x,y,w,h, increase_w = 10, increase_h=10):
    """Return a copy of the padded box region of ``image``.

    The region spans rows ``y - increase_h`` to ``y + h + increase_h`` and
    columns ``x - increase_w`` to ``x + w + increase_w`` (end exclusive).

    A start index that falls above or left of the image is clamped to 0.
    Without the clamp a negative start would be read by numpy as "count from
    the end" and produce an empty or unrelated patch. When clamping happens the
    patch origin is 0 on that axis, not ``x - increase_w`` / ``y - increase_h``.
    Regions running past the bottom/right edge are cut at the image border by
    numpy slicing.

    The result is an independent array; writing to it does not change ``image``.
    """
    top = max(y - increase_h, 0)
    left = max(x - increase_w, 0)
    bottom = max(y + h + increase_h, 0)
    right = max(x + w + increase_w, 0)

    # slice first, then copy: only the patch is duplicated, not the whole image
    return np.asarray(image)[top:bottom, left:right].copy()
    

### Run this to generate patch images for train and validation, and a corresponding csv file with landmark positions (groundtruth)

In [None]:
ROOT_PATH = "C:/Users/Brinda Khanal/Documents/Bidur Git Repo/Spine_Challenge/Object detection/"
train_data_directory = os.path.join(ROOT_PATH, "data/train/")
val_data_directory = os.path.join(ROOT_PATH, "data/val/")
train_label_directory=os.path.join(ROOT_PATH, "data/labels/train/")
val_label_directory =os.path.join(ROOT_PATH, "data/labels/val/")

# generate the patches and landmark CSV for the train split, then the val split
for _image_dir, _label_dir, _split in (
        (train_data_directory, train_label_directory, 'train'),
        (val_data_directory, val_label_directory, 'val')):
    make_csv_landmark(_image_dir,
                      os.path.join(_label_dir,'filenames.csv'),
                      os.path.join(_label_dir,'landmarks.csv'),
                      _split)

## Generate Ground Truth for Combined Landmarks detection ##

In [22]:
def generate_spine_image(image_directory, filenames_csv, landmarks_csv, save_path, split_type= 'train'):
    """Save a copy of every listed image with everything outside the spine region flattened.

    For each image a mask is built from the vertebra bounding boxes (each padded
    by 50 pixels and then dilated). Pixels outside the mask are overwritten
    with the mean intensity of the pixels inside it, and the result is written
    to ``save_path + split_type + '/' + <file name>``.

    Args:
        image_directory: Prefix concatenated directly with each file name, so it
            must end with a path separator.
        filenames_csv: Header-less CSV whose first column holds the image file names.
        landmarks_csv: Header-less CSV; row i belongs to the i-th file name and
            holds 68 x values followed by 68 y values, expressed as fractions of
            the image width and height.
        save_path: Output prefix, concatenated directly with ``split_type``.
        split_type: Name of the sub-directory under ``save_path``.

    Raises:
        FileNotFoundError: If one of the listed images cannot be read.
    """
    landmarks_data = pd.read_csv(landmarks_csv, header=None)
    filename_labels = pd.read_csv(filenames_csv, header=None)

    points_per_box = 4

    output_directory = save_path + split_type
    if output_directory:
        # cv2.imwrite does not create missing directories, it only reports failure
        os.makedirs(output_directory, exist_ok=True)

    for i, names in enumerate(filename_labels.iloc[:, 0]):
        image_path = image_directory + names
        print (names)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising when the file cannot be read
            raise FileNotFoundError("could not read image: " + image_path)

        height, width = img.shape[:2]
        values = landmarks_data.iloc[i].values
        landmark = [[int(round(width * values[m])), int(round(height * values[m + 68]))]
                    for m in range(68)]

        # group 4 corner landmarks to form box;
        # cv2.boundingRect only accepts 32-bit int/float points, so pin the dtype
        landmark_corners = np.array([landmark[n:n + points_per_box]
                                     for n in range(0, len(landmark), points_per_box)],
                                    dtype=np.int32)

        # paint every padded box as a filled white rectangle on a black canvas
        blank_image = np.zeros(img.shape, np.uint8)
        for box in landmark_corners:
            x, y, w, h = cv2.boundingRect(box)
            cv2.rectangle(blank_image, (x - 50, y - 50), (x + w + 50, y + h + 50), (255, 255, 255), -1)

        # grow the white region further so neighbouring boxes merge
        kernel = np.ones((10, 10), np.uint8)
        dilated = cv2.dilate(blank_image, kernel, iterations=5)

        # mask=True hides a pixel, so invert: only pixels inside the white region
        # contribute to the mean
        masked = np.ma.array(data=img, mask=~dilated.astype(bool))
        mean = np.mean(masked)
        print (mean)

        # overwrite everything outside the region with that mean intensity
        img[dilated == 0] = mean

        output_file = output_directory + '/' + names
        if not cv2.imwrite(output_file, img):
            print ("WARNING: could not write", output_file)
        

       

In [None]:
ROOT_PATH = "C:/Users/Brinda Khanal/Documents/Bidur Git Repo/Spine_Challenge/Object detection/"
train_data_directory = os.path.join(ROOT_PATH, "data/train/")
val_data_directory = os.path.join(ROOT_PATH, "data/val/")
train_label_directory=os.path.join(ROOT_PATH, "data/labels/train/")
val_label_directory =os.path.join(ROOT_PATH, "data/labels/val/")
save_path='C:/Users/Brinda Khanal/Documents/Bidur Git Repo/Spine_Challenge/all landmark estimation/groundtruth for 68 landmarks detection/'

# write the spine-only images for the train split, then the val split
for _image_dir, _label_dir, _split in (
        (train_data_directory, train_label_directory, 'train'),
        (val_data_directory, val_label_directory, 'val')):
    generate_spine_image(_image_dir,
                         os.path.join(_label_dir,'filenames.csv'),
                         os.path.join(_label_dir,'landmarks.csv'),
                         save_path, _split)