In [5]:
import os 
import cv2
import pandas as pd
import numpy as np 
from tqdm import tqdm

The general flow of our algorithm will be:

1. **Step #1:** Input an image
2. **Step #2:** Construct an image pyramid
3. **Step #3:** For each scale of the image pyramid, run a sliding window
4. **Step #3a:** For each stop of the sliding window, extract the ROI
5. **Step #3b:** Take the ROI and pass it through our CNN originally trained for image classification
6. **Step #3c:** Examine the probability of the top class label of the CNN, and if it meets a minimum confidence, record (1) the class label and (2) the location of the sliding window
7. **Step #4:** Apply class-wise non-maxima suppression to the bounding boxes
8. **Step #5:** Return results to calling function

In [6]:
def get_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is an indexable ``(x_start, y_start, x_end, y_end)``. Side
    lengths are computed as ``end - start + 1``, i.e. the end coordinates are
    treated as inclusive pixel indices.

    Args:
        box1: First box.
        box2: Second box.

    Returns:
        The IoU as a float in ``[0.0, 1.0]``. ``0.0`` is returned when the
        union of the two boxes is empty (e.g. both boxes are degenerate)
        instead of dividing by zero.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xa = max(box1[0], box2[0])
    ya = max(box1[1], box2[1])
    xb = min(box1[2], box2[2])
    yb = min(box1[3], box2[3])
    # compute the area of the intersection rectangle (0 when disjoint)
    interArea = max(0, xb - xa + 1) * max(0, yb - ya + 1)

    # compute the area of each box; side lengths are clamped at 0 so an
    # inverted box (end < start) cannot yield a positive area from the
    # product of two negative sides
    boxAArea = max(0, box1[2] - box1[0] + 1) * max(0, box1[3] - box1[1] + 1)
    boxBArea = max(0, box2[2] - box2[0] + 1) * max(0, box2[3] - box2[1] + 1)

    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        # nothing to overlap with: avoid a 0/0 division
        return 0.0

    # return the intersection over union value
    return interArea / float(unionArea)

In [7]:
# Build a positive/negative crop dataset from selective-search proposals.
# For every row of the annotation CSV the image is loaded, region proposals
# are generated, and each proposal is compared with the row's ground-truth
# box: a high-IoU proposal is saved as a positive crop, low-IoU proposals
# that do not lie inside the ground-truth box are saved as negative crops.
df = pd.read_csv('../100_1/PATHS.csv').drop('Unnamed: 0', axis =1)

zipped_paths = df[['filename']]

max_prop = 2000          # number of proposals examined per image
max_pos = 30             # NOTE(review): unused below -- the loop keeps at most
                         # one positive per image (`positive == 0`); confirm intent
max_neg = 5              # maximum negative crops written per image
input_dim = (224, 224)   # size every crop is resized to before saving
total_pos = 0            # running counters, also used as output file names
total_neg = 0

positive_dir = '../FinalImages/Positive'
negative_dir = '../FinalImages/Negative'
# cv2.imwrite returns False instead of raising when the target directory is
# missing, so create the output folders up front.
os.makedirs(positive_dir, exist_ok=True)
os.makedirs(negative_dir, exist_ok=True)

for idx in tqdm(range(len(df))):
    row = df.iloc[idx]
    img_path = row.filename
    image = cv2.imread(f'../100_1/{img_path}')
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None
        tqdm.write(f'Skipping unreadable image: ../100_1/{img_path}')
        continue

    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    ss.setBaseImage(image)
    ss.switchToSelectiveSearchFast()
    rects = ss.process()

    gt_box = (row.xmin, row.ymin, row.xmax, row.ymax)
    gt_startx, gt_starty, gt_endx, gt_endy = gt_box

    # convert (x, y, w, h) proposals to (startx, starty, endx, endy);
    # only the first `max_prop` proposals are ever examined
    prop_rect = [(x, y, x + w, y + h) for (x, y, w, h) in rects[:max_prop]]

    positive = 0
    negative = 0
    for rect in prop_rect:
        if positive > 0 and negative >= max_neg:
            # both per-image quotas are met; nothing more can be written
            break

        startx, starty, endx, endy = rect
        iou = get_iou(gt_box, rect)

        if iou > .7 and positive == 0:
            roi = image[starty:endy, startx:endx]
            roi = cv2.resize(roi, input_dim, interpolation = cv2.INTER_CUBIC)
            filename = f'{total_pos}.png'
            cv2.imwrite(f'{positive_dir}/{filename}', roi)
            positive += 1
            total_pos += 1
            continue

        # True when the proposal lies entirely inside the ground-truth box
        fullOverlap = (
            startx >= gt_startx
            and starty >= gt_starty
            and endx <= gt_endx
            and endy <= gt_endy
        )

        # strict `<` so that at most `max_neg` negatives are written per image
        if not fullOverlap and iou < .05 and negative < max_neg:
            roi = image[starty:endy, startx:endx]
            roi = cv2.resize(roi, input_dim, interpolation = cv2.INTER_CUBIC)
            filename = f'{total_neg}.png'
            cv2.imwrite(f'{negative_dir}/{filename}', roi)
            negative += 1
            total_neg += 1
            continue
        
        
    
    



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 136/136 [04:03<00:00,  1.79s/it]


In [None]:
# Display the column names of `df` (the frame read from PATHS.csv).
df.columns