# Coders++ UI implementation for Advanced Object Detection in images

1. Built with the Streamlit library in Python.
2. To use:
*   Sign up on the ngrok website and get an auth token in order to access a temporary public URL.
*   Upload the test data via the content bar.
*   Install the libraries: Streamlit and pyngrok.




# Installing Libraries

*   PyTorch version: 1.8 supported by CUDA version: 10.1 (along with torchvision) 
*   Detectron2 compatible with above PyTorch version.



In [None]:
# PyYAML is pinned to 5.1 (presumably for detectron2 compatibility on Colab; confirm before changing).
!pip install pyyaml==5.1
# PyTorch 1.8.0 and torchvision 0.9.0 built for CUDA 10.1, taken from the PyTorch wheel index.
!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

# Streamlit serves the UI; pyngrok opens the public tunnel to it.
!pip install streamlit
!pip install pyngrok

# Prebuilt detectron2 wheel from the index matching the cu101 / torch 1.8 build installed above.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
# Restart runtime after installing libraries

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html


In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#PAGE_CONFIG = {"page_title":"StColab.io","page_icon":":smiley:","layout":"centered"}
#st.beta_set_page_config(**PAGE_CONFIG)

In [None]:
%%writefile app.py
import streamlit as st
import cv2
import os
import io
import time
import numpy as np
from PIL import Image
from google.colab.patches import cv2_imshow
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import ColorMode, Visualizer
from google.colab.patches import cv2_imshow

# this is a function to get partitions that we can apply based on the image resolution
# it is generic to some extent for most of the image resolutions
def get_partitions(img, dim):
  """Split a pixel dimension into equal-width bands.

  The first of 3, 4, 5, 6 that divides ``dim`` exactly is used as the number
  of bands. (6 can never be chosen, because every multiple of 6 is also a
  multiple of 3; it is kept to mirror the original candidate list.)

  Args:
    img: Unused; kept so existing callers keep working.
    dim: Length in pixels of the axis to split (image height or width).

  Returns:
    A ``(partitions, division)`` tuple. ``partitions`` is a list of
    ``division + 1`` boundary offsets starting at 0. The final offset is
    ``dim - 1`` rather than ``dim``, so a slice ending at the last boundary
    stops one pixel short of the edge. ``division`` is the number of bands.

  Raises:
    ValueError: If ``dim`` is not divisible by any of 3, 4, 5 or 6.
      Previously the function fell through and returned ``None``, which
      surfaced in callers as an opaque tuple-unpacking ``TypeError``.
  """
  for division in (3, 4, 5, 6):
    if dim % division == 0:
      gap = dim // division
      partitions = [gap * step for step in range(division + 1)]
      # Behaviour kept from the original: the last boundary is dim - 1.
      partitions[-1] -= 1
      return partitions, division
  raise ValueError(
      "cannot partition a dimension of %r pixels: it is not divisible by "
      "3, 4, 5 or 6" % (dim,))

# converting image to grid image
# getting grid numbers and rows and columns partitioned
def img2grid(img):
  """Cut an image into a grid of cells, listed row by row.

  Args:
    img: Array with exactly three dimensions (height, width, channels).

  Returns:
    A ``(grid, rows, columns)`` tuple. ``grid`` is a 1-D object array of
    ``rows * columns`` slices of ``img`` in row-major order; ``rows`` and
    ``columns`` are the band counts chosen by ``get_partitions`` for the
    height and the width.
  """
  h, w, _ = img.shape
  x, rows = get_partitions(img, h)
  y, columns = get_partitions(img, w)
  cells = [
      img[x[i]: x[i+1], y[j]: y[j+1]]
      for i in range(len(x) - 1)
      for j in range(len(y) - 1)
  ]
  # The last boundary from get_partitions is one pixel short, so cells in the
  # last row/column are smaller than the rest. np.array() on such a ragged
  # list only worked via a deprecated implicit object conversion (and is an
  # error on newer NumPy), so the object array is filled explicitly instead.
  grid = np.empty(len(cells), dtype=object)
  for index, cell in enumerate(cells):
    grid[index] = cell
  return grid, rows, columns

st.set_option('deprecation.showfileUploaderEncoding',False)

# Class names; sorted so that index i is the label used for predicted class
# id i (presumably the same order used at training time — confirm).
classes = sorted(['car', 'autorickshaw', 'motorbike', 'building', 'bridge', 'truck', 'person', 'bus', 'traffic light', 'traffic sign'])

# Directory holding the config and weights produced by training.
MODEL_DIR = "/content/drive/MyDrive/Coders++/new_model"

# Prediction config: start from the defaults, then layer the training config on top.
cfg = get_cfg()
cfg.merge_from_file(os.path.join(MODEL_DIR, "output.yaml"))
# Weights created by training.
cfg.MODEL.WEIGHTS = os.path.join(MODEL_DIR, "model_final.pth")
cfg.MODEL.DEVICE = 'cpu'
# Score threshold for keeping a detection.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# NOTE(review): Streamlit re-runs this script on every interaction, so the
# predictor (and the checkpoint load) is rebuilt each time; consider caching it.
predictor = DefaultPredictor(cfg)


def show_grid(cells, n_columns):
  """Render ``cells`` in the page, ``n_columns`` images per row."""
  for start in range(0, len(cells), n_columns):
    row_cells = cells[start:start + n_columns]
    for column, cell in zip(st.columns(n_columns), row_cells):
      column.image(cell)


st.title("Object Detection in the Image divided as Grids")
image_file = st.file_uploader("Upload an image", type=["png","jpg","jpeg"])
if image_file is not None:
  # Decode the uploaded bytes directly. Previously the file was re-read from a
  # fixed Drive folder by name, which produced None (and a crash in st.image)
  # for any upload that was not already stored there.
  encoded = np.frombuffer(image_file.getvalue(), dtype=np.uint8)
  img = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
  if img is None:
    st.error("Could not decode the uploaded file as an image.")
    st.stop()
  # OpenCV decodes to BGR; tell Streamlit so the colours are not swapped.
  st.image(img, channels="BGR")

  # Run the model; the predictor takes the BGR image as read by OpenCV.
  outputs = predictor(img)
  instances = outputs["instances"].to("cpu")

  # Visualizer arguments:
  #   image         - the input with its channel axis reversed (BGR -> RGB)
  #   metadata      - the class names
  #   scale         - output scaling factor
  #   instance_mode - colouring mode for the drawn instances
  m_data = MetadataCatalog.get("category_train").set(thing_classes = classes)
  v = Visualizer(
      img[:, :, ::-1],
      metadata = m_data,
      scale = 1,
      instance_mode = ColorMode.SEGMENTATION,
  )
  # Draw the predictions; the rendered image comes back in RGB, which is what
  # Streamlit displays, so it is used as-is for both grids below.
  annotated_rgb = v.draw_instance_predictions(instances).get_image()

  # First view: the annotated image cut into grid cells.
  grid, rows, columns = img2grid(annotated_rgb)
  show_grid(list(grid), columns)

  # For every detected class, collect the grid cells its masks fall into.
  classes_detected = instances.pred_classes.tolist()
  mask_array = instances.pred_masks.numpy()  # one mask per detected instance
  cell_count = rows * columns
  dic = {class_id: set() for class_id in set(classes_detected)}
  for class_id, mask in zip(classes_detected, mask_array):
    # img2grid needs a 3-D array, so give the 2-D mask a channel axis.
    mask_cells, _, _ = img2grid(mask[:, :, np.newaxis])
    for grid_no, mask_cell in enumerate(mask_cells):
      count = np.count_nonzero(mask_cell)
      # NOTE(review): the denominator is the number of grid cells, not the
      # pixel area of a cell, so this is close to an "any overlap" test.
      # Kept as in the original; confirm whether a coverage ratio was meant.
      if (count / cell_count) > 0.1:
        dic[class_id].add(grid_no)

  # Class name -> grid cells containing that class.
  result = {classes[class_id]: cells for class_id, cells in dic.items()}

  # Select the object to find resultant grids
  st.subheader("Select the object to find resultant grids:")
  obj = st.radio(
     "",
     ('car', 'autorickshaw', 'motorbike', 'building', 'bridge', 'truck', 'person', 'bus', 'traffic light', 'traffic sign'))

  # Second view: the same grid, with a red border on cells holding the
  # selected class. Nothing is drawn when that class was not detected.
  lst = []
  if obj in result:
    selected_cells = result[obj]
    for grid_no, cell in enumerate(grid):
      if grid_no in selected_cells:
        cell = cv2.copyMakeBorder(cell, 15, 15, 15, 15, cv2.BORDER_CONSTANT, value=[255,0,0])
      lst.append(cell)
  # Writing the result
  show_grid(lst, columns)

Overwriting app.py


In [None]:
# Save your ngrok auth token; replace the <<Your Auth Key>> placeholder with your own key before running.
!ngrok authtoken <<Your Auth Key>>  ## Sign up in https://ngrok.com/ to get your free key

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
from pyngrok import ngrok
!nohup streamlit run app.py &
url = ngrok.connect(port = 8501)
print(url)
!streamlit run --server.port 80 app.py >/dev/null

nohup: appending output to 'nohup.out'
NgrokTunnel: "http://34dd-34-125-143-136.ngrok.io" -> "http://localhost:80"
2021-12-10 06:12:10.639 [Checkpointer] Loading from /content/drive/MyDrive/Coders++/new_model/model_final.pth ...
2021-12-10 06:12:38.938 [Checkpointer] Loading from /content/drive/MyDrive/Coders++/new_model/model_final.pth ...
  grid = np.array(grid)
2021-12-10 06:14:01.148 [Checkpointer] Loading from /content/drive/MyDrive/Coders++/new_model/model_final.pth ...
  grid = np.array(grid)
2021-12-10 06:27:24.266 [Checkpointer] Loading from /content/drive/MyDrive/Coders++/new_model/model_final.pth ...
2021-12-10 06:29:01.529 [Checkpointer] Loading from /content/drive/MyDrive/Coders++/new_model/model_final.pth ...
2021-12-10 06:29:01.894 Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/streamlit/script_runner.py", line 354, in _run_script
    exec(code, module.__dict__)
  File "/content/app.py", line 67, in <module>
    st.image(img)
  File "/u

In [None]:
#ngrok.kill()  