<a href="https://colab.research.google.com/github/SmadarCohen111/SIGHTBIT/blob/main/SIGHTBIT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Environment installation**

In [None]:
# Install a pinned PyYAML release before the deep-learning stack.
!pip install pyyaml==5.1
# Workaround: install an older PyTorch, since detectron2 has not released packages for PyTorch 1.9
# (issue: https://github.com/facebookresearch/detectron2/issues/3158)
!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

# Install the detectron2 build that matches PyTorch 1.8 (CUDA 10.1 wheel index).
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
# exit(0)  # After installation you need to "restart runtime" in Colab; uncommenting this line also restarts the runtime.
# numpyencoder provides the NumpyEncoder class used when the results are exported to JSON.
!pip install numpyencoder

**Import and check the PyTorch version**

In [None]:
import torch, torchvision
# Show the installed torch version and whether a CUDA device is usable.
print(torch.__version__, torch.cuda.is_available())
# Fail fast when the runtime is not on the torch 1.8 build installed by the previous cell.
assert torch.__version__.startswith("1.8")   # please manually install torch 1.8 if Colab changes its default version

**import libraries and utils**

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
from numpyencoder import NumpyEncoder
import scipy.misc
import os, json, cv2, random
import requests
import glob
from google.colab.patches import cv2_imshow
from google.colab import files
from google.colab import drive


# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog


**Mount drive for saving new dataset**

In [None]:
# Click the link, copy the Google authorization code, and paste it here.
# The Drive is mounted under /content/drive.
 
drive.mount ('/content/drive')

**Help methods**

In [None]:
# Remove outliers

def reject_outliers(data, m=2):
    """Keep the values of ``data`` that lie within ``m`` standard deviations of the mean.

    Args:
        data: 1-D NumPy array, or any sequence ``np.asarray`` can convert.
        m: Half-width of the accepted band, expressed in standard deviations.

    Returns:
        NumPy array holding the values with ``|x - mean| < m * std``.
        Empty input and input with zero spread are returned unfiltered.
    """
    data = np.asarray(data)
    if data.size == 0:
        # Nothing to filter; also avoids the mean-of-empty warning.
        return data

    spread = np.std(data)
    if spread == 0:
        # Every value equals the mean, so the strict comparison below
        # (0 < 0) would throw away all of them and leave callers with an
        # empty array. A constant sample has no outliers: keep it whole.
        return data

    return data[abs(data - np.mean(data)) < m * spread]

In [None]:
def get_max_width_height(mask_idxs):
  """Return a pair of maxima computed from the outlier-filtered mask coordinates.

  Args:
    mask_idxs: Iterable of 2-element coordinate pairs (each item is unpacked
      into exactly two values).

  Returns:
    Tuple ``(max_width, max_height)``.
  """
  # NOTE(review): both arrays below are built from the FIRST element of each
  # pair (the original unpacked `w,h` and then `h,w`, taking the leading name
  # both times), so the two returned values are always equal. This looks
  # unintended, but image_cropper consumes both values together - confirm the
  # intended axes before changing either side.
  leading_for_width = np.array([first for first, _ in mask_idxs])
  leading_for_height = np.array([first for first, _ in mask_idxs])

  kept_widths = reject_outliers(leading_for_width)
  kept_heights = reject_outliers(leading_for_height)

  return max(kept_widths), max(kept_heights)

In [None]:
# Load a JSON file

def resolve_json(zip_path):
  """Read the file at ``zip_path`` as text and return its parsed JSON content.

  Args:
    zip_path: Path of the file to read; despite the name it is opened
      directly and handed to ``json.load``.

  Returns:
    The decoded JSON value.
  """
  with open(zip_path, 'r') as handle:
    return json.load(handle)

In [None]:
def modify_categories_data(categories,category_id):
    """Return the category record whose ``'id'`` equals ``category_id``.

    Args:
        categories: Iterable of dict-like records, each with an ``'id'`` key.
        category_id: Identifier to look for.

    Returns:
        The matching record itself (not a copy). When several records share
        the id, the last one in iteration order is returned.

    Raises:
        ValueError: If no record carries ``category_id``. Previously this
            case surfaced as an ``UnboundLocalError`` on the return line.
    """
    found = False
    new_categories = None
    for category in categories:
        if category['id'] == category_id:
            # No early exit: the last match wins, as before.
            new_categories = category
            found = True

    if not found:
        raise ValueError("no category with id {!r}".format(category_id))
    return new_categories

In [None]:
def modify_annotations_data(annotations, categories, image_id):
  """Return the annotation of ``image_id`` together with its category record.

  Args:
    annotations: Iterable of dict-like records with ``'image_id'`` and
      ``'category_id'`` keys.
    categories: Category records, forwarded to ``modify_categories_data``.
    image_id: Image identifier to look for.

  Returns:
    Tuple ``(annotation, category)``. When several annotations belong to the
    image, the last one in iteration order (and its category) is returned.

  Raises:
    ValueError: If no annotation references ``image_id``. Previously this
      case surfaced as an ``UnboundLocalError`` on the return line.
  """
  found = False
  new_annotation = None
  list_categories = None
  for annotation in annotations:
    if annotation['image_id'] == image_id:
      # The category is resolved for every match, as before; the last one wins.
      new_annotation = annotation
      list_categories = modify_categories_data(categories, annotation['category_id'])
      found = True

  if not found:
    raise ValueError("no annotation for image id {!r}".format(image_id))
  return new_annotation, list_categories

In [None]:
def modify_images_data(images, image_id, width, height):
  """Overwrite the stored size of image ``image_id`` and return its record.

  The matching record is modified in place, so the change is visible through
  ``images`` as well.

  Args:
    images: Iterable of dict-like records with an ``'id'`` key.
    image_id: Identifier of the image to update.
    width: Value stored under ``'width'``.
    height: Value stored under ``'height'``.

  Returns:
    The updated record. When several records share the id, all of them are
    updated and the last one is returned.

  Raises:
    ValueError: If no record carries ``image_id``. Previously this case
      surfaced as an ``UnboundLocalError`` on the return line.
  """
  found = False
  image_data = None
  for img in images:
    if img['id'] == image_id:
      img['height'] = height
      img['width'] = width
      image_data = img
      found = True

  if not found:
    raise ValueError("no image with id {!r}".format(image_id))
  return image_data

In [None]:
# Export the collected data to a JSON file

def create_json_file(annotations_lists):
  """Write ``annotations_lists`` to ``my.json`` in the current working directory.

  The output is indented by four spaces and encoded through ``NumpyEncoder``.
  An existing ``my.json`` is overwritten.

  Args:
    annotations_lists: The object to serialise.
  """
  with open("my.json", "w") as out_file:
    json.dump(annotations_lists, out_file, indent=4, cls=NumpyEncoder)

In [None]:
# Collect the records describing a cropped image

def build_dict(train_json, image_id, new_width,new_height):
  """Record the image, annotation and category entries of a cropped image.

  The image entry inside ``train_json['images']`` is updated in place with the
  new size by ``modify_images_data``. The three resulting records are appended
  to ``images_list``, ``annotation_list`` and ``categories_list``.

  NOTE: those three lists are not parameters of this function; they are
  resolved from the enclosing (notebook-global) scope and must exist before
  the first call.

  Args:
    train_json: Dataset dict with ``'images'``, ``'annotations'`` and
      ``'categories'`` entries.
    image_id: Identifier of the image that was cropped.
    new_width: Width to store for the image.
    new_height: Height to store for the image.
  """
  image_record = modify_images_data(train_json['images'], image_id, new_width, new_height)
  annotation_record, category_record = modify_annotations_data(
      train_json['annotations'], train_json['categories'], image_id)

  images_list.append(image_record)
  annotation_list.append(annotation_record)
  categories_list.append(category_record)

In [None]:
# Crop the original image and compute the new image info

def image_cropper(im, image_id, outputs, sky_id, width, height,images_list, annotation_list, categories_list, counter_img, train_json):
  """Crop ``im`` past the sky segment, record the new size, save and show it.

  Args:
    im: Image array; it is sliced along three axes.
    image_id: Dataset identifier of the image, forwarded to ``build_dict``.
    outputs: Predictor output; ``outputs['panoptic_seg'][0]`` must offer
      ``.cpu().numpy()``.
    sky_id: Segment id whose pixels are located in the panoptic map.
    width: Original image width.
    height: Original image height.
    images_list, annotation_list, categories_list: Accepted but not used in
      this function (``build_dict`` is called without them).
    counter_img: Used as the file name of the saved crop.
    train_json: Dataset dict, forwarded to ``build_dict``.
  """
  panoptic_map = outputs['panoptic_seg'][0].cpu().numpy()
  sky_coords = np.argwhere(panoptic_map == sky_id)

  # NOTE(review): the offset called "width" is applied to the first image axis
  # and the one called "height" to the second; the recorded sizes follow the
  # same pairing. Confirm the intended axes before changing this.
  max_width, max_height = get_max_width_height(sky_coords)
  new_width = abs(width - max_width)
  new_height = abs(height - max_height)

  build_dict(train_json, image_id, new_width, new_height)

  cropped_img = im[max_width:, max_height:, :]

  # The result of cv2.imwrite is not inspected.
  target_path = '/content/drive/MyDrive/cropped_images/{}.jpg'.format(counter_img)
  cv2.imwrite(target_path, cropped_img)
  print("cropped image - ")
  cv2_imshow(cropped_img)

**Image prediction, segmentation, and filtering out the images without a sky area**

In [None]:
# Run prediction/segmentation and skip the images without a sky area

def pred_images(im, image_id, width, height, counter_img, training_number, images_list, annotation_list, categories_list, train_json ):
  """Segment ``im`` and hand it to ``image_cropper`` when a sky segment exists.

  The image is displayed, then passed to ``predictor``, which is resolved from
  the enclosing (notebook-global) scope. Among the returned segment infos, the
  id of the last one whose ``'category_id'`` is 40 is kept; when none matches
  (id stays 0) the function returns without cropping.

  Args:
    im: Image array given to the predictor.
    image_id, width, height, counter_img, images_list, annotation_list,
      categories_list, train_json: Forwarded unchanged to ``image_cropper``.
    training_number: Accepted but not used in this function.
  """
  print("origianl image - ")
  cv2_imshow(im)
  outputs = predictor(im)

  # presumably category 40 is the sky class of the loaded model - TODO confirm
  # against the model metadata
  sky_segment_ids = [segment['id'] for segment in outputs["panoptic_seg"][1]
                     if segment['category_id'] == 40]
  sky_id = sky_segment_ids[-1] if sky_segment_ids else 0

  if sky_id == 0:
    return
  image_cropper(im,image_id, outputs, sky_id, width, height,images_list, annotation_list, categories_list, counter_img, train_json)

**Extracting the image and the data about the image**

In [None]:
# Extract the image and the data about the image

def extract_imgs_and_info(image_id, training_number, images_list, annotation_list, categories_list,counter_img, train_json):
  """Download the image ``image_id``, store it locally and run the prediction on it.

  For every entry of ``train_json['images']`` whose ``'id'`` matches, the image
  is fetched from its ``'coco_url'``, written to ``save_dir/<file_name>``
  (``save_dir`` is resolved from the enclosing notebook-global scope), decoded
  and passed to ``pred_images``.

  Args:
    image_id: Identifier of the image to process.
    training_number, images_list, annotation_list, categories_list,
      train_json: Forwarded to ``pred_images``.
    counter_img: Number used to name the cropped output. It is incremented
      locally after each processed image; the caller's value is unaffected.

  Raises:
    requests.HTTPError: If the download answers with an error status.
    ValueError: If the downloaded bytes cannot be decoded as an image.
  """
  for image in train_json['images']:
    if image['id'] != image_id:
      continue

    width = image['width']
    height = image['height']

    r = requests.get(image['coco_url'], allow_redirects=True)
    # Stop here on a failed download rather than trying to decode an error page.
    r.raise_for_status()

    img_path = os.path.join(save_dir, image['file_name'])
    with open(img_path, 'wb') as f:
      # The file used to be created but left empty; persist the downloaded bytes.
      f.write(r.content)

    # Decode the image from the downloaded bytes into a NumPy array.
    arr = np.frombuffer(r.content, dtype=np.uint8)
    np_img = cv2.imdecode(arr, cv2.IMREAD_UNCHANGED)
    if np_img is None:
      raise ValueError("could not decode image {!r}".format(image['file_name']))
    if np_img.ndim == 2:
      # Single-channel images decode to a 2-D array, but the cropping step
      # slices three axes; expand them to three channels.
      np_img = cv2.cvtColor(np_img, cv2.COLOR_GRAY2BGR)

    pred_images(np_img, image_id, width, height, counter_img, training_number, images_list, annotation_list, categories_list, train_json)
    counter_img += 1



In [None]:
if __name__ == "__main__":
  
  #`initialize Configuration and predictor`
  cfg = get_cfg()
  cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml"))
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml")
  predictor = DefaultPredictor(cfg)
  
  #Obtaining dataset
  #commaned those lines after the first run
  !wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
  !unzip annotations_trainval2017.zip
  
  #directory for saving images
  path = "/content/drive/MyDrive/cropped_images"
  if not os.path.exists(path):
    os.mkdir(path)
  !mkdir get_images
  save_dir = 'get_images'

  #help vars
  
  #choose the number of samples you would like to train 
  training_number = 2
  #max images is counter
  max_images=0
  images_list = [] 
  annotation_list = [] 
  categories_list = []

  train_json = resolve_json('./annotations/instances_val2017.json')

  #itrate throgh the dataset
  for ix, imag in enumerate(train_json['annotations']):
      if(imag['category_id'] == 5):
        max_images += 1
        extract_imgs_and_info(imag['image_id'], training_number, images_list, annotation_list, categories_list,max_images, train_json)
        if max_images > training_number:
            break

  annotations_lists = {"image": images_list,"annotation": annotation_list,"categories": categories_list }
  create_json_file(annotations_lists)

