# Image Chipper
This Jupyter Notebook takes in tif image files, either a single user-specified file or every tif file in a user-specified folder, and chips each image into smaller chips, which can then be used for training/testing datasets in deep learning algorithms.

## Import Packages and GeoJSON File

In [1]:
# Packages

import rasterio
import geopandas as gpd
import pandas as pd
import numpy as np
from PIL import Image
import os

In [2]:
# Enter the path to the GeoJSON file associated with the tif image(s) you will be using.
# The path is passed directly to geopandas so that no manually opened file handle is left unclosed.

gdf = gpd.read_file('./image_tiles/1/boundingboxes-all-damagearea-pixelcoords.geojson')

## Select tif File
You may choose either a single tif file, or a folder of tif files.

### Single tif File

In [3]:
# Run this cell if you will chip a single tif file. Be sure to enter the full file path!
# The path stored here is the one passed to image_chipping() in the "Chipping" section.

mytif = './image_tiles/1/20170830aC0952830w294630n_1_2.tif'

### Folder of tif Files

In [None]:
# Run this cell if you will chip a folder of tif files. Be sure to enter the file path of the folder!
# Only regular files directly inside the folder whose extension is .tif (any letter case) get chipped.

tif_folder = './image_tiles/1'

## Helper Functions
The following cells are used to define helper functions to be used during the chipping process; please be sure to run all of them.

In [4]:
def get_image_resources(tif):
  '''
  Obtains the tif file name, rasterio image object, and the array representation of the given tif
  file.

  Parameters:
  - tif: Path to the tif file

  Returns a tuple (tif_name, img, image_array):
  - tif_name: The file name component of the path, with all directories stripped
  - img: The object returned by rasterio.open in read mode; it is returned still open, so the
    caller is responsible for closing it
  - image_array: The result of np.array applied to the PIL image opened from the same path
  '''
  # basename works for any directory depth, whereas a fixed index into tif.split('/') only
  # works for paths shaped exactly like './dir/dir/file.tif'.
  tif_name = os.path.basename(tif)
  # The context manager releases PIL's file handle once the pixels are copied into the array.
  with Image.open(tif) as image:
    image_array = np.array(image)
  # Opened last so that a failure above cannot leave this dataset open with no owner.
  img = rasterio.open(tif, 'r')
  return tif_name, img, image_array

In [5]:
def get_coordinates(img, tif_name):
  '''
  Takes in the image file, extracts its bounding box coordinates, and returns an (N, 4) array
  containing the coordinates.

  Parameters:
  - img: Object exposing a 4-element bounds attribute; the elements are read here as
    (xmin, ymin, xmax, ymax)
  - tif_name: File name of the tif; only rows of the global gdf whose 'image' value contains
    this exact text are used

  Returns an integer array of shape (N, 4) whose rows are [xmin, ymin, xmax, ymax], one row per
  matching entry of the 'bb' column.
  '''
  xmin, ymin, xmax, ymax = img.bounds[0], img.bounds[1], img.bounds[2], img.bounds[3]
  in_extent = gdf.cx[xmin:xmax, ymin:ymax]
  # regex=False: file names contain '.', which would otherwise act as a regex wildcard.
  # na=False: rows with a missing image name count as non-matching instead of breaking the mask.
  matches = in_extent['image'].str.contains(tif_name, regex = False, na = False)
  tif_coords = in_extent[matches]['bb'].reset_index(drop = True)

  coordinates = np.zeros((tif_coords.shape[0], 4), dtype = int)
  for i, bb_text in enumerate(tif_coords):
    # Each 'bb' value is text: drop its first and last character, then split on ', '.
    parts = bb_text[1:-1].split(', ')
    # The four values are interpreted as xmin, xmax, ymin, ymax (in that order) ...
    box_xmin, box_xmax = int(parts[0]), int(parts[1])
    box_ymin, box_ymax = int(parts[2]), int(parts[3])
    # ... and stored as [xmin, ymin, xmax, ymax].
    coordinates[i] = [box_xmin, box_ymin, box_xmax, box_ymax]
  return coordinates

In [6]:
def get_classes(img, tif_name):
  '''
  Takes in the image file, extracts every class that corresponds to each bounding box, and returns
  a one-dimensional array of length N containing the classes.

  Parameters:
  - img: Object exposing a 4-element bounds attribute; the elements are read here as
    (xmin, ymin, xmax, ymax)
  - tif_name: File name of the tif; only rows of the global gdf whose 'image' value contains
    this exact text are used

  Returns an object-dtype array of shape (N,) holding the 'damageleve' value of each matching
  row, in the same row order that get_coordinates uses.
  '''
  xmin, ymin, xmax, ymax = img.bounds[0], img.bounds[1], img.bounds[2], img.bounds[3]
  in_extent = gdf.cx[xmin:xmax, ymin:ymax]
  # regex=False: file names contain '.', which would otherwise act as a regex wildcard.
  # na=False: rows with a missing image name count as non-matching instead of breaking the mask.
  matches = in_extent['image'].str.contains(tif_name, regex = False, na = False)
  tif_classes = in_extent[matches]['damageleve'].reset_index(drop = True)

  classes = np.empty((len(tif_classes)), dtype = object)
  # After reset_index the labels are 0..N-1, so label lookup and position coincide.
  for i in range(len(tif_classes)):
    classes[i] = tif_classes[i]
  return classes

In [7]:
def _axis_overlap(first, second, start, stop):
  '''
  Returns a boolean mask marking the boxes whose extent along one axis (edge arrays first and
  second) reaches into the chip interval between start and stop.
  '''
  first_inside = np.logical_and(first < stop, first > start)
  second_inside = np.logical_and(second < stop, second > start)
  # A box that begins at or before the chip and ends at or after it has neither edge strictly
  # inside the chip, yet it covers the whole chip along this axis and must be kept.
  covers_chip = np.logical_and(first <= start, second >= stop)
  return np.logical_or(np.logical_or(first_inside, second_inside), covers_chip)


def chip_image(img,coords,classes,shape=(256,256)):
  '''
  Takes in an array representation of an image, an (N, 4) array of the coordinates of its bounding
  boxes, a length-N array of its classes for every bounding box, and the size of each chip, and
  returns three objects:

  - images: A uint8 array of shape (number of chips, chip height, chip width, 3) holding the
    first three channels of each chip
  - total_boxes: A dictionary mapping each chip index to an array of the bounding box(es) that
    reach into that chip, shifted to chip-local coordinates and clipped to the chip
  - total_classes: A dictionary mapping each chip index to an array of the class(es) associated
    with each bounding box(es) of that chip

  Parameters:
  - img: Three-dimensional image array indexed as [row, column, channel]
  - coords: (N, 4) array whose columns are read as [xmin, ymin, xmax, ymax]
  - classes: Array of length N, aligned with the rows of coords
  - shape: (chip width, chip height)

  Chips are numbered column by column: every row position of the first chip column, then the
  second chip column, and so on. Pixels beyond the last whole chip are discarded. A chip with no
  boxes gets the placeholder box [[0,0,0,0]] and the placeholder class [0].
  '''
  height,width,_ = img.shape
  wn,hn = shape

  w_num,h_num = width // wn, height // hn
  images = np.zeros((w_num*h_num,hn,wn,3))
  total_boxes = {}
  total_classes = {}

  k = 0
  for i in range(w_num):
    x_start, x_stop = i*wn, (i+1)*wn
    # The horizontal test does not depend on j, so it is computed once per chip column.
    in_columns = _axis_overlap(coords[:,0], coords[:,2], x_start, x_stop)
    for j in range(h_num):
      y_start, y_stop = j*hn, (j+1)*hn
      in_rows = _axis_overlap(coords[:,1], coords[:,3], y_start, y_stop)
      keep = np.logical_and(in_columns, in_rows)

      outn = coords[keep]
      # Shift to chip-local coordinates and clip to the chip's own extent.
      out = np.transpose(np.vstack((np.clip(outn[:,0]-x_start,0,wn),
                                    np.clip(outn[:,1]-y_start,0,hn),
                                    np.clip(outn[:,2]-x_start,0,wn),
                                    np.clip(outn[:,3]-y_start,0,hn))))
      box_classes = classes[keep]

      if out.shape[0] != 0:
        total_boxes[k] = out
        total_classes[k] = box_classes
      else:
        total_boxes[k] = np.array([[0,0,0,0]])
        total_classes[k] = np.array([0])

      images[k] = img[y_start:y_stop,x_start:x_stop,:3]

      k = k + 1

  return images.astype(np.uint8),total_boxes,total_classes

In [11]:
def print_results(images, boxes, classes):
  '''
  Print the image, box and class arrays produced by chipping.

  This is currently a "debug" function; in the future, this will be expanded to convert the chips
  to TensorRecord objects to be used for train/test sets.

  Parameters:
  - images: Iterable of chips; each element is collected into a list
  - boxes: Mapping whose values are collected in key iteration order
  - classes: Mapping whose values are collected in key iteration order

  Three lists are printed, one per line: the chips, the boxes, and the classes.
  '''
  collected_images = list(images)
  collected_boxes = [boxes[key] for key in boxes]
  collected_classes = [classes[key] for key in classes]

  for collected in (collected_images, collected_boxes, collected_classes):
    print(collected)

In [8]:
# Chips a single tif image file

def image_chipping(tif):
  '''
  Loads one tif file, looks up its bounding boxes and classes, chips the image with the default
  chip size of chip_image, and prints the results.

  Parameters:
  - tif: Path to the tif file
  '''
  tif_name, image, image_array = get_image_resources(tif)
  try:
    coordinates = get_coordinates(image, tif_name)
    box_classes = get_classes(image, tif_name)
  finally:
    # The rasterio object is only consulted for its bounds by the two lookups above; closing it
    # here keeps file handles from piling up when many files are chipped in a row.
    image.close()
  images, boxes, chip_classes = chip_image(image_array, coordinates, box_classes)
  print_results(images, boxes, chip_classes)

In [None]:
# Chips a folder of tif image files

def chip_folder(folder):
  '''
  Runs image_chipping on every tif file found directly inside the given folder.

  Parameters:
  - folder: Path to the folder to scan

  Entries that are not regular files, or whose extension is not .tif (compared in lower case),
  are skipped. The path of each chipped file is printed before it is processed.
  '''
  for entry in os.listdir(folder):
    candidate = os.path.join(folder, entry)
    suffix = os.path.splitext(candidate)[-1].lower()
    if not os.path.isfile(candidate) or suffix != ".tif":
      continue
    print(candidate)
    image_chipping(candidate)

## Chipping

Run the one cell that corresponds to your situation (single tif file/folder of tif files).

### Single tif File

In [12]:
# Single tif file: chips the file whose path is stored in mytif and prints the resulting
# image, box and class arrays

image_chipping(mytif)

[array([[[60, 71, 69],
        [52, 63, 69],
        [47, 58, 69],
        ...,
        [61, 79, 76],
        [65, 81, 76],
        [64, 78, 72]],

       [[60, 73, 72],
        [53, 66, 71],
        [50, 62, 71],
        ...,
        [59, 77, 73],
        [61, 79, 73],
        [59, 75, 69]],

       [[60, 75, 74],
        [55, 70, 74],
        [53, 67, 73],
        ...,
        [59, 75, 71],
        [59, 75, 71],
        [55, 70, 67]],

       ...,

       [[55, 67, 70],
        [51, 65, 68],
        [47, 65, 66],
        ...,
        [65, 72, 80],
        [69, 76, 81],
        [70, 76, 80]],

       [[52, 65, 68],
        [49, 62, 67],
        [46, 60, 65],
        ...,
        [67, 74, 80],
        [70, 77, 82],
        [71, 77, 81]],

       [[51, 65, 67],
        [47, 60, 66],
        [46, 57, 64],
        ...,
        [71, 78, 83],
        [74, 80, 84],
        [73, 80, 84]]], dtype=uint8), array([[[48, 63, 65],
        [49, 60, 66],
        [50, 59, 66],
        ...,
        [73

### Folder of tif Files

In [None]:
# Folder of tif files: chips every tif file in the folder whose path is stored in tif_folder

chip_folder(tif_folder)