<a href="https://colab.research.google.com/github/Alexandra-Smith/FruitPunch_AI_for_Trees/blob/main/final_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **AI for Trees - Data subteam**

This notebook contains the code used for working with all the images, including:
- loading the provided .tif images, the provided DEM files, and the GeoJSON files containing the segmented trees (labels)
- converting the provided files into the image format (.tif) required for extracting patches
- extracting the patches from the images and saving them to the folders on the drive

In [None]:
# Connect Google Drive to the notebook so the data stored there can be read and written
# (this cell only mounts the drive at /content/drive; it does not change the working directory)

# mount google drive
from google.colab import drive
drive.mount('/content/drive')

Import all needed modules

In [None]:
import glob
import os

import cv2
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy
import numpy as np
import tensorflow as tf
# import tensorflow_addons as tfa
from osgeo import gdal
from PIL import Image

Functions needed

In [None]:
# Function for extracting image patches
def extractPatches(image, patch_size, num_strides):
  ''' Extract square patches from an image using tf.image.extract_patches.

      image:       array of shape (rows, cols, channels)
      patch_size:  side length of each square patch, in pixels
      num_strides: step in pixels between consecutive patches (rows and columns)

      returns: array of patches with shape
               (num_patches, patch_size, patch_size, channels)
               and the total number of patches

      Patches are taken with padding='SAME', so the parts of a patch that fall
      outside the image are filled with zeros. '''
  r, c, d = image.shape
  # reshape image to 4D tensor (a batch holding one image)
  batch = np.reshape(image, (1, r, c, d))
  p = tf.image.extract_patches(images=batch, sizes=[1, patch_size, patch_size, 1], strides=[1, num_strides, num_strides, 1], rates=[1, 1, 1, 1], padding='SAME')
  P = p.numpy()
  # P is (1, patch rows, patch cols, flattened patch); np.int no longer exists
  # in current NumPy, so the builtin int is used for the count
  num_patches = int(P.shape[1] * P.shape[2])
  # unflatten with the image's own channel count instead of a hard-coded 3
  patches = np.reshape(P, (num_patches, patch_size, patch_size, d))
  return patches, num_patches

# Function to save the GEOJSON files to .tif images
def geojson_to_tif(path_to_images, geojson_labels, destination_path):
  ''' Plot each GeoJSON label file and save the plot as a .tif image.

      path_to_images:   folder holding the source images, stored there as
                        'Copy of <name>.tif'
      geojson_labels:   list of paths to the .geojson label files
      destination_path: folder the label images are written to, as '<name>.tif'

      <name> is the GeoJSON file name without folder or extension. The source
      image is read only to size the figure to the image's pixel dimensions.

      raises: OSError if a source image cannot be read '''
  px = 1/plt.rcParams['figure.dpi']  # pixel in inches
  for g in geojson_labels:
    # get names
    name = os.path.splitext(os.path.basename(g))[0]
    # get image
    img = os.path.join(path_to_images, 'Copy of ' + name + '.tif')
    image = cv2.imread(img)
    if image is None:
      raise OSError('could not read image: ' + img)
    r, c = image.shape[:2]
    new_shapes = gpd.read_file(g)
    # save images at correct resolution: create the sized figure first and
    # draw the shapes onto it, so the saved figure is the one holding the plot
    fig, ax = plt.subplots(figsize=(c*px, r*px))
    new_shapes.plot(ax=ax)
    ax.axis('off')
    save_file = os.path.join(destination_path, name + '.tif')
    fig.savefig(save_file, bbox_inches="tight", pad_inches=0, dpi='figure')
    # release the figure; one is created per label file
    plt.close(fig)

# Function to extract and save patches as .png files
def save_patches(files, image_path, save_folder):
  ''' Cut every image in `files` into 256x256 patches and save them as .png.

      files:       list of image file paths
      image_path:  folder the images live in; kept for backward compatibility,
                   the tile name is now taken from the file name itself
      save_folder: folder the patches are written to, named
                   '<image name>_<patch number>.png' (numbering starts at 1)

      raises: OSError if an image cannot be read '''
  for f in files:
    # extract image names for saving tiles; using the base name means a
    # mismatched image_path can no longer leave a folder path in the tile name
    img_name = os.path.splitext(os.path.basename(f))[0]
    # get image
    image = cv2.imread(f)
    if image is None:
      raise OSError('could not read image: ' + f)
    # extract patches
    patches, num_patches = extractPatches(image=image, patch_size=256, num_strides=256)
    # save patches
    for i, patch in enumerate(patches, start=1):
      name = os.path.join(save_folder, img_name + '_' + str(i) + '.png')
      cv2.imwrite(name, patch)

# Function for saving gdal files as .tif images
def gdal_to_tif(imgs, dem_files, image_path, destination_path):
  ''' Render each DEM raster with matplotlib and save it as a .tif image.

      imgs:             list of source image paths; imgs[i] must correspond to
                        dem_files[i] (it supplies the output name and the
                        figure size)
      dem_files:        list of DEM raster paths readable by gdal
      image_path:       folder holding the source images; kept for backward
                        compatibility, the name is taken from the file name
      destination_path: folder the rendered DEM images are written to, as
                        '<image name>.tif'

      raises: OSError if a source image or a DEM file cannot be opened '''
  px = 1/plt.rcParams['figure.dpi']  # pixel in inches
  for i, d in enumerate(dem_files):
    f = imgs[i]
    # get image names (ids)
    name = os.path.splitext(os.path.basename(f))[0]
    # get image
    image = cv2.imread(f)
    if image is None:
      raise OSError('could not read image: ' + f)
    r, c = image.shape[:2]
    # get DEM
    dataset = gdal.Open(d)
    if dataset is None:
      raise OSError('could not open DEM file: ' + d)
    DEM = dataset.ReadAsArray()
    # negative values are set to 0 before plotting
    # (presumably no-data markers - TODO confirm)
    DEM[DEM<0] = 0
    # save images
    fig, ax = plt.subplots(figsize=(c*px, r*px))
    ax.axis('off')
    ax.imshow(DEM)
    fig.tight_layout(pad=0)
    save = os.path.join(destination_path, name + '.tif')
    fig.savefig(save, bbox_inches="tight", pad_inches=0, dpi='figure')
    # release the figure; one full-resolution figure is created per DEM
    plt.close(fig)

Code to run to prepare the data

(These cells do not need to be re-run; re-running them will overwrite the images already saved in these folders.)

Save all files in the formats required for extracting patches

In [None]:
# save geojson to .tif
imgs_path = "/content/drive/MyDrive/AI for Trees Share/01. Data/all images/"
geojson_labels = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/Data to be Labeled/cfru.ti labeled/*.geojson"))
destination_path = '/content/drive/MyDrive/AI for Trees Share/Data to be Labeled/cfru.ti tif labels/'
geojson_to_tif(imgs_path, geojson_labels, destination_path)

# save gdal to .tif
# gdal_to_tif pairs imgs[i] with dems[i]. glob returns files in arbitrary
# order, so both lists are sorted to make the pairing deterministic.
# NOTE(review): this assumes image and DEM file names sort into the same order - confirm
# 2019
imgs19 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2019/img/*.tif"))
dems19 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2019/dem/*.tif"))
gdal_to_tif(imgs19, dems19, '/content/drive/MyDrive/AI for Trees Share/01. Data/2019/img/', '/content/drive/MyDrive/AI for Trees Share/01. Data/2019/DEM_img/')
# 2021
imgs21 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2021/img/*.tif"))
dems21 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2021/dem/*.tif"))
gdal_to_tif(imgs21, dems21, '/content/drive/MyDrive/AI for Trees Share/01. Data/2021/img/', '/content/drive/MyDrive/AI for Trees Share/01. Data/2021/DEM_img/')

In [None]:
# resize .tif images created from the gdal files to correct image resolution

def resize_dems_to_images(imgs, dem_imgs, destination_path):
  ''' Resize each rendered DEM image to the pixel size of its source image.

      imgs:             list of source image paths; imgs[i] must correspond to
                        dem_imgs[i] (it supplies the output name and the
                        target size)
      dem_imgs:         list of rendered DEM image paths
      destination_path: folder the resized images are written to, as
                        '<image name>.tif'

      raises: OSError if a source image cannot be read '''
  for i, d in enumerate(dem_imgs):
    f = imgs[i]
    # get image names (ids)
    name = os.path.splitext(os.path.basename(f))[0]
    # get image
    image = cv2.imread(f)
    if image is None:
      raise OSError('could not read image: ' + f)
    r, c = image.shape[:2]
    # get DEM; PIL's resize takes (width, height)
    with Image.open(d) as d_img:
      resized_img = d_img.resize((c, r))
    # save images
    save_dest = os.path.join(destination_path, name + '.tif')
    resized_img.save(save_dest)

# The lists are paired by index. glob returns files in arbitrary order, so both
# are sorted to make the pairing deterministic.
# NOTE(review): this assumes each DEM image carries the name of its source image - confirm

# 2019
imgs19 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2019/img/*.tif"))
ds19 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2019/DEM_img/*.tif"))
resize_dems_to_images(imgs19, ds19, '/content/drive/MyDrive/AI for Trees Share/01. Data/2019/final_dem/')

# 2021 (loops over the 2021 DEM images, not the 2019 list)
imgs21 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2021/img/*.tif"))
ds21 = sorted(glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2021/DEM_img/*.tif"))
resize_dems_to_images(imgs21, ds21, '/content/drive/MyDrive/AI for Trees Share/01. Data/2021/final_dem/')

Extract all patches

In [None]:
# for 2019 .tif
# (a Python name cannot start with a digit, so the list is called files_2019)
files_2019 = glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2019/img/*.tif")
save_patches(files_2019, '/content/drive/MyDrive/AI for Trees Share/01. Data/2019/img/', '/content/drive/MyDrive/AI for Trees Share/01. Data/Tiles/256x256/imgs/')

In [None]:
# for 2021 .tif
# (a Python name cannot start with a digit, so the list is called files_2021)
files_2021 = glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2021/img/*.tif")
save_patches(files_2021, '/content/drive/MyDrive/AI for Trees Share/01. Data/2021/img/', '/content/drive/MyDrive/AI for Trees Share/01. Data/Tiles/256x256/imgs/')

In [None]:
# for label images
# image_path must be the folder the label .tif files are read from, so that
# save_patches strips it and names each tile after the file alone
label_files = glob.glob("/content/drive/MyDrive/AI for Trees Share/Data to be Labeled/cfru.ti tif labels/*.tif")
save_patches(label_files, '/content/drive/MyDrive/AI for Trees Share/Data to be Labeled/cfru.ti tif labels/', '/content/drive/MyDrive/AI for Trees Share/Data to be Labeled/cfru.ti tif labels/')

In [None]:
# Tile the resized DEM images from the 2019 and 2021 final_dem folders.
# Both years write their patches into the same 256x256 DEM tile folder.

# 2019
dems19 = glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2019/final_dem/*.tif")
save_patches(
  dems19,
  '/content/drive/MyDrive/AI for Trees Share/01. Data/2019/final_dem/',
  '/content/drive/MyDrive/AI for Trees Share/01. Data/Tiles/256x256/dem/',
)

# 2021
dems21 = glob.glob("/content/drive/MyDrive/AI for Trees Share/01. Data/2021/final_dem/*.tif")
save_patches(
  dems21,
  '/content/drive/MyDrive/AI for Trees Share/01. Data/2021/final_dem/',
  '/content/drive/MyDrive/AI for Trees Share/01. Data/Tiles/256x256/dem/',
)

NameError: ignored