In [None]:
# install necessary geospatial packages

!pip -q install rasterio
!pip -q install fiona
!pip -q install geopandas
!pip -q install pycrs
!pip -q install descartes 
!pip -q install pypng

[K     |████████████████████████████████| 19.3 MB 137 kB/s 
[K     |████████████████████████████████| 15.4 MB 4.3 MB/s 
[K     |████████████████████████████████| 1.0 MB 5.2 MB/s 
[K     |████████████████████████████████| 6.3 MB 28.0 MB/s 
[?25h  Building wheel for pycrs (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 48 kB 2.4 MB/s 
[?25h

In [None]:
# Mount Google Drive at /content/drive so that the /content/drive/Shareddrives/... paths
# read and written by the later cells resolve.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# necessary basic libraries
import pandas as pd
import numpy as np
import cv2
import random
import matplotlib.pyplot as plt
from PIL import Image
import os
import numpy as np
import json
import png

# geospatial libraries
import rasterio
import geopandas
from geopandas.tools import sjoin
import fiona
from rasterio.plot import show
from rasterio.mask import mask
from shapely.geometry import box
import geopandas as gpd
from fiona.crs import from_epsg
import pycrs
import descartes

In [None]:
### Read in the input data.
### First the tiff of the area of interest; second (optionally) the shapefile of manually
### delineated crowns, which is only needed when training a model rather than using a pre-trained one.

# Open the orthomosaic tiff as a rasterio dataset; `data` is used by the following cells.
data = rasterio.open('/content/drive/Shareddrives/detectreegb/sepilok/rgb/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res.tif')

# Read in the shapefile of crowns - uncomment only if training on your own data.
#crowns = geopandas.read_file('/home/jovyan/lustre_scratch/sepilok_data/sep_danum_crowns_no_overlap/all_manual_crowns_no_overlap.shp')

# Display the crowns table if desired (requires the line above to be uncommented).
#crowns

In [None]:
# Inspect the opened tiff: report its shape, bounds, number of bands and CRS on one line.
print(f"shape = {data.shape!s} , {data.bounds!s} and number of bands = {data.count!s} ,crs = {data.crs!s}")

# Plotting the whole raster is possible too, but it is usually slow, so it is left disabled.
#show(data)

shape = (40665, 84370) , BoundingBox(left=600543.5, bottom=644607.0, right=608980.5, top=648673.5) and number of bands = 4 ,crs = EPSG:32650


#### Looping through a set of tiles, to chop up the tiffs and crowns, and save as PNGs and GeoJSONs 

In [None]:
# just defining a function we are going to use shortly.

def getFeatures(gdf):
    """Extract the first feature's geometry from a GeoDataFrame in the form rasterio wants.

    The frame is serialised with ``gdf.to_json()`` and parsed back into a dict; only the
    geometry of the first entry in its ``features`` list is used.

    Returns a one-element list holding that geometry mapping. An empty ``features`` list
    raises ``IndexError``.
    """
    feature_collection = json.loads(gdf.to_json())
    first_feature = feature_collection['features'][0]
    return [first_feature['geometry']]

In [None]:
# Inputs and settings for the tiling loop below.

# Open the tiff that will be cut into tiles (same file as opened earlier in the notebook).
data = rasterio.open('/content/drive/Shareddrives/detectreegb/sepilok/rgb/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res.tif')

# Read in the shapefile of manual crowns - again, only needed if you want to train with your own crowns
#crowns = geopandas.read_file('/home/jovyan/lustre_scratch/sepilok_data/sep_danum_crowns_no_overlap/all_manual_crowns_no_overlap.shp')

# Read in a csv containing the origin x and y coordinates of the tiles you wish to slice up.
# The loop reads its 'Origin X coordinate' and 'Origin Y coordinate' columns.
# This csv can be replaced by looping over x and y coordinates directly, as sketched below.
tiles = pd.read_csv('/content/drive/Shareddrives/detectreegb/complete_training_tiles.csv')

# Set the desired buffer, tile width, height and resolution of the tiff tiles. Suggested values are given below.
# The buffer will be affected by the area of tree crowns in your region of forest.
# buffer, tile_width and tile_height are added to the tile origins, so they are in the same
# units as those coordinates (presumably metres for this raster's CRS - confirm).

buffer = 20
tile_width = 100
tile_height = 100
resolution = 0.1 # in metres per pixel
scaling = 1/resolution  # scaling parameter to transform the shapefile coordinates so they match the png (only used by the optional crown-tiling code)


# the for loop can be rewritten to work without requiring a csv, by looping over ranges of origin coordinates, e.g.:
# for minx in range(0,10000,250):
#    for miny in range(0,10000,250):    
#        print(minx,miny)


# Cut the raster into buffered tiles: for every tile origin, write a GeoTIFF and a PNG named
# after that origin. Optionally (commented-out section at the end) also tile the manual crowns.

# Take the CRS for the tile footprints from the raster itself instead of hard-coding an EPSG
# code: rasterio interprets the mask shapes in the raster's CRS anyway, and this avoids the
# deprecated '+init=epsg:...' form that fiona's from_epsg() hands to pyproj.
tile_crs = data.crs.to_wkt()

# output folders for the per-tile GeoTIFFs and PNGs
tiff_dir = '/content/drive/Shareddrives/detectreegb/sepilok/tiles/tiffs/'
png_dir = '/content/drive/Shareddrives/detectreegb/sepilok/tiles/pngs/'

for minx, miny in zip(tiles['Origin X coordinate'], tiles['Origin Y coordinate']):
    # every output of this tile is named by the tile origin
    tile_name = 'tile_'+str(minx)+'_'+str(miny)

    # define the bounding box of the whole tile, including the buffer
    bbox = box(minx-buffer, miny-buffer, minx+tile_width+buffer, miny+tile_height+buffer)
    # define the bounding box of the tile, excluding the buffer (hence selecting just the central part of the tile)
    bbox_central = box(minx, miny, minx+tile_width, miny+tile_height)
    # turn the bounding boxes into geopandas DataFrames, need to set the crs
    geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=tile_crs)
    # geo_central is only needed by the optional crown-tiling code at the end of the loop
    geo_central = gpd.GeoDataFrame({'geometry': bbox_central}, index=[0], crs=tile_crs)

    ### here we are cropping the tiff to the bounding box of the tile we want
    coords = getFeatures(geo)

    # define the tile as a mask of the whole tiff with just the bounding box.
    # mask() raises ValueError for a footprint it cannot crop (e.g. a tile origin that lies
    # outside this raster); report and skip that tile rather than aborting the whole run.
    try:
        out_img, out_transform = mask(data, shapes=coords, crop=True)
    except ValueError as err:
        print('skipping', tile_name, '-', err)
        continue

    # copy the metadata of the source raster and update it to describe the cropped tile
    out_meta = data.meta.copy()
    out_meta.update({"driver": "GTiff",
                     "height": out_img.shape[1],
                     "width": out_img.shape[2],
                     "transform": out_transform
                     })

    # here we are saving the tile as a new tiff, named by the origin of the tile
    out_tif = tiff_dir + tile_name + '.tif'
    with rasterio.open(out_tif, "w", **out_meta) as dest:
        dest.write(out_img)

    # out_img already holds exactly the band data that was just written, so it is used
    # directly instead of re-opening the tiff (which also left a dataset handle open per tile).
    # each band of the tiled tiff is a colour!
    R = out_img[0]
    G = out_img[1]
    B = out_img[2]

    # stack up the bands in an order appropriate for saving with cv2, then rescale to the correct 0-255 range for cv2
    rgb = np.dstack((B,G,R)) # BGR for cv2
    #rgb_rescaled = 255*rgb/65535 # scale the values of the bands if they are non-standard to range 0-255
    rgb_rescaled = rgb # usually rescaling is not required, but it depends on your tiff

    # save this as jpg or png...we are going for png...again, named with the origin of the specific tile.
    # cv2.imwrite signals failure through its return value rather than an exception, so check it.
    out_png = png_dir + tile_name + '.png'
    if not cv2.imwrite(out_png, rgb_rescaled):
        print('warning: could not write', out_png)


    ### now we have dealt with tiling the tiff, we want to deal with tiling the crowns...
    ### IF we have manual crowns we are going to use for training
    ### THEN uncomment the following 4 lines of code

    ### select the crowns that intersect the non-buffered central section of the tile using the inner join
    #overlapping_crowns = sjoin(crowns, geo_central, how="inner")

    ### translate to 0,0 to overlay on png
    #moved = overlapping_crowns.translate(-minx+buffer, -miny+buffer)

    ### scale to deal with the resolution
    #moved_scaled = moved.scale(scaling, scaling, origin=(0, 0))

    ### save as a geojson, a format compatible with detectron2, again named by the origin of the tile
    #moved_scaled.to_file(driver = 'GeoJSON',
    #                           filename= '/home/jovyan/lustre_scratch/sepilok_data/spare_crowns/tile_'+str(minx)+'_'+str(miny)+'.geojson')
        

  in_crs_string = _prepare_from_proj_string(in_crs_string)


ValueError: ignored