# Preparing the data
This notebook shows how to tile up RGB and crown data ready for training.

## Mount drive to access data and install *detectree2* package.

In [1]:
from google.colab import drive
drive.mount('/content/drive')
!pip install git+https://github.com/PatBall1/detectree2.git

Mounted at /content/drive
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/PatBall1/detectree2.git
  Cloning https://github.com/PatBall1/detectree2.git to /tmp/pip-req-build-fx7pbyk_
  Running command git clone --filter=blob:none --quiet https://github.com/PatBall1/detectree2.git /tmp/pip-req-build-fx7pbyk_
  Resolved https://github.com/PatBall1/detectree2.git to commit 6d56a8278cde15f8000cd238e0b5484b3d536699
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting detectron2@ git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-install-eu8loo43/detectron2_c67dc6b6a75e4230bdea72dafde6c400
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-install-eu8loo43/detectron2_c67dc6b6a75e4230bdea72dafde6c400
  Resolved https://github.com/facebookresearch/detectron2.git

## Set parameters for tiling

In [None]:
from detectree2.preprocessing.tiling import tile_data_train, to_traintest_folders
import rasterio
import geopandas as gpd
import shutil

# Point to directory where site data is stored
### PARACOU 2016
#site_path = "/content/drive/Shareddrives/detectree2/data/Paracou"
#img_path = site_path + "/rgb/2016/Paracou_RGB_2016_10cm.tif"
#crown_path = site_path + "/crowns/220908_Paracou2016.gpkg"

### PARACOU 2019
#site_path = "/content/drive/Shareddrives/detectree2/data/Paracou"
#img_path = site_path + "/rgb/2019/Paracou_RGB_2019.tif"
#crown_path = site_path + "/crowns/220619_AllSpLabelled.gpkg"

### PARACOU UAV NEW
#site_path = "/content/drive/Shareddrives/detectree2/data/Paracou"
#img_path = site_path + "/rgb/2020_22/Paracou_20220426_RGB10cm_mosa_rect.tif"
#crown_path = site_path + "/crowns/220619_AllSpLabelled.gpkg"

### DANUM
#site_path = "/content/drive/Shareddrives/detectree2/data/Danum"
#img_path = site_path + "/rgb/Dan_2014_RGB_project_to_CHM.tif"
#crown_path = site_path + "/crowns/Danum.gpkg"

### SEPILOK (East/West)
#site_path = "/content/drive/Shareddrives/detectree2/data/Sepilok"
#img_path = site_path + "/rgb/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res.tif"
#crown_path = site_path + "/crowns/SepilokEast.gpkg"
#crown_path = site_path + "/crowns/SepilokWest.gpkg"

### BCI 50 ha
#site_path = "/content/drive/Shareddrives/detectree2/data/BCI_50ha"
#img_path = site_path + "/rgb/2015.06.10_07cm_ORTHO.tif"
#crown_path = site_path + "/crowns/BCI_CrownData_2014-10-02_KCaligned/BCI_All_Crown_Data_10ha_50ha.shp"


### BCI 2019
#site_path = "/content/drive/Shareddrives/detectree2/data/BCI_2019"
#img_path = site_path + "/rgb/2019_06_24_BCI_WholeIsland.tif"
#crown_path = site_path + "/crowns/BCI_Island_2019_crowns.shp"

#out_dir = site_path + '/tiles/'
#out_dir = site_path + '/tilesEast/'
#out_dir = site_path + '/tilesWest/'

# Read in the tiff file
#data = rasterio.open(img_path)

# Read in crowns (then filter by an attribute?)
#crowns = gpd.read_file(crown_path)
#crowns = crowns.to_crs(data.crs.data)

# Tiling parameters, shared by every tiling cell in this notebook.
# NOTE(review): units are presumably those of the raster CRS (metres) — confirm.
buffer = 30        # forwarded to tile_data_train as `buffer`
tile_width = 40    # forwarded to tile_data_train as `tile_width`
tile_height = 40   # forwarded to tile_data_train as `tile_height`
threshold = 0.4    # forwarded to tile_data_train as `threshold`

# Suffix used in output directory names: "<tile_width>_<buffer>_<threshold>"
appends = f"{tile_width}_{buffer}_{threshold}"



## Tile up the data
Function to tile up the data into manageable training chunks. This function has some issues around the encoding of the input raster: `dtype_bool` should be switched if black tiles are being produced. A recommended threshold is ~0.5, but it depends on the volume of available data (with abundant, dense crown data, a stricter threshold may be preferable).

In [None]:
import numpy as np

# Quick sanity check on crown sizes for one site (Sepilok West): display the
# 1% quantile of crown area, computed after reprojecting the crowns to the
# raster's CRS. No tiles are written by this cell.
# Requires True: BCI_2019, Paracou  (presumably refers to `dtype_bool` in the tiling cells)
### SEPILOK
site_path = "/content/drive/Shareddrives/detectree2/data/Sepilok"
img_path = site_path + "/rgb/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res.tif"
crown_path = site_path + "/crowns/SepilokWest.gpkg"
out_dir = site_path + '/tilesW_' + appends + "/"

# The raster is only needed for its CRS here, so open it in a context manager
# and let the file handle be released straight away.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?)
    crowns = gpd.read_file(crown_path)
    # NOTE(review): the warning recorded in this cell's output looks like pyproj's
    # deprecation warning for the '+init' style dict returned by `data.crs.data`;
    # passing `data.crs` directly may avoid it — confirm before changing.
    crowns = crowns.to_crs(data.crs.data)

# Per-crown polygon area, in squared units of the (reprojected) CRS
area = crowns.area

# Last expression of the cell: rendered as the cell output
np.quantile(area, 0.01)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


19.514305197726895

In [None]:
# Tile the Paracou 2016 orthomosaic with its crowns, then split into train/test.
# Requires True: BCI_2019, Paracou  (presumably refers to `dtype_bool`)
### PARACOU 2016
site_path = "/content/drive/Shareddrives/detectree2/data/Paracou"
img_path = site_path + "/rgb/2016/Paracou_RGB_2016_10cm.tif"
crown_path = site_path + "/crowns/220908_Paracou2016.gpkg"
out_dir = site_path + '/tiles2016_' + appends + "/"

# Remove existing tile directory so stale tiles from a previous run cannot mix
# with the new ones; ignore_errors=True makes this a no-op if it is absent.
shutil.rmtree(out_dir, ignore_errors=True)

# Open the orthomosaic in a context manager so its file handle is released as
# soon as tiling has finished.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?) and reproject to the raster CRS
    crowns = gpd.read_file(crown_path)
    crowns = crowns.to_crs(data.crs.data)

    tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=True)

# Distribute the tiles written to out_dir into train (5 folds) and test folders.
to_traintest_folders(out_dir, out_dir, test_frac=0.05, folds=5)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
# Tile the Paracou 2019 orthomosaic with crowns, then split into train/test.
### PARACOU 2019
site_path = "/content/drive/Shareddrives/detectree2/data/Paracou"
img_path = site_path + "/rgb/2019/Paracou_RGB_2019.tif"
# NOTE(review): this is the 2016 crown file, whereas the commented-out 2019
# configuration near the top of the notebook points at
# "/crowns/220619_AllSpLabelled.gpkg" — confirm which one is intended.
crown_path = site_path + "/crowns/220908_Paracou2016.gpkg"
out_dir = site_path + '/tiles2019_' + appends + "/"

# Remove existing tile directory; ignore_errors=True tolerates a missing directory.
shutil.rmtree(out_dir, ignore_errors=True)

# Context manager guarantees the raster file handle is closed after tiling.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?) and reproject to the raster CRS
    crowns = gpd.read_file(crown_path)
    crowns = crowns.to_crs(data.crs.data)

    tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=True)

# Distribute the tiles written to out_dir into train (5 folds) and test folders.
to_traintest_folders(out_dir, out_dir, test_frac=0.05, folds=5)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
# Tile the Danum orthomosaic with its crowns, then split into train/test.
### DANUM
site_path = "/content/drive/Shareddrives/detectree2/data/Danum"
img_path = site_path + "/rgb/Dan_2014_RGB_project_to_CHM.tif"
crown_path = site_path + "/crowns/Danum.gpkg"
out_dir = site_path + '/tiles_' + appends + "/"

# Remove existing tile directory; ignore_errors=True tolerates a missing directory.
shutil.rmtree(out_dir, ignore_errors=True)

# Context manager guarantees the raster file handle is closed after tiling.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?)
    crowns = gpd.read_file(crown_path)
    # Optional filter, currently disabled:
    #crowns = crowns[crowns.conf==1]
    crowns = crowns.to_crs(data.crs.data)

    # This site is tiled with dtype_bool=False (see the note on black tiles in
    # the section text above).
    tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=False)

# Distribute the tiles written to out_dir into train (5 folds) and test folders.
to_traintest_folders(out_dir, out_dir, test_frac=0.05, folds=5)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
# Tile the Sepilok orthomosaic with the East crowns, then split into train/test.
### SEPILOK EAST
site_path = "/content/drive/Shareddrives/detectree2/data/Sepilok"
img_path = site_path + "/rgb/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res.tif"
crown_path = site_path + "/crowns/SepilokEast.gpkg"
out_dir = site_path + '/tilesE_' + appends + "/"

# Remove existing tile directory; ignore_errors=True tolerates a missing directory.
shutil.rmtree(out_dir, ignore_errors=True)

# Context manager guarantees the raster file handle is closed after tiling.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?)
    crowns = gpd.read_file(crown_path)
    # Optional filter, currently disabled:
    #crowns = crowns[crowns.conf==1]
    crowns = crowns.to_crs(data.crs.data)

    tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=False)

# Distribute the tiles written to out_dir into train (5 folds) and test folders.
to_traintest_folders(out_dir, out_dir, test_frac=0.05, folds=5)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
# Tile the Sepilok orthomosaic with the West crowns, then split into train/test.
### SEPILOK West
site_path = "/content/drive/Shareddrives/detectree2/data/Sepilok"
img_path = site_path + "/rgb/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res.tif"
crown_path = site_path + "/crowns/SepilokWest.gpkg"
out_dir = site_path + '/tilesW_' + appends + "/"

# Remove existing tile directory; ignore_errors=True tolerates a missing directory.
shutil.rmtree(out_dir, ignore_errors=True)

# Context manager guarantees the raster file handle is closed after tiling.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?)
    crowns = gpd.read_file(crown_path)
    # Optional filter, currently disabled:
    #crowns = crowns[crowns.conf==1]
    crowns = crowns.to_crs(data.crs.data)

    tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=False)

# Distribute the tiles written to out_dir into train (5 folds) and test folders.
to_traintest_folders(out_dir, out_dir, test_frac=0.05, folds=5)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
# Tile the BCI 50 ha orthomosaic with its crowns, then sort tiles into folds.
### BCI 50 ha
site_path = "/content/drive/Shareddrives/detectree2/data/BCI_50ha"
img_path = site_path + "/rgb/2015.06.10_07cm_ORTHO.tif"
crown_path = site_path + "/crowns/BCI_CrownData_2014-10-02_KCaligned/BCI_All_Crown_Data_10ha_50ha.shp"
out_dir = site_path + '/tiles_' + appends + "/"

# Remove existing tile directory; ignore_errors=True tolerates a missing directory.
shutil.rmtree(out_dir, ignore_errors=True)

# Context manager guarantees the raster file handle is closed after tiling.
with rasterio.open(img_path) as data:
    # Read in crowns (then filter by an attribute?)
    crowns = gpd.read_file(crown_path)
    # Optional filter, currently disabled:
    #crowns = crowns[crowns.conf==1]
    crowns = crowns.to_crs(data.crs.data)

    tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=True)

# Unlike the other sites, test_frac is 0 here, so no test fraction is requested.
to_traintest_folders(out_dir, out_dir, test_frac=0, folds=5)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
import glob

# Tile every UAV orthomosaic found in one folder into a single shared tile
# directory. The train/test split for these tiles is done in a later cell.
### PARACOU UAV NEW
site_path = "/content/drive/Shareddrives/detectree2/data/Paracou"
img_folder = site_path + "/rgb/2020_22/4D/"
images = glob.glob(img_folder + "*.tif")
crown_path = site_path + "/crowns/220619_AllSpLabelled.gpkg"
out_dir = site_path + "/tilesUAV_" + appends + "/"

for image in images:
    # Context manager closes each raster before the next one is opened.
    with rasterio.open(image) as data:
        # Re-read the crowns for every image so each tiling call starts from a
        # fresh, unmodified copy (as the original loop did).
        crowns = gpd.read_file(crown_path)
        # Reproject onto this raster's CRS, as every other tiling cell does.
        crowns = crowns.to_crs(data.crs.data)
        tile_data_train(data, out_dir, buffer, tile_width, tile_height, crowns, threshold, dtype_bool=True)

## Send geojson to train/test folders
Send geojsons to the train folder (with folds for k-fold cross validation) and the test folder. Training tiles will automatically be removed if there is any overlap with a test tile.

In [None]:
from detectree2.preprocessing.tiling import to_traintest_folders

# Split the tiles in `out_dir` in place: the same directory is given as both
# the first and second argument. `out_dir` is whatever the most recently run
# tiling cell set it to.
#out_folder = out_dir
to_traintest_folders(
    out_dir,
    out_dir,
    test_frac=0.0,
    folds=5,
)

## Visualise training data

This cell still needs editing so that the dataset is registered properly; this is fixed in the training script.

In [None]:
# Let's look at our training image and annos for our geojson 
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import Visualizer
from detectree2.models.train import combine_dicts, register_train_data
import random
import cv2
from PIL import Image



# Site whose training tiles are drawn, and where those tiles live.
name = "BCI_50ha"
train_location = "/content/drive/Shareddrives/detectree2/data/" + name + "/tilesTEST/train/"

# Gather the annotation dicts for the training tiles.
# NOTE(review): the second argument is presumably the fold held out for
# validation — confirm against detectree2.models.train.combine_dicts.
dataset_dicts = combine_dicts(train_location, 1)
trees_metadata = MetadataCatalog.get(name + "_train")
#dataset_dicts = get_tree_dicts("./")

for d in dataset_dicts:
    img = cv2.imread(d["file_name"])
    # cv2.imread signals failure by returning None rather than raising, so
    # skip unreadable tiles instead of crashing on the slice below.
    if img is None:
        print("Could not read image, skipping: " + str(d["file_name"]))
        continue
    # OpenCV loads BGR; reverse the channel axis to hand the Visualizer RGB.
    visualizer = Visualizer(img[:, :, ::-1], metadata=trees_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    # The original reversed the channels of the drawn image and then converted
    # BGR->RGB, which cancels out; the drawn image is used directly instead.
    image = out.get_image()
    display(Image.fromarray(image))

Output hidden; open in https://colab.research.google.com to view.