#DeepForest Training Walkthrough(CPU/GPU)
  - For GPU implementation.
      1. Select **Runtime** > Change **runtime type** and Select GPU as Hardware accelerator.

In [1]:
#install the package, on colab make sure to upgrade existing packages. This is not needed in a clean env.
! pip install --upgrade deepforest albumentations pyyaml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
#pip install git+https://github.com/weecology/DeepForest.git

In [2]:
#load the modules
import os
import time
import numpy as np
from deepforest import main 
from deepforest import get_data
from deepforest import utilities
from deepforest import preprocess


In [3]:
#convert hand annotations from xml into retinanet format
#The get_data function is only needed when fetching sample package data
YELL_xml = get_data("2019_YELL_2_528000_4978000_image_crop2.xml")
annotation = utilities.xml_to_annotations(YELL_xml)
annotation.head()

Unnamed: 0,image_path,xmin,ymin,xmax,ymax,label
0,2019_YELL_2_528000_4978000_image_crop2.png,681,369,699,386,Tree
1,2019_YELL_2_528000_4978000_image_crop2.png,980,59,998,79,Tree
2,2019_YELL_2_528000_4978000_image_crop2.png,953,135,964,150,Tree
3,2019_YELL_2_528000_4978000_image_crop2.png,969,183,995,208,Tree
4,2019_YELL_2_528000_4978000_image_crop2.png,742,623,751,635,Tree


In [5]:
#load the image file corresponding to the annotaion file
YELL_train = get_data("2019_YELL_2_528000_4978000_image_crop2.png")
image_path = os.path.dirname(YELL_train)
#Write converted dataframe to file. Saved alongside the images
annotation.to_csv(os.path.join(image_path,"train_example.csv"), index=False)

## Prepare Training and Validation Data
  - 75% Training Data
  - 25% Validation Data

In [6]:
#Find annotation path
annotation_path = os.path.join(image_path,"train_example.csv")
#crop images will save in a newly created directory
#os.mkdir(os.getcwd(),'train_data_folder')
crop_dir = os.path.join(os.getcwd(),'train_data_folder')
train_annotations= preprocess.split_raster(path_to_raster=YELL_train,
                                 annotations_file=annotation_path,
                                 base_dir=crop_dir,
                                 patch_size=400,
                                 patch_overlap=0.05)

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [7]:
#Split image crops into training and test. Normally these would be different tiles! Just as an example.
image_paths = train_annotations.image_path.unique()
#split 25% validation annotation
valid_paths = np.random.choice(image_paths, int(len(image_paths)*0.25) )
valid_annotations = train_annotations.loc[train_annotations.image_path.isin(valid_paths)]
train_annotations = train_annotations.loc[~train_annotations.image_path.isin(valid_paths)]

In [8]:
#View output
train_annotations.head()
print("There are {} training crown annotations".format(train_annotations.shape[0]))
print("There are {} test crown annotations".format(valid_annotations.shape[0]))

#save to file and create the file dir
annotations_file= os.path.join(crop_dir,"train.csv")
validation_file= os.path.join(crop_dir,"valid.csv")
#Write window annotations file without a header row, same location as the "base_dir" above.
train_annotations.to_csv(annotations_file,index=False)
valid_annotations.to_csv(validation_file,index=False)

There are 595 training crown annotations
There are 179 test crown annotations


In [10]:
annotations_file

'/content/train_data_folder/train.csv'

## Training & Evaluating Using CPU

In [1]:
#initial the model and change the corresponding config file
m = main.deepforest()
m.config["train"]["csv_file"] = annotations_file
m.config["train"]["root_dir"] = os.path.dirname(annotations_file)
#Since this is a demo example and we aren't training for long, only show the higher quality boxes
m.config["score_thresh"] = 0.4
m.config["train"]['epochs'] = 2
m.config["validation"]["csv_file"] = validation_file
m.config["validation"]["root_dir"] = os.path.dirname(validation_file)
#create a pytorch lighting trainer used to training 
m.create_trainer()
#load the lastest release model 
m.use_release()

NameError: ignored

In [2]:
start_time = time.time()
m.trainer.fit(m)
print(f"--- Training on CPU: {(time.time() - start_time):.2f} seconds ---")

NameError: ignored

In [None]:
m.get_transform

In [None]:
#create a directory to save the predict image
save_dir = os.path.join(os.getcwd(),'pred_result')
try:
  os.mkdir(save_dir)
except FileExistsError:
  pass
results = m.evaluate(annotations_file, os.path.dirname(annotations_file), iou_threshold = 0.4, savedir = save_dir)

## Training & Evaluating Using GPU

In [16]:
#initial the model and change the corresponding config file
m = main.deepforest()
m.config['gpus'] = '-1' #move to GPU and use all the GPU resources
m.config["train"]["csv_file"] = annotations_file
m.config["train"]["root_dir"] = os.path.dirname(annotations_file)
m.config["score_thresh"] = 0.4
m.config["train"]['epochs'] = 2
m.config["validation"]["csv_file"] = validation_file
m.config["validation"]["root_dir"] = os.path.dirname(validation_file)
#create a pytorch lighting trainer used to training 
m.create_trainer()
#load the lastest release model 
m.use_release()

Reading config file: /usr/local/lib/python3.7/dist-packages/deepforest/data/deepforest_config.yml


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Model from DeepForest release https://github.com/weecology/DeepForest/releases/tag/1.0.0 was already downloaded. Loading model from file.
Loading pre-built model: https://github.com/weecology/DeepForest/releases/tag/1.0.0


In [17]:
start_time = time.time()
m.trainer.fit(m)
print(f"--- Training on GPU: {(time.time() - start_time):.2f} seconds ---")

  f"The `LightningModule.{hook}` hook was deprecated in v1.6 and"
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32.1 M
------------------------------------
31.9 M    Trainable params
222 K     Non-trainable params
32.1 M    Total params
128.592   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

--- Training on GPU: 25.33 seconds ---


In [18]:
#save the prediction result to a prediction folder
save_dir = os.path.join(os.getcwd(),'pred_result')
try:
  os.mkdir(save_dir)
except FileExistsError:
  pass
results = m.evaluate(annotations_file, os.path.dirname(annotations_file), iou_threshold = 0.4, savedir= save_dir)

In [19]:
results

{'box_precision': 0.8697613996727234,
 'box_recall': 0.7650923431355191,
 'class_recall':    label  recall  precision  size
 0      0     1.0        1.0   520,
 'predictions':            xmin        ymin        xmax        ymax  label     score  \
 0     86.042671  229.488586  148.369171  285.611511      0  0.871951   
 1    368.696045   41.424515  400.000000  103.407578      0  0.866018   
 2    291.137512  260.928070  349.717285  318.137299      0  0.827130   
 3    305.715515   91.207336  351.964844  142.852264      0  0.644754   
 4    148.556946  264.188232  189.114929  303.834473      0  0.633438   
 ..          ...         ...         ...         ...    ...       ...   
 521   60.806461   88.560501  141.036987  173.316086      0  0.648514   
 522  313.902557  137.249954  359.161224  179.824661      0  0.595895   
 523   12.005398  149.668198   55.505718  190.333542      0  0.580893   
 524    1.403369  119.193794   28.147816  148.115005      0  0.576691   
 525  332.917603    0.

In [20]:
results['box_precision']

0.8697613996727234

In [21]:
results["box_recall"]

0.7650923431355191

In [22]:
results["results"]

Unnamed: 0,prediction_id,truth_id,IoU,score,xmin,xmax,ymin,ymax,predicted_label,true_label,image_path,match
0,14,0,0.000000,0.455820,192,230,33,72,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_10.png,False
1,10,1,0.848526,0.577429,93,178,162,233,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_10.png,True
2,8,2,0.728278,0.629440,147,185,96,130,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_10.png,True
3,7,3,0.832303,0.656030,165,197,133,170,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_10.png,True
4,11,4,0.045145,0.553770,189,222,74,108,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_10.png,False
...,...,...,...,...,...,...,...,...,...,...,...,...
5,8,5,0.000000,0.464332,378,400,45,76,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_9.png,False
6,1,6,0.790247,0.851713,216,272,62,122,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_9.png,True
7,3,7,0.776750,0.815376,201,246,123,169,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_9.png,True
8,5,8,0.785678,0.685737,227,266,181,216,Tree,Tree,2019_YELL_2_528000_4978000_image_crop2_9.png,True


In [23]:
results["class_recall"]

Unnamed: 0,label,recall,precision,size
0,0,1.0,1.0,520
