In [None]:
'''
Algorithme de fine-tuning de deepforest sur un ensemble de données
'''

In [1]:
! pip install deepforest
! pip install roboflow
from roboflow import Roboflow
from deepforest import main, get_data, preprocess, visualize
import matplotlib.pyplot as plt
import os
! pip install tensorboard
from pytorch_lightning.loggers import TensorBoardLogger
import pandas as pd
from PIL import Image
import numpy as np
from pathlib import Path
import datetime
import shutil
from IPython.display import clear_output
clear_output()

In [2]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())
device = 'cuda' if torch.cuda.is_available() else 'cpu'

2.3.1+cu121
True


In [3]:
HOME = os.getcwd()
print(HOME)

/home/junior/Documents/stage-4a-SEDOGBO/model/model1/sam


In [4]:
rf = Roboflow(api_key="K72bvIl0rTcvckcth1sm")
project = rf.workspace("insa-3ptmt").project("final-tree-detection")
version = project.version(2)
dataset = version.download("tensorflow")
clear_output()


In [5]:
def format_annot(data_path : str):
    train_annot = pd.read_csv(data_path+"_annotations.csv")
    df_format = {"image_path": train_annot["filename"].to_list(), "xmin": train_annot["xmin"].to_list(), 
                'ymin': train_annot["ymin"].to_list(), "xmax": train_annot["xmax"].to_list(), 
                "ymax": train_annot["ymax"].to_list(), "label": train_annot["class"].to_list()}
    pd.DataFrame(df_format).to_csv(data_path+"formated_ann.csv")
    
    for i, file_path in enumerate(Path(data_path).glob("*")):
        if file_path.suffix.lower() in [".jpg", ".jpeg"]:
            im = np.array(Image.open(data_path+file_path.name))
            if file_path.name in df_format["image_path"]:
                output_annotations = preprocess.split_raster(
                    numpy_image =  im,
                    image_name=file_path.name,
                    annotations_file = data_path+"formated_ann.csv",
                    base_dir = data_path+"rasters",
                    patch_size = 400
                )
                csv_file_path = data_path + "rasters/formated_ann.csv"
                file_exists = os.path.isfile(csv_file_path)
                output_annotations.to_csv(csv_file_path, header=not file_exists, index=False, mode='a')    

In [6]:
%cd {HOME}
TRAIN_DATA_PATH = HOME+"/final-tree-detection-2/train/"
VALID_DATA_PATH = HOME+"/final-tree-detection-2/valid/"
TEST_DATA_PATH = HOME+"/final-tree-detection-2/test/"
format_annot(TRAIN_DATA_PATH)
format_annot(VALID_DATA_PATH)
format_annot(TEST_DATA_PATH)

/home/junior/Documents/stage-4a-SEDOGBO/model/model1/sam


In [7]:
class DeepDenseForest(main.deepforest):
    def __init__(self):
        super().__init__()
        self.use_release()
    
    def training_step(self, batch, batch_idx):
        self.model.train()
        path, images, targets = batch
        loss_dict = self.model.forward(images, targets)
        # sum of regression and classification loss
        losses = sum([loss for loss in loss_dict.values()])
        for key, value in loss_dict.items():
            self.log("train_{}".format(key), value, on_epoch=True)
        self.log("train_loss", losses, on_epoch=True, prog_bar=True)
        return losses
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10),
            'monitor': 'val_loss'
        }
        return [optimizer], [scheduler]
    
    
    def validation_step(self, batch, batch_idx):
        try:
            path, images, targets = batch
        except:
            print("Empty batch encountered, skipping")
            return None

        self.model.train()
        with torch.no_grad():
            loss_dict = self.model.forward(images, targets)

        losses = sum([loss for loss in loss_dict.values()])
        self.log("val_loss", losses, on_epoch=True, prog_bar=True)
        self.logger.experiment
        self.model.eval()
        preds = self.model.forward(images)

        self.iou_metric.update(preds, targets)
        self.mAP_metric.update(preds, targets)

        for key, value in loss_dict.items():
            self.log("val_{}".format(key), value, on_epoch=True)

        for index, result in enumerate(preds):
            boxes = visualize.format_boxes(result)
            boxes["image_path"] = path[index]
            self.predictions.append(boxes)
        
        return losses

    def predict_step(self, batch, batch_idx):
        self.model.eval()
        path, images, targets = batch
        batch_results = self.model.forward(images)
        results = []
        for result in batch_results:
            boxes = visualize.format_boxes(result)
            results.append(boxes)

        return results
    
    

In [8]:
# CONFIG
import yaml
data = {
    "workers": 2,
    "devices": "auto",
    "accelerator": "auto",
    "batch_size": 1,    
    "architecture": 'retinanet',
    "num_classes": 1,
    "nms_thresh": 0.05,
    
    "retinanet":{
        # Non-max supression of overlapping predictions
        "score_thresh": 0.1
    },
    
    'train': {
        "lr": 0.001,
        # Print loss every n epochs
        "epochs": 10,
        # Useful debugging flag in pytorch lightning, set to True to get a single batch of training to test settings.
        "fast_dev_run": False,
        # pin images to GPU memory for fast training. This depends on GPU size and number of images.
        "preload_images": False,
        
        'csv_file': TEST_DATA_PATH+"rasters/formated_ann.csv",
        'root_dir': TEST_DATA_PATH+"rasters/"
    },
    'validation': {
        'csv_file': None,
        'root_dir': None,
        # Intersection over union evaluation
        "iou_threshold": 0.4,
        "val_accuracy_interval": 20
    },
    'save-snapshot': False
}
#TEST_DATA_PATH+"rasters/formated_ann.csv"
#TEST_DATA_PATH+"rasters/"
yaml_file_path = get_data("deepforest_config.yml")
with open(yaml_file_path, 'w') as yaml_file:
    yaml.dump(data, yaml_file, default_flow_style=False, sort_keys=False)

print(f"Données écrites dans {yaml_file_path}")

Données écrites dans /home/junior/anaconda3/envs/myenv/lib/python3.12/site-packages/deepforest/data/deepforest_config.yml


In [9]:
model = DeepDenseForest()
#clear_output()

Reading config file: /home/junior/anaconda3/envs/myenv/lib/python3.12/site-packages/deepforest/data/deepforest_config.yml
No validation file provided. Turning off validation loop


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Model from DeepForest release https://github.com/weecology/DeepForest/releases/tag/1.0.0 was already downloaded. Loading model from file.
Loading pre-built model: https://github.com/weecology/DeepForest/releases/tag/1.0.0


In [None]:
sample_image_path = get_data(HOME+"/final-tree-detection-2/test/rasters/2_1_jpeg.rf.ace0c0cfd6e144d694fc23c098573d58_4.png")
img = model.predict_image(path=sample_image_path, return_plot=True)
plt.imshow(img[:,:,::-1])

In [15]:
LOGS_FILE_PATH = HOME+"/tb_logs"
def rem_log_dir():
    try:
        shutil.rmtree(LOGS_FILE_PATH)
        print('Folder and its content removed')
    except:
        print('Folder not deleted')
rem_log_dir() # COMMENT IF WANNA KEEP MULTIPLE RECORDS
if not os.path.isdir(LOGS_FILE_PATH): os.mkdir(LOGS_FILE_PATH)
model.create_trainer(logger=TensorBoardLogger(LOGS_FILE_PATH, name="df",
    version=datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), default_hp_metric=False), log_every_n_steps=1)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Folder and its content removed
No validation file provided. Turning off validation loop


In [16]:
model.trainer.fit(model)

2024-07-02 10:53:49.610663: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                  | Params
-----------------------------------------------------
0 | model      | RetinaNet             | 32.1 M
1 | iou_metric | IntersectionOverUnion | 0     
2 | mAP_metric | MeanAveragePrecision  | 0     
-----------------------------------------------------
31.9 M    Trainable params
222 K     Non-trainable params
32.1 M    Total params
128.592   Total estimated model params size (MB)


Epoch 0:   0%|          | 0/4 [00:00<?, ?it/s] 

/home/junior/anaconda3/envs/myenv/lib/python3.12/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 3. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0:  25%|██▌       | 1/4 [00:01<00:05,  0.59it/s, v_num=5348]

OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 

In [None]:
%load_ext tensorboard

# Start TensorBoard and display it in the notebook
%tensorboard --logdir tb_logs

In [None]:

sample_image_path = get_data(HOME+"/final-tree-detection-2/test/rasters/2_1_jpeg.rf.ace0c0cfd6e144d694fc23c098573d58_4.png")
img = model.predict_image(path=sample_image_path, return_plot=True)
plt.imshow(img[:,:,::-1])

In [None]:
%cd {HOME}
%mkdir df_preds
predictions = model.evaluate(csv_file=TEST_DATA_PATH+"rasters/formated_ann.csv", root_dir=TEST_DATA_PATH+"/rasters", iou_threshold=0.1, savedir=HOME+ "/df_preds")

predictions["results"].head()
print("Precision = ", predictions["box_prediction"], " and recall = ", predictions["box_recall"])

#predictions = model.evaluate(csv_file=VALID_DATA_PATH+"formated_ann.csv", root_dir=VALID_DATA_PATH, iou_threshold=0.4, savedir=HOME+ "/df_preds")
#predictions["results"].head()