In [1]:
import sys

# Make the parent directory importable (presumably so the `src` package used by the
# later imports resolves; adjust the path to match your folder layout).
# Guarded so that re-running this cell does not keep appending duplicate entries.
if '../' not in sys.path:
    sys.path.append('../')


In [2]:
from dataclasses import dataclass
from omegaconf import DictConfig

from lightning.pytorch import Trainer
from lightning.pytorch.loggers import MLFlowLogger
import colorlog
import logging
import rootutils

from src.models.object_detector import FasterRCNNModule
from src.data.data_module import GrotiusDataModule

In [3]:
# ~~~ Configuration ~~~
@dataclass
class Config:
    """Tunable settings for this training notebook.

    The data-related fields are handed to ``GrotiusDataModule``, ``weights`` to
    ``FasterRCNNModule``, the logger fields to ``MLFlowLogger`` and
    ``max_epochs`` to the ``Trainer``.
    """

    # Root directory passed to the data module.
    # NOTE(review): hard-coded absolute local path; not portable to other machines.
    data_dir: str = (
        "/home/olivier/projet/pi/monorepo/doc-analyzer/model-experiment/data"
    )
    # Data loading options forwarded to the data module.
    batch_size: int = 2
    num_workers: int = 4
    shuffle: bool = True
    # Weights identifier forwarded to the detector module.
    weights: str = "FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT"
    # MLflow experiment name and tracking URI.
    experiment_name: str = "doc-analyzer-v1.0"
    logger_uri: str = "file:./mlruns"
    # Upper bound on training epochs given to the Trainer.
    max_epochs: int = 2
    # Annotated so it is a real dataclass field (part of __init__/repr/eq); without the
    # annotation it was only a class attribute. Declared last so the positional order
    # of the pre-existing fields is unchanged. Not referenced elsewhere in this notebook.
    dataset_files: tuple[str, ...] = ("CRH.yaml", "fasterRCNN_v2.yaml")


config = Config()

In [4]:
# ~~~ Data Preparation ~~~
# Positional arguments for the data module, in the order the constructor is called with:
# data directory, batch size, worker count, shuffle flag.
datamodule_args = (
    config.data_dir,
    config.batch_size,
    config.num_workers,
    config.shuffle,
)
data_module = GrotiusDataModule(*datamodule_args)

# Explicit setup; the recorded output shows annotations being loaded at this point.
data_module.setup()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [5]:
# ~~~ Model Initialization ~~~
# Build the detector module from the weights identifier stored in the config.
model = FasterRCNNModule(config.weights)

In [6]:
# ~~~ Logger ~~~
# MLflow logger; experiment name and tracking URI come from the notebook config.
mlf_logger = MLFlowLogger(
    experiment_name=config.experiment_name, tracking_uri=config.logger_uri
)

# ~~~ Training ~~~
trainer = Trainer(
    limit_train_batches=100,  # cap on the number of training batches per epoch
    max_epochs=config.max_epochs,
    logger=mlf_logger,
    fast_dev_run=False,
)
# Hand the data module over through the dedicated `datamodule=` keyword (the same
# keyword the test cell uses) instead of `train_dataloaders=`, which is meant for
# dataloaders rather than a LightningDataModule.
trainer.fit(
    model,
    datamodule=data_module,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                  | Params
------------------------------------------------
0 | model | FasterRCNN            | 43.3 M
1 | iou   | IntersectionOverUnion | 0     
------------------------------------------------
43.0 M    Trainable params
225 K     Non-trainable params
43.3 M    Total params
173.066   Total estimated model params size (MB)


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
                                                                           

/home/olivier/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 1: 100%|██████████| 64/64 [00:41<00:00,  1.53it/s, v_num=dca5, train_loss_step=0.424, train_loss_epoch=0.617]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 64/64 [00:42<00:00,  1.50it/s, v_num=dca5, train_loss_step=0.424, train_loss_epoch=0.617]


In [7]:
# Run the test loop of the trained model using the data module.
# NOTE(review): in the recorded run this call aborts before any test batch completes with
# "TypeError: expected Tensor as element 0 in argument 0, but got dict". The cause is
# presumably in the model's test step or the batch format of the test dataloader,
# both defined outside this notebook — verify there.
trainer.test(model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Testing DataLoader 0:   0%|          | 0/21 [00:00<?, ?it/s]

TypeError: expected Tensor as element 0 in argument 0, but got dict