In [1]:
from pathlib import Path

from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase

from fertilizer_recommender.infrastructure.ml.models.catboost_multiclass import CatBoostMulticlass
from fertilizer_recommender.infrastructure.tracking.run_name_generator import (
    generate_run_name,
)
from fertilizer_recommender.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from fertilizer_recommender.domain.services.experiment_tracking_service import (
    ExperimentTrackingService
)
from fertilizer_recommender.infrastructure.ml.cv.splitter import make_stratified_kfold
from fertilizer_recommender.application.use_cases.train_with_cv import TrainWithCVUseCase
from fertilizer_recommender.infrastructure.ml.preprocessors.sklearn_transformer import SklearnFeatureTransformer
from fertilizer_recommender.infrastructure.ml.pipelines.training_pipeline import TrainingPipeline


# --- Configuration ---------------------------------------------------------
# Paths are relative to the notebook's working directory.
cfg_train = load_yaml_config("../configs/training.yaml")
cfg_models = load_yaml_config("../configs/models.yaml")
cfg_mlflow = load_yaml_config("../configs/mlflow.yaml")

# --- Feature schema --------------------------------------------------------
# NOTE(review): the spellings "Temparature" / "Phosphorous" presumably mirror
# the raw CSV headers -- confirm before "correcting" them.
schema = FertilizerFeaturesSchema(
    numeric_features=[
        "Temparature",
        "Humidity",
        "Moisture",
        "Nitrogen",
        "Potassium",
        "Phosphorous",
    ],
    categorical_features=["Soil Type", "Crop Type"],
)

# --- Data loading ----------------------------------------------------------
data_cfg = cfg_train["data"]
repo = CsvDatasetRepository(
    data_dir=Path("../data/raw"),
    train_file=data_cfg["train_file"],
    test_file=data_cfg["test_file"],
)

target_col = data_cfg["target_col"]
prepare_dataset = PrepareDatasetUseCase(repo, schema, target_col)
# Only the first element (training frame) is used in this cell.
train_df, _ = prepare_dataset.execute()

# Feature matrix and target vector used by the cross-validation run.
X = train_df[schema.all_features]
y = train_df[target_col]

# --- Experiment tracking ---------------------------------------------------
EXPERIMENT_NAME = cfg_mlflow["mlflow"]["experiment_name"]
RUN_NAME = generate_run_name(prefix="Catb")

# Clean wiring: the concrete MLflow tracker is injected into the tracking service.
tracker = MLflowExperimentTracker()
experiment_service = ExperimentTrackingService(tracker)


def splitter_factory():
    """Create a fresh CV splitter from the training configuration.

    The fold count and the seed are read from ``cfg_train`` each time the
    factory is called and forwarded to ``make_stratified_kfold``.
    """
    fold_count = cfg_train["training"]["n_splits"]
    random_seed = cfg_train["project"]["seed"]
    return make_stratified_kfold(n_splits=fold_count, seed=random_seed)

def catboost_pipeline_factory():
    """Build a new ``TrainingPipeline`` wrapping a CatBoost multiclass model.

    The transformer receives the schema's numeric and categorical feature
    lists; the model is configured from the ``catboost`` section of
    ``cfg_models``.
    """
    feature_transformer = SklearnFeatureTransformer(
        schema.numeric_features,
        schema.categorical_features,
    )
    classifier = CatBoostMulticlass(**cfg_models["catboost"])
    return TrainingPipeline(transformer=feature_transformer, model=classifier)

# Cross-validated training run for the CatBoost pipeline.
top_k = cfg_train["training"]["top_k"]

use_case = TrainWithCVUseCase(
    experiment_service=experiment_service,
    splitter_factory=splitter_factory,
    pipeline_factory=catboost_pipeline_factory,
    top_k=top_k,
)

# Run parameters handed to the use case together with the experiment/run names
# (presumably recorded by the tracker -- verify in TrainWithCVUseCase).
# The "model" label now names the model this cell actually trains; it used to
# be a copy/paste leftover ("logreg_multinomial").
params = {
    "model": "catboost_multiclass",
    "n_splits": cfg_train["training"]["n_splits"],
    "top_k": top_k,
    "seed": cfg_train["project"]["seed"],
}

result = use_case.execute(
    X_df=X,
    y=y,
    experiment_name=EXPERIMENT_NAME,
    run_name=RUN_NAME,
    params=params,
)

# The label follows the configured top_k instead of a hard-coded 3.
print(f"Fold MAP@{top_k}:", result.fold_scores)
print(f"Mean MAP@{top_k}:", result.mean_score)

[32m2026-01-08 15:48:11.742[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_setup[0m:[36m__init__[0m:[36m36[0m - [34m[1mTracking URI : sqlite://///Users/surelmanda/Downloads/ml-projects/mlflow_central/db/mlruns.db[0m
[32m2026-01-08 15:48:11.744[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_setup[0m:[36m__init__[0m:[36m37[0m - [34m[1mArtifact URI : /Users/surelmanda/Downloads/ml-projects/mlflow_central/mlflow_artifacts[0m
2026/01/08 15:48:12 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/08 15:48:12 INFO mlflow.store.db.utils: Updating database tables
2026/01/08 15:48:12 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/08 15:48:12 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/08 15:48:12 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/08 15:48:12 INFO alembic.runtime.migration: Will assume non-

0:	learn: 1.9453194	total: 401ms	remaining: 4m
100:	learn: 1.9288982	total: 31.5s	remaining: 2m 35s
200:	learn: 1.9195964	total: 1m 4s	remaining: 2m 8s
300:	learn: 1.9112166	total: 1m 49s	remaining: 1m 48s
400:	learn: 1.9032716	total: 2m 20s	remaining: 1m 9s
500:	learn: 1.8955490	total: 2m 52s	remaining: 34.1s


[32m2026-01-08 15:51:41.190[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m95[0m - [1m[Fold 1] Pr√©diction et calcul du top-3[0m


599:	learn: 1.8884225	total: 3m 22s	remaining: 0us


[32m2026-01-08 15:51:43.344[0m | [32m[1mSUCCESS [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m106[0m - [32m[1m[Fold 1] Score MAP@3 = 0.3191[0m
[32m2026-01-08 15:51:43.345[0m | [1mINFO    [0m | [36mfertilizer_recommender.domain.services.experiment_tracking_service[0m:[36mlog_evaluation[0m:[36m46[0m - [1mM√©triques d'√©valuation : {'map_3_fold': 0.3191133333333964}[0m
[32m2026-01-08 15:51:43.345[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_tracker[0m:[36mlog_metrics[0m:[36m86[0m - [34m[1mM√©triques enregistr√©es : {'map_3_fold': 0.3191133333333964}[0m
[32m2026-01-08 15:51:43.477[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m86[0m - [1m[Fold 2] D√©marrage (train=600000 obs, val=150000 obs)[0m
[32m2026-01-08 15:51:43.478[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.

0:	learn: 1.9454017	total: 272ms	remaining: 2m 42s
100:	learn: 1.9289427	total: 32s	remaining: 2m 37s
200:	learn: 1.9196455	total: 1m 3s	remaining: 2m 6s
300:	learn: 1.9112779	total: 1m 35s	remaining: 1m 35s
400:	learn: 1.9034276	total: 2m 18s	remaining: 1m 8s
500:	learn: 1.8958759	total: 3m 5s	remaining: 36.6s


[32m2026-01-08 15:55:30.899[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m95[0m - [1m[Fold 2] Pr√©diction et calcul du top-3[0m


599:	learn: 1.8888462	total: 3m 42s	remaining: 0us


[32m2026-01-08 15:55:33.865[0m | [32m[1mSUCCESS [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m106[0m - [32m[1m[Fold 2] Score MAP@3 = 0.3200[0m
[32m2026-01-08 15:55:33.866[0m | [1mINFO    [0m | [36mfertilizer_recommender.domain.services.experiment_tracking_service[0m:[36mlog_evaluation[0m:[36m46[0m - [1mM√©triques d'√©valuation : {'map_3_fold': 0.32001333333340265}[0m
[32m2026-01-08 15:55:33.866[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_tracker[0m:[36mlog_metrics[0m:[36m86[0m - [34m[1mM√©triques enregistr√©es : {'map_3_fold': 0.32001333333340265}[0m
[32m2026-01-08 15:55:34.142[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m86[0m - [1m[Fold 3] D√©marrage (train=600000 obs, val=150000 obs)[0m
[32m2026-01-08 15:55:34.143[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_case

0:	learn: 1.9453354	total: 428ms	remaining: 4m 16s
100:	learn: 1.9288622	total: 35.9s	remaining: 2m 57s
200:	learn: 1.9195720	total: 1m 20s	remaining: 2m 39s
300:	learn: 1.9111171	total: 2m 11s	remaining: 2m 10s
400:	learn: 1.9032601	total: 2m 58s	remaining: 1m 28s
500:	learn: 1.8959027	total: 3m 38s	remaining: 43.2s


[32m2026-01-08 15:59:54.472[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m95[0m - [1m[Fold 3] Pr√©diction et calcul du top-3[0m


599:	learn: 1.8887244	total: 4m 13s	remaining: 0us


[32m2026-01-08 15:59:57.033[0m | [32m[1mSUCCESS [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m106[0m - [32m[1m[Fold 3] Score MAP@3 = 0.3196[0m
[32m2026-01-08 15:59:57.035[0m | [1mINFO    [0m | [36mfertilizer_recommender.domain.services.experiment_tracking_service[0m:[36mlog_evaluation[0m:[36m46[0m - [1mM√©triques d'√©valuation : {'map_3_fold': 0.3196322222222885}[0m
[32m2026-01-08 15:59:57.035[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_tracker[0m:[36mlog_metrics[0m:[36m86[0m - [34m[1mM√©triques enregistr√©es : {'map_3_fold': 0.3196322222222885}[0m
[32m2026-01-08 15:59:57.413[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m86[0m - [1m[Fold 4] D√©marrage (train=600000 obs, val=150000 obs)[0m
[32m2026-01-08 15:59:57.414[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.

0:	learn: 1.9453229	total: 330ms	remaining: 3m 17s
100:	learn: 1.9288593	total: 36.3s	remaining: 2m 59s
200:	learn: 1.9194407	total: 1m 9s	remaining: 2m 17s
300:	learn: 1.9111151	total: 1m 50s	remaining: 1m 49s
400:	learn: 1.9032574	total: 2m 22s	remaining: 1m 10s
500:	learn: 1.8957417	total: 2m 56s	remaining: 35s


[32m2026-01-08 16:03:33.207[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m95[0m - [1m[Fold 4] Pr√©diction et calcul du top-3[0m


599:	learn: 1.8887100	total: 3m 30s	remaining: 0us


[32m2026-01-08 16:03:35.643[0m | [32m[1mSUCCESS [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m106[0m - [32m[1m[Fold 4] Score MAP@3 = 0.3200[0m
[32m2026-01-08 16:03:35.644[0m | [1mINFO    [0m | [36mfertilizer_recommender.domain.services.experiment_tracking_service[0m:[36mlog_evaluation[0m:[36m46[0m - [1mM√©triques d'√©valuation : {'map_3_fold': 0.32000666666673416}[0m
[32m2026-01-08 16:03:35.644[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_tracker[0m:[36mlog_metrics[0m:[36m86[0m - [34m[1mM√©triques enregistr√©es : {'map_3_fold': 0.32000666666673416}[0m
[32m2026-01-08 16:03:35.919[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m86[0m - [1m[Fold 5] D√©marrage (train=600000 obs, val=150000 obs)[0m
[32m2026-01-08 16:03:35.920[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_case

0:	learn: 1.9453478	total: 618ms	remaining: 6m 10s
100:	learn: 1.9289165	total: 43.8s	remaining: 3m 36s
200:	learn: 1.9198391	total: 1m 32s	remaining: 3m 4s
300:	learn: 1.9114347	total: 2m 30s	remaining: 2m 29s
400:	learn: 1.9035912	total: 3m 9s	remaining: 1m 33s
500:	learn: 1.8959983	total: 3m 45s	remaining: 44.6s
599:	learn: 1.8889384	total: 4m 39s	remaining: 0us


[32m2026-01-08 16:08:22.780[0m | [1mINFO    [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m95[0m - [1m[Fold 5] Pr√©diction et calcul du top-3[0m
[32m2026-01-08 16:08:25.818[0m | [32m[1mSUCCESS [0m | [36mfertilizer_recommender.application.use_cases.train_with_cv[0m:[36mexecute[0m:[36m106[0m - [32m[1m[Fold 5] Score MAP@3 = 0.3201[0m
[32m2026-01-08 16:08:25.819[0m | [1mINFO    [0m | [36mfertilizer_recommender.domain.services.experiment_tracking_service[0m:[36mlog_evaluation[0m:[36m46[0m - [1mM√©triques d'√©valuation : {'map_3_fold': 0.3201022222222902}[0m
[32m2026-01-08 16:08:25.820[0m | [34m[1mDEBUG   [0m | [36mfertilizer_recommender.infrastructure.tracking.mlflow_tracker[0m:[36mlog_metrics[0m:[36m86[0m - [34m[1mM√©triques enregistr√©es : {'map_3_fold': 0.3201022222222902}[0m
[32m2026-01-08 16:08:25.833[0m | [1mINFO    [0m | [36mfertilizer_recommender.domain.services.experiment_tracking_s

Fold MAP@3: [0.3191133333333964, 0.32001333333340265, 0.3196322222222885, 0.32000666666673416, 0.3201022222222902]
Mean MAP@3: 0.31977355555562237


In [None]:
from pathlib import Path

from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase

from fertilizer_recommender.infrastructure.ml.models.lightgbm_multiclass import LightGBMMulticlass
from fertilizer_recommender.infrastructure.tracking.run_name_generator import (
    generate_run_name,
)
from fertilizer_recommender.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from fertilizer_recommender.domain.services.experiment_tracking_service import (
    ExperimentTrackingService
)
from fertilizer_recommender.infrastructure.ml.cv.splitter import make_stratified_kfold
from fertilizer_recommender.application.use_cases.train_with_cv import TrainWithCVUseCase
from fertilizer_recommender.infrastructure.ml.preprocessors.sklearn_transformer import SklearnFeatureTransformer
from fertilizer_recommender.infrastructure.ml.pipelines.training_pipeline import TrainingPipeline


# --- Configuration ---------------------------------------------------------
# Paths are relative to the notebook's working directory.
cfg_train = load_yaml_config("../configs/training.yaml")
cfg_models = load_yaml_config("../configs/models.yaml")
cfg_mlflow = load_yaml_config("../configs/mlflow.yaml")

# --- Feature schema --------------------------------------------------------
# NOTE(review): the spellings "Temparature" / "Phosphorous" presumably mirror
# the raw CSV headers -- confirm before "correcting" them.
schema = FertilizerFeaturesSchema(
    numeric_features=[
        "Temparature",
        "Humidity",
        "Moisture",
        "Nitrogen",
        "Potassium",
        "Phosphorous",
    ],
    categorical_features=["Soil Type", "Crop Type"],
)

# --- Data loading ----------------------------------------------------------
data_cfg = cfg_train["data"]
repo = CsvDatasetRepository(
    data_dir=Path("../data/raw"),
    train_file=data_cfg["train_file"],
    test_file=data_cfg["test_file"],
)

target_col = data_cfg["target_col"]
prepare_dataset = PrepareDatasetUseCase(repo, schema, target_col)
# Only the first element (training frame) is used in this cell.
train_df, _ = prepare_dataset.execute()

# Feature matrix and target vector used by the cross-validation run.
X = train_df[schema.all_features]
y = train_df[target_col]

# --- Experiment tracking ---------------------------------------------------
EXPERIMENT_NAME = cfg_mlflow["mlflow"]["experiment_name"]
RUN_NAME = generate_run_name(prefix="Lightgbm")

# Clean wiring: the concrete MLflow tracker is injected into the tracking service.
tracker = MLflowExperimentTracker()
experiment_service = ExperimentTrackingService(tracker)


def splitter_factory():
    """Create a fresh CV splitter from the training configuration.

    The fold count and the seed are read from ``cfg_train`` each time the
    factory is called and forwarded to ``make_stratified_kfold``.
    """
    fold_count = cfg_train["training"]["n_splits"]
    random_seed = cfg_train["project"]["seed"]
    return make_stratified_kfold(n_splits=fold_count, seed=random_seed)

def lightgbm_pipeline_factory():
    """Build a new ``TrainingPipeline`` wrapping a LightGBM multiclass model.

    The transformer receives the schema's numeric and categorical feature
    lists. ``num_class`` is the number of distinct values in the target
    ``y``; the remaining model arguments come from the ``lightgbm`` section
    of ``cfg_models``.
    """
    feature_transformer = SklearnFeatureTransformer(
        schema.numeric_features,
        schema.categorical_features,
    )
    class_count = len(y.unique())
    classifier = LightGBMMulticlass(
        num_class=class_count,
        **cfg_models["lightgbm"],
    )
    return TrainingPipeline(transformer=feature_transformer, model=classifier)

# Cross-validated training run for the LightGBM pipeline.
top_k = cfg_train["training"]["top_k"]

use_case = TrainWithCVUseCase(
    experiment_service=experiment_service,
    splitter_factory=splitter_factory,
    pipeline_factory=lightgbm_pipeline_factory,
    top_k=top_k,
)

# Run parameters handed to the use case together with the experiment/run names
# (presumably recorded by the tracker -- verify in TrainWithCVUseCase).
# The "model" label now names the model this cell actually trains; it used to
# be a copy/paste leftover ("logreg_multinomial").
params = {
    "model": "lightgbm_multiclass",
    "n_splits": cfg_train["training"]["n_splits"],
    "top_k": top_k,
    "seed": cfg_train["project"]["seed"],
}

result = use_case.execute(
    X_df=X,
    y=y,
    experiment_name=EXPERIMENT_NAME,
    run_name=RUN_NAME,
    params=params,
)

# The label follows the configured top_k instead of a hard-coded 3.
print(f"Fold MAP@{top_k}:", result.fold_scores)
print(f"Mean MAP@{top_k}:", result.mean_score)

In [None]:
from pathlib import Path

from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase

from fertilizer_recommender.infrastructure.ml.models.xgboost_multiclass import XGBoostMulticlass
from fertilizer_recommender.infrastructure.tracking.run_name_generator import (
    generate_run_name,
)
from fertilizer_recommender.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from fertilizer_recommender.domain.services.experiment_tracking_service import (
    ExperimentTrackingService
)
from fertilizer_recommender.infrastructure.ml.cv.splitter import make_stratified_kfold
from fertilizer_recommender.application.use_cases.train_with_cv import TrainWithCVUseCase
from fertilizer_recommender.infrastructure.ml.preprocessors.sklearn_transformer import SklearnFeatureTransformer
from fertilizer_recommender.infrastructure.ml.pipelines.training_pipeline import TrainingPipeline


# --- Configuration ---------------------------------------------------------
# Paths are relative to the notebook's working directory.
cfg_train = load_yaml_config("../configs/training.yaml")
cfg_models = load_yaml_config("../configs/models.yaml")
cfg_mlflow = load_yaml_config("../configs/mlflow.yaml")

# --- Feature schema --------------------------------------------------------
# NOTE(review): the spellings "Temparature" / "Phosphorous" presumably mirror
# the raw CSV headers -- confirm before "correcting" them.
schema = FertilizerFeaturesSchema(
    numeric_features=["Temparature", "Humidity", "Moisture", "Nitrogen", "Potassium", "Phosphorous"],
    categorical_features=["Soil Type", "Crop Type"],
)

# --- Data loading ----------------------------------------------------------
repo = CsvDatasetRepository(
    data_dir=Path("../data/raw"),
    train_file=cfg_train["data"]["train_file"],
    test_file=cfg_train["data"]["test_file"],
)

# Only the first element (training frame) is used in this cell.
train_df, _ = PrepareDatasetUseCase(
    repo, schema, cfg_train["data"]["target_col"]
).execute()

# Feature matrix and target vector used by the cross-validation run.
X = train_df[schema.all_features]
y = train_df[cfg_train["data"]["target_col"]]

# --- Experiment tracking ---------------------------------------------------
EXPERIMENT_NAME = cfg_mlflow["mlflow"]["experiment_name"]
# This cell trains XGBoost; the prefix used to be "Lightgbm" (copy/paste from
# the previous cell), which made the run indistinguishable from LightGBM runs.
RUN_NAME = generate_run_name(prefix="Xgboost")

# Clean wiring: the concrete MLflow tracker is injected into the tracking service.
tracker = MLflowExperimentTracker()
experiment_service = ExperimentTrackingService(tracker)


def splitter_factory():
    """Create a fresh CV splitter from the training configuration.

    The fold count and the seed are read from ``cfg_train`` each time the
    factory is called and forwarded to ``make_stratified_kfold``.
    """
    fold_count = cfg_train["training"]["n_splits"]
    random_seed = cfg_train["project"]["seed"]
    return make_stratified_kfold(n_splits=fold_count, seed=random_seed)

def lightgbm_pipeline_factory():
    """Build a new ``TrainingPipeline`` wrapping an XGBoost multiclass model.

    NOTE(review): despite its name, this factory builds an
    ``XGBoostMulticlass`` model; the name is kept because the use case
    wiring in this cell references it. Consider renaming it (and the
    reference) to ``xgboost_pipeline_factory``.

    The transformer receives the schema's numeric and categorical feature
    lists. ``num_class`` is the number of distinct values in the target
    ``y``; the remaining model arguments come from the ``xgboost`` section
    of ``cfg_models``.
    """
    feature_transformer = SklearnFeatureTransformer(
        schema.numeric_features,
        schema.categorical_features,
    )
    class_count = len(y.unique())
    classifier = XGBoostMulticlass(
        num_class=class_count,
        **cfg_models["xgboost"],
    )
    return TrainingPipeline(transformer=feature_transformer, model=classifier)

# Cross-validated training run for the XGBoost pipeline.
top_k = cfg_train["training"]["top_k"]

use_case = TrainWithCVUseCase(
    experiment_service=experiment_service,
    splitter_factory=splitter_factory,
    # NOTE: in this cell the factory named "lightgbm_pipeline_factory" builds
    # the XGBoost pipeline.
    pipeline_factory=lightgbm_pipeline_factory,
    top_k=top_k,
)

# Run parameters handed to the use case together with the experiment/run names
# (presumably recorded by the tracker -- verify in TrainWithCVUseCase).
# The "model" label now names the model this cell actually trains; it used to
# be a copy/paste leftover ("logreg_multinomial").
params = {
    "model": "xgboost_multiclass",
    "n_splits": cfg_train["training"]["n_splits"],
    "top_k": top_k,
    "seed": cfg_train["project"]["seed"],
}

result = use_case.execute(
    X_df=X,
    y=y,
    experiment_name=EXPERIMENT_NAME,
    run_name=RUN_NAME,
    params=params,
)

# The label follows the configured top_k instead of a hard-coded 3.
print(f"Fold MAP@{top_k}:", result.fold_scores)
print(f"Mean MAP@{top_k}:", result.mean_score)