In [1]:
from pathlib import Path
from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase
from fertilizer_recommender.infrastructure.utils.root_finder import get_repository_root
from fertilizer_recommender.infrastructure.observability.logger import setup_logger
from loguru import logger

# Configure project logging for an interactive notebook session.
setup_logger(project_name="fertilizer_recommender", environment="notebook", level="DEBUG")

# Locate the training configuration relative to the repository root so the
# lookup does not depend on the notebook's working directory.
root = get_repository_root()
config_path = root / "configs" / "training.yaml"

# Parsed training configuration; later cells read cfg["data"], cfg["project"]
# and cfg["training"].
cfg = load_yaml_config(config_path)

In [2]:
from pathlib import Path
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase
from fertilizer_recommender.infrastructure.ml.preprocessors.sklearn_transformer import SklearnFeatureTransformer
from fertilizer_recommender.infrastructure.ml.models.baseline_logreg import BaselineLogisticRegression
from fertilizer_recommender.infrastructure.ml.pipelines.training_pipeline import TrainingPipeline
from fertilizer_recommender.application.use_cases.train_model import TrainModelUseCase
from fertilizer_recommender.application.use_cases.predict_topk import PredictTopKUseCase
from fertilizer_recommender.domain.services.metric_service import map_at_k



# Baseline experiment: fit a logistic-regression pipeline on the training split
# and report MAP@k on that same split.

# Read shared config values once so every step below uses the same settings.
target_col = cfg["data"]["target_col"]
top_k = cfg["training"]["top_k"]

# Feature schema: which columns are numeric and which are categorical.
# NOTE(review): "Temparature" is presumably spelled to match the CSV header -
# verify against the raw data before "fixing" it.
schema = FertilizerFeaturesSchema(
    numeric_features=[
        "Temparature", "Humidity", "Moisture",
        "Nitrogen", "Potassium", "Phosphorous",
    ],
    categorical_features=["Soil Type", "Crop Type"],
)

# NOTE(review): this path is relative to the kernel's working directory, while
# the config file above is resolved from the repository root. Consider
# `root / "data" / "raw"` if the data lives under the repo root - confirm layout.
repo = CsvDatasetRepository(
    data_dir=Path("../data/raw"),
    train_file=cfg["data"]["train_file"],
    test_file=cfg["data"]["test_file"],
)

train_df, test_df = PrepareDatasetUseCase(repo, schema, target_col).execute()

X_train = train_df[schema.all_features]
y_train = train_df[target_col]

# Keep the unfitted pipeline under its own name; `pipeline` is bound only to
# the object returned by the training use case.
untrained_pipeline = TrainingPipeline(
    transformer=SklearnFeatureTransformer(
        schema.numeric_features,
        schema.categorical_features,
    ),
    model=BaselineLogisticRegression(random_state=cfg["project"]["seed"]),
)

pipeline = TrainModelUseCase(untrained_pipeline).execute(X_train, y_train)

topk_preds = PredictTopKUseCase(pipeline, k=top_k).execute(X_train)

# Score with the same k used to generate the predictions (previously a
# hard-coded 3, which could silently disagree with cfg["training"]["top_k"]).
# The score is computed on the data the model was fitted on, so it is a
# training-set figure, not a held-out estimate.
score = map_at_k(y_train.tolist(), topk_preds, k=top_k)
print(f"MAP@{top_k} (train, baseline) = {score:.4f}")


MAP@3 (train, baseline) = 0.2875
