In [None]:
from pathlib import Path

from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase

from fertilizer_recommender.infrastructure.ml.preprocessors.feature_engineering import FeatureEngineer

# --- Configuration -----------------------------------------------------------
# Both YAML paths are relative to the notebook's working directory.
cfg_train = load_yaml_config("../configs/training.yaml")
cfg_features = load_yaml_config("../configs/features.yaml")

# --- Feature schema ----------------------------------------------------------
# Column names are spelled exactly as they are passed to the schema; the
# schema is later asked for `all_features` to select columns from the frame.
numeric_cols = [
    "Temperature",
    "Humidity",
    "Moisture",
    "Nitrogen",
    "Potassium",
    "Phosphorous",
]
categorical_cols = ["Soil Type", "Crop Type"]
schema = FertilizerFeaturesSchema(
    numeric_features=numeric_cols,
    categorical_features=categorical_cols,
)

# --- Dataset repository ------------------------------------------------------
# File names come from the `data` section of the training config.
data_cfg = cfg_train["data"]
repo = CsvDatasetRepository(
    data_dir=Path("../data/raw"),
    train_file=data_cfg["train_file"],
    test_file=data_cfg["test_file"],
)

# --- Prepare the training frame ----------------------------------------------
# `execute()` yields a pair; only the first element is used in this cell.
# NOTE(review): the discarded second element is presumably the test split —
# confirm against PrepareDatasetUseCase.
prepare_dataset = PrepareDatasetUseCase(repo, schema, data_cfg["target_col"])
train_df, _ = prepare_dataset.execute()

# --- Feature engineering -----------------------------------------------------
# The ratio / interaction switches are read from the features config.
fe_cfg = cfg_features["feature_engineering"]
fe = FeatureEngineer(
    enable_ratios=fe_cfg["enable_ratios"],
    enable_interactions=fe_cfg["enable_interactions"],
)

# Only the schema's feature columns are handed to the transformer.
feature_frame = train_df[schema.all_features]
df_fe = fe.transform(feature_frame)

# Trailing expression: its value becomes the cell's displayed output.
df_fe.head()