In [2]:
from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.utils.root_finder import get_repository_root

# Anchor the configuration path at the repository root rather than at the
# notebook's current working directory.
root = get_repository_root()
config_path = root / "configs/training.yaml"

# Load the training configuration and print it for a quick visual check;
# the printed value is a nested dict (project / paths / data / training).
cfg = load_yaml_config(config_path)
print(cfg)

# Last expression of the cell: shown as the cell's output value.
cfg["paths"]["data_raw_dir"]

{'project': {'name': 'fertilizer_recommender', 'seed': 42}, 'paths': {'data_raw_dir': 'data/raw', 'artifacts_dir': 'artifacts', 'models_dir': 'artifacts/models', 'reports_dir': 'artifacts/reports'}, 'data': {'train_file': 'train.csv', 'test_file': 'test.csv', 'target_col': 'Fertilizer Name', 'id_col': 'id'}, 'training': {'n_splits': 5, 'top_k': 3}}


'data/raw'

In [7]:
import pandas as pd


# Build the CSV locations from the loaded configuration instead of a
# machine-specific absolute path, so the cell runs on any checkout of the
# repository and does not expose a local user directory.
# `root` and `cfg` are defined by the config-loading cell at the top of the
# notebook (repository root and parsed configs/training.yaml).
data_raw_dir = root / cfg["paths"]["data_raw_dir"]

# Raw training split (includes the target column).
df_train = pd.read_csv(data_raw_dir / cfg["data"]["train_file"])

display(df_train.head())

print("---" * 20)

# Raw test split (same feature columns, no target column).
df_test = pd.read_csv(data_raw_dir / cfg["data"]["test_file"])

display(df_test.head())

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,Clayey,Sugarcane,36,4,5,28-28
1,1,27,69,65,Sandy,Millets,30,6,18,28-28
2,2,29,63,32,Sandy,Millets,24,12,16,17-17-17
3,3,35,62,54,Sandy,Barley,39,12,4,10-26-26
4,4,35,58,43,Red,Paddy,37,2,16,DAP


------------------------------------------------------------


Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,750000,31,70,52,Sandy,Wheat,34,11,24
1,750001,27,62,45,Red,Sugarcane,30,14,15
2,750002,28,72,28,Clayey,Ground Nuts,14,15,4
3,750003,37,53,57,Black,Ground Nuts,18,17,36
4,750004,31,55,32,Red,Pulses,13,19,14


In [13]:
# Class distribution of the target column. The column name is read from the
# config (`data.target_col`) so this check stays in sync with the training
# pipeline instead of duplicating the literal column name.
df_train[cfg["data"]["target_col"]].value_counts()

Fertilizer Name
14-35-14    114436
10-26-26    113887
17-17-17    112453
28-28       111158
20-20       110889
DAP          94860
Urea         92317
Name: count, dtype: int64

In [6]:
from pathlib import Path
from fertilizer_recommender.infrastructure.utils.config_loader import load_yaml_config
from fertilizer_recommender.infrastructure.repositories.dataset_repository_impl import CsvDatasetRepository
from fertilizer_recommender.domain.entities.fertilizer_features import FertilizerFeaturesSchema
from fertilizer_recommender.application.use_cases.prepare_dataset import PrepareDatasetUseCase
from fertilizer_recommender.infrastructure.utils.root_finder import get_repository_root
from fertilizer_recommender.infrastructure.observability.logger import setup_logger
from loguru import logger

# Initialise the project logger for this notebook session at DEBUG level.
# The project name and environment passed here appear as fields in every
# emitted log line.
setup_logger(project_name="fertilizer_recommender", environment="notebook", level="DEBUG")

# Locate the repository root and load the training configuration from it.
root = get_repository_root()
config_path = root / "configs/training.yaml"
cfg = load_yaml_config(config_path)

# Feature columns, grouped by type. Names must match the CSV headers exactly,
# including the dataset's own "Temparature" spelling.
numeric_features = [
    "Temparature",
    "Humidity",
    "Moisture",
    "Nitrogen",
    "Potassium",
    "Phosphorous",
]
categorical_features = ["Soil Type", "Crop Type"]

# Schema object handed to the dataset-preparation use case.
schema = FertilizerFeaturesSchema(
    numeric_features=numeric_features,
    categorical_features=categorical_features,
)

# Pull the two config sections used below into short local names.
paths_cfg = cfg["paths"]
data_cfg = cfg["data"]

# Directory holding the raw CSV files, resolved against the repository root.
raw_data_dir = Path(root / paths_cfg["data_raw_dir"])

# Repository that knows where the train/test CSV files live.
repo = CsvDatasetRepository(
    data_dir=raw_data_dir,
    train_file=data_cfg["train_file"],
    test_file=data_cfg["test_file"],
)

# Use case wiring: repository + feature schema + name of the target column.
use_case = PrepareDatasetUseCase(
    dataset_repository=repo,
    schema=schema,
    target_col=data_cfg["target_col"],
)

# Run the preparation use case; it yields the train and test frames.
train_df, test_df = use_case.execute()

# Record both shapes in the log as a quick sanity check.
logger.info(f"Train shape: {train_df.shape}")
logger.info(f"Test shape: {test_df.shape}")

# Preview the first rows of each frame, separated by a visual rule.
train_preview = train_df.head()
display(train_preview)

print("---" * 20)

test_preview = test_df.head()
display(test_preview)


[32m2025-12-15 12:58:01[0m | [1mINFO[0m | [36mfertilizer_recommender[0m | [35mnotebook[0m | 3588318800:<module>:50 | [1mTrain shape: (750000, 10)[0m
[32m2025-12-15 12:58:01[0m | [1mINFO[0m | [36mfertilizer_recommender[0m | [35mnotebook[0m | 3588318800:<module>:51 | [1mTest shape: (250000, 9)[0m


Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,Clayey,Sugarcane,36,4,5,28-28
1,1,27,69,65,Sandy,Millets,30,6,18,28-28
2,2,29,63,32,Sandy,Millets,24,12,16,17-17-17
3,3,35,62,54,Sandy,Barley,39,12,4,10-26-26
4,4,35,58,43,Red,Paddy,37,2,16,DAP


------------------------------------------------------------


Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,750000,31,70,52,Sandy,Wheat,34,11,24
1,750001,27,62,45,Red,Sugarcane,30,14,15
2,750002,28,72,28,Clayey,Ground Nuts,14,15,4
3,750003,37,53,57,Black,Ground Nuts,18,17,36
4,750004,31,55,32,Red,Pulses,13,19,14
