In [1]:
from pathlib import Path
from health_lifestyle_diabetes.infrastructure.utils.config_loader import YamlConfigLoader
from health_lifestyle_diabetes.infrastructure.utils.paths import get_repository_root
from health_lifestyle_diabetes.infrastructure.logger.config import configure_logging
from health_lifestyle_diabetes.infrastructure.logger.loguru_logger import LoguruLogger
from health_lifestyle_diabetes.infrastructure.data_sources.csv_dataset_repository import CSVDatasetRepository 

# Configure logging for the development environment.
configure_logging(env="dev")

# Resolve the repository root once; every path below is derived from it.
root = get_repository_root()

# Load the paths configuration. The location is assembled with pathlib and
# handed to the loader as a string, as it was originally.
config_loader = YamlConfigLoader()
paths = config_loader.load_config(str(Path(root) / "configs" / "paths.yaml"))

# Location of the test dataset, as declared in the configuration and resolved
# against the repository root. Reuses `root` rather than calling
# get_repository_root() a second time.
test_paths = root / Path(paths["data"]["input"]["test_dataset"])

# Logger injected into the dataset repository.
logger = LoguruLogger()

# Repository that loads the dataset from the CSV file.
csv_repo = CSVDatasetRepository(logger=logger, source_path=test_paths)

# Read the CSV file and load it into memory (pandas DataFrame).
test = csv_repo.load_dataset()

# Show the first 5 rows as a quick validation, then report the overall shape.
display(test.head())
print(f"le dataset chargé a {test.shape[0]} lignes et {test.shape[1]} colonnes")

[32m2025-12-28 12:13:20[0m | [1mINFO    [0m | [36mconfig.py:67[0m | [33mconfigure_logging()[0m | Loguru configuré avec succès (mode: dev) | {'env': 'dev'}
[32m2025-12-28 12:13:20[0m | [1mINFO    [0m | [36mcsv_dataset_repository.py:56[0m | [33mload_dataset()[0m | Chargement du dataset depuis : /Users/surelmanda/Downloads/ml-projects/Clean-Architecture-MLops/health_lifestyle_diabetes/data/input/test.csv | {}
[32m2025-12-28 12:13:20[0m | [1mINFO    [0m | [36mcsv_dataset_repository.py:63[0m | [33mload_dataset()[0m | Dataset chargé avec succès (19460 lignes, 31 colonnes). | {}


Unnamed: 0,Age,gender,ethnicity,education_level,income_level,employment_status,smoking_status,alcohol_consumption_per_week,physical_activity_minutes_per_week,diet_score,...,hdl_cholesterol,ldl_cholesterol,triglycerides,glucose_fasting,glucose_postprandial,insulin_level,hba1c,diabetes_risk_score,diabetes_stage,diagnosed_diabetes
0,22,Male,White,Highschool,Middle,Employed,Never,4,116,5.0,...,57,86,152,91,148,8.78,5.42,18.4,No Diabetes,0
1,62,Male,White,Graduate,Middle,Retired,Never,3,84,5.0,...,53,151,144,108,166,5.19,6.62,32.0,Type 2,1
2,33,Female,White,Highschool,Lower-Middle,Employed,Former,3,239,4.9,...,67,117,118,119,164,12.65,6.85,31.8,Type 2,1
3,41,Male,Asian,Highschool,Middle,Employed,Never,3,39,2.5,...,61,102,136,104,144,9.1,6.05,30.3,Pre-Diabetes,0
4,53,Male,White,Postgraduate,Upper-Middle,Employed,Never,2,12,4.8,...,72,84,199,92,140,3.32,5.88,35.3,Pre-Diabetes,0


le dataset chargé a 19460 lignes et 31 colonnes


In [3]:
import pandas as pd

from health_lifestyle_diabetes.infrastructure.streaming.pandas_dataframe_streamer import PandasDataFrameStreamer
from health_lifestyle_diabetes.application.services.dataframe_streamer_service import DataFrameStreamerService

# Small in-memory dataset used to exercise the streamer.
fake_records = [
    {"age": 45, "glucose": 132, "bmi": 29.3},
    {"age": 30, "glucose": 99,  "bmi": 23.1},
    {"age": 55, "glucose": 168, "bmi": 31.7},
]
df = pd.DataFrame(fake_records)

# Delay bounds forwarded to the streaming service.
MIN_DELAY, MAX_DELAY = 1, 2

# Dependency injection: the service receives its streamer implementation.
streamer = PandasDataFrameStreamer()
service = DataFrameStreamerService(streamer)

# Streaming: consume the rows one at a time.
row_stream = service.run(df, min_delay=MIN_DELAY, max_delay=MAX_DELAY)
for row in row_stream:
    # This is where each row could be sent to an API or to Kafka.
    print(row)
    # Wrap the single row into a one-row DataFrame and print it.
    data = pd.DataFrame([row])
    print(data)

[32m2025-12-28 14:39:32.501[0m | [1mINFO    [0m | [36mhealth_lifestyle_diabetes.infrastructure.streaming.pandas_dataframe_streamer[0m:[36mstream[0m:[36m35[0m - [1mEnvoi ligne user_id=abb4153a-d7e6-4238-9905-c0b64d1c056b[0m
[32m2025-12-28 14:39:33.744[0m | [1mINFO    [0m | [36mhealth_lifestyle_diabetes.infrastructure.streaming.pandas_dataframe_streamer[0m:[36mstream[0m:[36m35[0m - [1mEnvoi ligne user_id=3f52e359-f154-4c71-87a5-74e25da0eeca[0m


{'user_id': 'abb4153a-d7e6-4238-9905-c0b64d1c056b', 'age': 45.0, 'glucose': 132.0, 'bmi': 29.3}
                                user_id   age  glucose   bmi
0  abb4153a-d7e6-4238-9905-c0b64d1c056b  45.0    132.0  29.3


[32m2025-12-28 14:39:35.094[0m | [1mINFO    [0m | [36mhealth_lifestyle_diabetes.infrastructure.streaming.pandas_dataframe_streamer[0m:[36mstream[0m:[36m35[0m - [1mEnvoi ligne user_id=21c54937-7e3a-4897-bc62-a40c0d0ed45b[0m


{'user_id': '3f52e359-f154-4c71-87a5-74e25da0eeca', 'age': 30.0, 'glucose': 99.0, 'bmi': 23.1}
                                user_id   age  glucose   bmi
0  3f52e359-f154-4c71-87a5-74e25da0eeca  30.0     99.0  23.1
{'user_id': '21c54937-7e3a-4897-bc62-a40c0d0ed45b', 'age': 55.0, 'glucose': 168.0, 'bmi': 31.7}
                                user_id   age  glucose   bmi
0  21c54937-7e3a-4897-bc62-a40c0d0ed45b  55.0    168.0  31.7


In [None]:
J'aimerais maintenant coder en dur la sélection des features dans un fichier .py, puis les importer dans un environnement de 