In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import requests

from domain.entities import Station
from services.data_fetcher import DataFetcher
from api.extractors import APIExtractor
from processing.validators import DataValidator
from processing.transformers import DataTransformer

def load_stations(csv_path="data/stations/stations_meteo_test.csv"):
    """Load weather stations from a semicolon-separated CSV file.

    Args:
        csv_path: Location of the CSV file (``str`` or ``Path``). Defaults to
            the test station list this notebook has always read. The file
            must provide the columns ``id_nom``, ``nom``, ``longitude`` and
            ``latitude``.

    Returns:
        list: One ``Station`` per CSV row, in file order (empty when the file
        has no data rows).

    Raises:
        KeyError: If one of the required columns is missing from the file.
    """
    df = pd.read_csv(Path(csv_path), sep=';')

    # Iterate the columns in lockstep instead of using DataFrame.iterrows():
    # iterrows() materialises a Series per row and does not preserve dtypes
    # across the row, whereas zipping the columns keeps each column's own type.
    return [
        Station(id=id_nom, name=nom, longitude=longitude, latitude=latitude)
        for id_nom, nom, longitude, latitude in zip(
            df['id_nom'], df['nom'], df['longitude'], df['latitude']
        )
    ]

In [2]:
# Work with the first station of the list. Load the CSV once and reuse the
# resulting object for its id instead of re-reading the file a second time.
station = load_stations()[0]
station_id = station.id
station_id

'42-station-meteo-toulouse-parc-compans-cafarelli'

In [3]:
# Run the project's fetch-and-load step for the selected station.
# In the recorded run this printed a "Successfully loaded 96 reports" message
# and returned True.
data_fetcher = DataFetcher()
data_fetcher.fetch_and_load(station)

Successfully loaded 96 reports for Parc Compans Cafarelli


True

In [20]:
# Extract the raw readings for the selected station; `data` is the unformatted
# table that the later cells format, validate and range-check.
extractor = APIExtractor()
data = extractor.extract(station)

In [21]:
# Display the extracted table (timestamp, temperature, humidity, pressure columns).
data

Unnamed: 0,heure_de_paris,temperature_en_degre_c,humidite,pression
0,2025-11-06T11:00:00+00:00,12.6,87,99200
1,2025-11-06T10:00:00+00:00,12.9,86,99300
2,2025-11-06T09:00:00+00:00,13.2,86,99200
3,2025-11-06T08:00:00+00:00,13.5,84,99300
4,2025-11-06T07:00:00+00:00,14.9,81,99300
...,...,...,...,...
91,2025-11-02T16:00:00+00:00,14.9,75,100600
92,2025-11-02T15:00:00+00:00,15.3,74,100500
93,2025-11-02T14:00:00+00:00,16.1,69,100500
94,2025-11-02T13:00:00+00:00,15.8,71,100500


In [22]:
# Apply the project's formatting step to the raw extraction. The raw `data`
# is kept untouched; the formatted result is bound to a separate name.
transformer = DataTransformer()
formated_data = transformer.format_data(data)

In [23]:
# Check the formatted frame against the validator's format rules.
# The recorded run returned False.
# NOTE(review): the dtype debug cell that follows shows int32 / int16 / float32
# columns while it compares against int64 / float64 — presumably the validator
# expects the 64-bit dtypes; confirm against DataValidator.is_format_correct.
validator = DataValidator()
validator.is_format_correct(formated_data)

False

In [24]:
# Debug each condition: print the actual dtype of every formatted column next
# to the outcome of the dtype comparison being investigated, to find out which
# column makes the format validation fail.
print(f"Pressure dtype: {formated_data['pression'].dtype}")
print(f"Pressure is int64: {formated_data['pression'].dtype == np.int64}")

print(f"Humidity dtype: {formated_data['humidite'].dtype}")
print(f"Humidity is int64: {formated_data['humidite'].dtype == np.int64}")

print(f"Temperature dtype: {formated_data['temperature_en_degre_c'].dtype}")
print(f"Temperature is float64: {formated_data['temperature_en_degre_c'].dtype == np.float64}")

# The timestamp column is checked with the pandas helper that accepts any
# datetime64 dtype rather than with an exact dtype comparison.
print(f"Heure dtype: {formated_data['heure_de_paris'].dtype}")
print(f"Heure is datetime: {pd.api.types.is_datetime64_any_dtype(formated_data['heure_de_paris'])}")


Pressure dtype: int32
Pressure is int64: False
Humidity dtype: int16
Humidity is int64: False
Temperature dtype: float32
Temperature is float64: False
Heure dtype: datetime64[ns, UTC]
Heure is datetime: True


In [7]:
# Plausibility check on the raw extraction: every reading of each measurement
# must lie inside its closed interval (Series.between includes both bounds by
# default).
valid_temperature = data['temperature_en_degre_c'].between(-10, 50).all()
valid_humidity = data['humidite'].between(0, 100).all()
valid_pressure = data['pression'].between(95000, 105000).all()

# Overall verdict as a plain Python bool: True only when all three checks pass.
all((valid_temperature, valid_humidity, valid_pressure))

True