# Data ingestion – Open-Meteo

In [1]:
import requests
import pandas as pd
import os

## Definition of the study scope

In [2]:
# Cities covered by the study, mapped to their (latitude, longitude) pair.
CITIES = {
    "Paris": (48.8566, 2.3522),
    "Lille": (50.6292, 3.0573),
    "Rouen": (49.4431, 1.0993),
    "Rennes": (48.1173, -1.6778),
    "Nantes": (47.2184, -1.5536),
    "Orleans": (47.9025, 1.9090),
    "Dijon": (47.3220, 5.0415),
    "Besançon": (47.2378, 6.0241),
    "Strasbourg": (48.5734, 7.7521),
    "Metz": (49.1193, 6.1757),
    "Reims": (49.2583, 4.0317),
    "Lyon": (45.7640, 4.8357),
    "Clermont-Ferrand": (45.7772, 3.0870),
    "Marseille": (43.2965, 5.3698),
    "Ajaccio": (41.9192, 8.7386),
    "Bordeaux": (44.8378, -0.5792),
    "Toulouse": (43.6047, 1.4442),
    "Montpellier": (43.6119, 3.8777),
    "Poitiers": (46.5802, 0.3404),
    "Limoges": (45.8336, 1.2611),
}

# Endpoint queried for historical data, and the inclusive study period
# (ISO "YYYY-MM-DD" strings, sent as start_date / end_date).
API_URL = "https://archive-api.open-meteo.com/v1/archive"
START_DATE = "2022-01-01"
END_DATE = "2022-12-31"

## Weather data extraction function

In [3]:
def fetch_historical_weather(city, lat, lon):
    """Fetch daily historical weather for one city from the archive API.

    Sends a GET request to ``API_URL`` asking for the daily maximum and
    minimum 2 m temperature and the precipitation sum between ``START_DATE``
    and ``END_DATE``, with the ``Europe/Paris`` timezone.

    Parameters
    ----------
    city : str
        Label written to the ``city`` column of the result and used in
        the diagnostic messages.
    lat, lon : float
        Coordinates sent as the ``latitude`` / ``longitude`` parameters.

    Returns
    -------
    pandas.DataFrame
        Frame built from the ``daily`` object of the JSON response, with
        ``time`` parsed to datetime and a constant ``city`` column.
        An empty DataFrame is returned (and a message printed) when the
        request fails, the status code is not 200, the body is not valid
        JSON, or the ``daily`` key is missing.
    """
    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": START_DATE,
        "end_date": END_DATE,
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
        "timezone": "Europe/Paris"
    }

    # Timeouts and connection errors raise instead of producing a status code;
    # treat them like any other failed fetch so one city cannot abort the run.
    try:
        response = requests.get(API_URL, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Request failed for {city}: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Error API for {city}: {response.status_code}")
        return pd.DataFrame()

    # A 200 response is not guaranteed to carry a JSON body; the decode
    # errors raised by response.json() are ValueError subclasses.
    try:
        json_data = response.json()
    except ValueError:
        print(f"Unexpected API response for {city}")
        return pd.DataFrame()

    if not isinstance(json_data, dict) or "daily" not in json_data:
        print(f"Unexpected API response for {city}")
        return pd.DataFrame()

    weather_df = pd.DataFrame(json_data["daily"])
    weather_df["time"] = pd.to_datetime(weather_df["time"])
    weather_df["city"] = city

    return weather_df


## Data aggregation and persistence

In [4]:
# Fetch each city once and keep the result per city, so failed fetches
# (returned as empty frames) can be reported instead of silently vanishing.
city_frames = {
    city: fetch_historical_weather(city, lat, lon)
    for city, (lat, lon) in CITIES.items()
}

failed_cities = [city for city, frame in city_frames.items() if frame.empty]
if failed_cities:
    print("No data retrieved for:", ", ".join(failed_cities))

# Only concatenate frames that actually contain data; without any of them the
# validation below could not even access the "time" column.
fetched_frames = [frame for frame in city_frames.values() if not frame.empty]
if not fetched_frames:
    raise RuntimeError("No weather data could be retrieved for any city")

all_data = pd.concat(fetched_frames, ignore_index=True)

# Basic data validation
# Derive the number of days from the configured period (both bounds included)
# rather than assuming a 365-day year.
days_in_period = len(pd.date_range(START_DATE, END_DATE, freq="D"))
expected_rows = len(CITIES) * days_in_period
print("Expected rows:", expected_rows)
print("Actual rows:", len(all_data))

print("Date range:", all_data["time"].min(), "to", all_data["time"].max())

print(all_data.isna().sum())

# Rows per city validation
rows_per_city = all_data.groupby("city").size()
print(rows_per_city)

# Save raw dataset
os.makedirs("data/raw", exist_ok=True)
all_data.to_csv("data/raw/weather_2022_raw.csv", index=False)

print("Dataset saved to data/raw/weather_2022_raw.csv")



Expected rows: 7300
Actual rows: 7300
Date range: 2022-01-01 00:00:00 to 2022-12-31 00:00:00
time                  0
temperature_2m_max    0
temperature_2m_min    0
precipitation_sum     0
city                  0
dtype: int64
city
Ajaccio             365
Besançon            365
Bordeaux            365
Clermont-Ferrand    365
Dijon               365
Lille               365
Limoges             365
Lyon                365
Marseille           365
Metz                365
Montpellier         365
Nantes              365
Orleans             365
Paris               365
Poitiers            365
Reims               365
Rennes              365
Rouen               365
Strasbourg          365
Toulouse            365
dtype: int64
Dataset saved to data/raw/weather_2022_raw.csv
