In [13]:
import requests
import pandas as pd
import time

In [10]:
# ---- Download configuration ----
START = 2000          # start of the requested period (year)
END = 2024            # end of the requested period (year)
TEMPORAL = "daily"    # temporal endpoint to query

# NASA POWER variables to request, grouped by theme.
PARAMS = [
    # precipitation & humidity
    "PRECTOTCORR",
    "RH2M",
    "QV2M",
    # temperature (2m)
    "T2M",
    "T2M_MAX",
    "T2M_MIN",
    "T2M_RANGE",
    # wind (2m, 10m), direction (10m)
    "WS2M",
    "WS10M",
    "WD10M",
    # radiation (shortwave, all-sky, clear-sky)
    "ALLSKY_SFC_SW_DWN",
    "CLRSKY_SFC_SW_DWN",
    # pressure
    "PS",
    # soil moisture (top + root zone)
    "GWETTOP",
    "GWETROOT",
]

# Île-de-France departments: one (INSEE code, name, latitude, longitude)
# tuple per department; the coordinates are the query point for that department.
IDF = [
    ("75", "Paris", 48.8566, 2.3522),
    ("77", "Seine-et-Marne", 48.539, 2.999),
    ("78", "Yvelines", 48.8, 1.9),
    ("91", "Essonne", 48.53, 2.25),
    ("92", "Hauts-de-Seine", 48.84, 2.24),
    ("93", "Seine-Saint-Denis", 48.91, 2.45),
    ("94", "Val-de-Marne", 48.77, 2.43),
    ("95", "Val-d'Oise", 49.07, 2.2),
]


In [11]:

def _power_parameters_to_frame(param, temporal, fill_value=-999.0):
    """Convert POWER's ``{variable: {date_key: value}}`` mapping into a wide frame."""
    # Nested dict -> one row per date key, one column per variable.
    wide = pd.DataFrame(param)
    if wide.empty:
        raise RuntimeError("POWER response contained no parameter data")

    # Alphabetical variable order; JSON nulls become NaN through the float cast.
    wide = wide.astype("float64").sort_index(axis=1)
    # The API reports gaps with a sentinel number rather than null. Turn it into
    # NaN so isnull()/mean()/etc. treat those cells as missing.
    wide = wide.mask(wide == fill_value)

    # Date keys are "YYYYMMDD" (daily) or "YYYYMM" (monthly).
    keys = pd.Series(wide.index.astype(str))
    parts = pd.DataFrame({
        "year": keys.str[:4].astype(int),
        "month": keys.str[4:6].astype(int),
        "day": keys.str[6:8].astype(int) if temporal == "daily" else 1,
    })
    out = pd.concat([parts, wide.reset_index(drop=True)], axis=1)

    if temporal == "monthly":
        # Monthly responses include a pseudo-month "13" carrying the annual
        # value; it is not a calendar date and would break to_datetime.
        out = out[out["month"].between(1, 12)]

    out = out.sort_values(["year", "month", "day"]).reset_index(drop=True)
    # Proper datetime column, placed first.
    out.insert(0, "date", pd.to_datetime(out[["year", "month", "day"]]))
    return out


def fetch_power(lat, lon, start_year, end_year, temporal="daily"):
    """
    Download NASA POWER point data and return it as a tidy DataFrame.

    One row per date (daily or monthly), no aggregation. Columns are
    ``date, year, month, day`` followed by one float column per variable in
    the module-level ``PARAMS`` list, in alphabetical order.

    Parameters
    ----------
    lat, lon : float
        Latitude / longitude of the query point.
    start_year, end_year : int or str
        Sent unchanged as the API's ``start`` / ``end`` query values.
    temporal : {"daily", "monthly"}
        Which temporal endpoint to query. For "monthly", ``day`` is always 1
        and the annual pseudo-month (13) is dropped.

    Returns
    -------
    pandas.DataFrame
        Missing observations are NaN: the API's fill value (taken from the
        response header when present, otherwise -999) is converted to NaN.

    Raises
    ------
    ValueError
        If ``temporal`` is not "daily" or "monthly".
    RuntimeError
        If the HTTP request fails, the response is not JSON, or the response
        holds no parameter data.
    """
    if temporal not in ("daily", "monthly"):
        raise ValueError("temporal must be 'daily' or 'monthly'")

    # Let requests build and encode the query string.
    r = requests.get(
        f"https://power.larc.nasa.gov/api/temporal/{temporal}/point",
        params={
            "parameters": ",".join(PARAMS),
            "community": "AG",
            "longitude": lon,
            "latitude": lat,
            "start": start_year,
            "end": end_year,
            "format": "JSON",
        },
        timeout=90,
    )
    content_type = r.headers.get("Content-Type", "").lower()
    if not r.ok or "application/json" not in content_type:
        raise RuntimeError(f"POWER error {r.status_code}: {r.text[:200]}")

    payload = r.json()
    param = payload.get("properties", {}).get("parameter", {})
    fill_value = payload.get("header", {}).get("fill_value")
    if fill_value is None:
        fill_value = -999.0  # POWER's documented default sentinel
    return _power_parameters_to_frame(param, temporal, fill_value)

In [12]:

# Fetch every department in turn, tag its rows with the INSEE code and name,
# save one CSV per department, then save a single combined CSV for the region.
all_frames = []
for code, name, lat, lon in IDF:
    print(f"Downloading {name} ({code})")
    df = fetch_power(lat, lon, START, END, temporal=TEMPORAL).assign(
        insee_dep=code,
        department=name,
    )
    per_department_csv = f"idf_{code}_{TEMPORAL}_power_{START}_{END}.csv"
    df.to_csv(per_department_csv, index=False)
    all_frames.append(df)

# Stack the per-department frames into one long table with a fresh index.
combined_csv = f"idf_all_{TEMPORAL}_power_{START}_{END}.csv"
idf_all = pd.concat(all_frames, ignore_index=True)
idf_all.to_csv(combined_csv, index=False)

print("Done:", idf_all.shape, "rows")

Downloading Paris (75)
Downloading Seine-et-Marne (77)
Downloading Yvelines (78)
Downloading Essonne (91)
Downloading Hauts-de-Seine (92)
Downloading Seine-Saint-Denis (93)
Downloading Val-de-Marne (94)
Downloading Val-d'Oise (95)
Done: (73056, 21) rows


In [17]:
# Preview the first five rows of the combined frame.
idf_all.head(5)

var,date,year,month,day,ALLSKY_SFC_SW_DWN,CLRSKY_SFC_SW_DWN,GWETROOT,GWETTOP,PRECTOTCORR,PS,...,RH2M,T2M,T2M_MAX,T2M_MIN,T2M_RANGE,WD10M,WS10M,WS2M,insee_dep,department
0,2000-01-01,2000,1,1,2.05,5.2,0.64,0.66,0.88,101.49,...,99.07,6.12,8.51,4.37,4.14,264.4,2.33,1.41,75,Paris
1,2000-01-02,2000,1,2,2.26,5.18,0.63,0.65,0.35,101.77,...,98.04,6.08,7.41,5.32,2.09,192.8,3.61,2.42,75,Paris
2,2000-01-03,2000,1,3,3.45,4.99,0.63,0.65,0.31,101.46,...,93.75,4.85,7.19,3.55,3.64,197.7,6.04,4.23,75,Paris
3,2000-01-04,2000,1,4,1.82,5.36,0.63,0.66,4.74,100.81,...,96.94,5.88,8.42,3.84,4.58,216.4,6.45,4.44,75,Paris
4,2000-01-05,2000,1,5,3.49,5.02,0.63,0.65,0.22,100.92,...,97.22,5.05,8.36,3.58,4.78,196.5,4.18,2.85,75,Paris


In [14]:
# Summarise the combined frame: column dtypes and non-null counts.
idf_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73056 entries, 0 to 73055
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   date               73056 non-null  datetime64[ns]
 1   year               73056 non-null  int64         
 2   month              73056 non-null  int64         
 3   day                73056 non-null  int64         
 4   ALLSKY_SFC_SW_DWN  73056 non-null  float64       
 5   CLRSKY_SFC_SW_DWN  73056 non-null  float64       
 6   GWETROOT           73056 non-null  float64       
 7   GWETTOP            73056 non-null  float64       
 8   PRECTOTCORR        73056 non-null  float64       
 9   PS                 73056 non-null  float64       
 10  QV2M               73056 non-null  float64       
 11  RH2M               73056 non-null  float64       
 12  T2M                73056 non-null  float64       
 13  T2M_MAX            73056 non-null  float64       
 14  T2M_MI

In [16]:
# Per-column count of missing (NaN) entries.
# NOTE(review): NASA POWER reports gaps with a -999 fill value; unless those
# were converted to NaN upstream, zeros here do not prove the data is complete.
idf_all.isna().sum()

var
date                 0
year                 0
month                0
day                  0
ALLSKY_SFC_SW_DWN    0
CLRSKY_SFC_SW_DWN    0
GWETROOT             0
GWETTOP              0
PRECTOTCORR          0
PS                   0
QV2M                 0
RH2M                 0
T2M                  0
T2M_MAX              0
T2M_MIN              0
T2M_RANGE            0
WD10M                0
WS10M                0
WS2M                 0
insee_dep            0
department           0
dtype: int64