## How to fetch temp + elevation data (Open-Meteo)

1. Run the hex grid builder cell to create `hex_df` (lat/lon/hex_id) and set the batch count.
2. Run the batch helper cell to see how many batches you need.
3. For each batch, run the BATCH cell with the right `batch_number` and append the result to `all_weather_data`.
4. When all batches are collected, combine and save to CSV (columns: hex_id, local_temp_c, elevation_m).

In [1]:
import requests
import pandas as pd
import time
import math
import numpy as np

# Configuration
MACRO_BATCH_SIZE = 500  # How many rows YOU want to process at once
MICRO_BATCH_SIZE = 100  # How many rows sent to API per call (API Limit)
HEX_DF_LENGTH = None    # Will be set after hex_df is built
USA_URL = "https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json"
HEX_RADIUS = 50_000     # 50 km radius hex (matches maps-real.ipynb)


def fetch_weather_manual_batch(df_all_hexes, batch_number):
    """
    Fetches weather for a specific slice of the dataframe.
    Pulls 30-day mean temp and site elevation from Open-Meteo.
    batch_number: 1-based index (1, 2, 3...)
    """
    # 1. Calculate the Slice Indices
    start_idx = (batch_number - 1) * MACRO_BATCH_SIZE
    end_idx = start_idx + MACRO_BATCH_SIZE

    # Get the subset of data for this manual run
    df_batch = df_all_hexes.iloc[start_idx : end_idx].copy()

    if df_batch.empty:
        print(f"⚠️ Batch {batch_number} is empty. (You might be done!)")
        return pd.DataFrame()

    print(f"⚡ Processing Batch {batch_number} (Rows {start_idx} to {min(end_idx, len(df_all_hexes))})...")

    # Prepare lists for API
    lats = df_batch['lat'].tolist()
    lons = df_batch['lon'].tolist()
    ids = df_batch['hex_id'].tolist()

    weather_data = []

    # 2. Internal Micro-Batching (Chunks of 100)
    for i in range(0, len(df_batch), MICRO_BATCH_SIZE):
        # Slicing the lists
        chunk_ids = ids[i : i + MICRO_BATCH_SIZE]
        chunk_lats = lats[i : i + MICRO_BATCH_SIZE]
        chunk_lons = lons[i : i + MICRO_BATCH_SIZE]

        params = {
            "latitude": ",".join(map(str, chunk_lats)),
            "longitude": ",".join(map(str, chunk_lons)),
            "daily": "temperature_2m_mean",
            "past_days": 30,
            "timezone": "auto",
        }

        try:
            r = requests.get("https://api.open-meteo.com/v1/forecast", params=params, timeout=10)
            r.raise_for_status()
            responses = r.json()

            if not isinstance(responses, list):
                responses = [responses]

            for hex_id, resp in zip(chunk_ids, responses):
                daily_temps = resp.get('daily', {}).get('temperature_2m_mean', [])
                if daily_temps:
                    valid = [t for t in daily_temps if t is not None]
                    avg_temp = sum(valid) / len(valid) if valid else float('nan')
                else:
                    avg_temp = float('nan')

                elevation_m = resp.get('elevation', np.nan)

                weather_data.append({
                    'hex_id': hex_id,
                    'local_temp_c': avg_temp,
                    'elevation_m': elevation_m,
                })

            print(f"   ... Sub-batch {i//MICRO_BATCH_SIZE + 1} complete.")
            time.sleep(3)  # Short pause between API calls

        except Exception as e:
            print(f"   ❌ Error on sub-batch: {e}")

    print(f"✅ Batch {batch_number} Finished.")
    return pd.DataFrame(weather_data)


In [2]:
import geopandas as gpd
from shapely.geometry import Polygon


def make_hex(center_x, center_y, radius):
    angles = np.radians(np.arange(30, 390, 60))
    overlap_factor = 1.001  # tiny overlap to avoid gaps
    return Polygon([
        (
            center_x + radius * overlap_factor * np.cos(a),
            center_y + radius * overlap_factor * np.sin(a),
        )
        for a in angles
    ])


def build_hex_df():
    usa = gpd.read_file(USA_URL)
    usa_border = usa.unary_union
    usa_gdf = gpd.GeoDataFrame(geometry=[usa_border], crs="EPSG:4326")
    usa_proj = usa_gdf.to_crs("EPSG:5070")

    hex_width = 2 * HEX_RADIUS
    hex_height = np.sqrt(3) * HEX_RADIUS
    dx = np.sqrt(3) * HEX_RADIUS
    dy = 0.865 * hex_height

    minx, miny, maxx, maxy = usa_proj.total_bounds

    hexes = []
    row = 0
    y = miny - hex_height
    while y < maxy + hex_height:
        x_offset = (row % 2) * (dx / 2)
        x = minx - hex_width
        while x < maxx + hex_width:
            hexes.append(make_hex(x + x_offset, y, HEX_RADIUS))
            x += dx
        y += dy
        row += 1

    hexgrid_proj = gpd.GeoDataFrame(geometry=hexes, crs="EPSG:5070")
    hex_us_proj = gpd.overlay(hexgrid_proj, usa_proj, how="intersection")
    hex_us = hex_us_proj.to_crs("EPSG:4326")
    centers = hex_us.geometry.centroid

    return pd.DataFrame({
        "hex_id": np.arange(len(hex_us)),
        "lat": centers.y,
        "lon": centers.x,
    })


hex_df = build_hex_df()
HEX_DF_LENGTH = len(hex_df)
print(f"Built hex_df with {HEX_DF_LENGTH} hexes")
hex_df


  usa_border = usa.unary_union


Built hex_df with 1723 hexes



  centers = hex_us.geometry.centroid


Unnamed: 0,hex_id,lat,lon
0,0,18.080934,-66.997980
1,1,17.964432,-66.248199
2,2,18.363018,-67.069760
3,3,18.252461,-66.508874
4,4,18.213385,-65.929023
...,...,...,...
1718,1718,70.654117,-159.903567
1719,1719,52.820630,171.353883
1720,1720,52.921415,171.757686
1721,1721,68.506723,-166.350210


In [3]:
# Check how many batches you need
# (HEX_DF_LENGTH was set when hex_df was built)
total_hexes = HEX_DF_LENGTH or len(hex_df)
total_batches = math.ceil(total_hexes / MACRO_BATCH_SIZE)
print(f"You have {total_hexes} hexagons.")
print(f"You need to run Batches 1 through {total_batches}.")

# Container to store results
if 'all_weather_data' not in locals():
    all_weather_data = []


You have 1723 hexagons.
You need to run Batches 1 through 4.


In [13]:
# --- BATCH ---
# Example: start with batch 1 (change batch_number each run)
df_b4 = fetch_weather_manual_batch(hex_df, batch_number=4)
all_weather_data.append(df_b4)

⚡ Processing Batch 4 (Rows 1500 to 1723)...
   ... Sub-batch 1 complete.
   ... Sub-batch 2 complete.
   ... Sub-batch 3 complete.
✅ Batch 4 Finished.


In [17]:
all_weather_data[3]

Unnamed: 0,hex_id,local_temp_c,elevation_m
0,1500,-7.572973,1263.0
1,1501,-11.308108,531.0
2,1502,-12.637838,1211.0
3,1503,-10.824324,883.0
4,1504,-12.427027,260.0
...,...,...,...
218,1718,-7.521622,10.0
219,1719,4.272973,0.0
220,1720,4.278378,0.0
221,1721,-4.400000,0.0


In [18]:
# Combine all collected batches
all_weather_data_df = pd.concat(all_weather_data, ignore_index=True)

In [20]:
# Save combined temp + elevation data
all_weather_data_df.to_csv('hex_weather_data_all.csv', index=False)