In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import os
import sys
import requests
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import matplotlib.pyplot as plt
import time 

from vayu_gnn.dbx.dbx_config import dbx_helper, DropboxHelper

### Add extra nodes for neighboring weather data

In [7]:
city = 'Patna'

# Half the side length of the square ring of extra nodes, in the same units as
# the device coordinates (lat/long, stored below with CRS EPSG:4326).
half_side = 1
# Number of equally spaced points on each side of the square, corners included.
points_per_side = 9


def build_perimeter_nodes(center_lat, center_long, half_side=1, points_per_side=9,
                          start_index=1, prefix="extra_node_"):
    """Build extra nodes on the perimeter of a square centred on a point.

    Nodes are generated side by side in the order top, bottom, left, right.
    The top and bottom sides include the corners; the left and right sides
    skip them, so no coordinate is emitted twice. This yields
    ``4 * points_per_side - 4`` nodes.

    Parameters
    ----------
    center_lat, center_long : float
        Centre of the square.
    half_side : float
        Distance from the centre to each side of the square.
    points_per_side : int
        Number of equally spaced points per side (corners included).
    start_index : int
        Number used for the first generated node id.
    prefix : str
        Prefix of every generated node id.

    Returns
    -------
    dict
        ``{node_id: {'lat': float, 'long': float}}`` with coordinates rounded
        to 4 decimal places.
    """
    offsets = np.linspace(-half_side, half_side, points_per_side)
    inner_offsets = offsets[1:-1]  # side points without the two corners

    # (lat, long) pairs; the order fixes the numbering of the generated ids.
    perimeter = (
        [(center_lat + half_side, center_long + offset) for offset in offsets]    # top
        + [(center_lat - half_side, center_long + offset) for offset in offsets]  # bottom
        + [(center_lat + offset, center_long - half_side) for offset in inner_offsets]  # left
        + [(center_lat + offset, center_long + half_side) for offset in inner_offsets]  # right
    )

    return {
        f"{prefix}{index}": {'lat': float(round(lat, 4)), 'long': float(round(lon, 4))}
        for index, (lat, lon) in enumerate(perimeter, start=start_index)
    }


# Mapping of device id -> {'lat': ..., 'long': ...} for the existing sensors.
devices = dbx_helper.read_pickle(dbx_helper.clean_input_path, 'map_device_to_latlong', f'{city}_static.pickle')

if not devices:
    # Without this guard the averaging below fails with an opaque ZeroDivisionError.
    raise ValueError(f"No devices found for city '{city}'; cannot compute a centre point.")

# Calculate the center (average latitude and longitude) from the existing devices
center_lat = float(sum(device['lat'] for device in devices.values()) / len(devices))
center_long = float(sum(device['long'] for device in devices.values()) / len(devices))

extra_nodes = build_perimeter_nodes(center_lat, center_long, half_side, points_per_side)

# Real devices first, then the synthetic perimeter nodes.
combined_devices = {**devices, **extra_nodes}

dbx_helper.write_pickle(combined_devices, dbx_helper.clean_input_path, f'node_locations/{city}', 'nodes.pickle')

# Also save as a gdf
records = [
    {'device_id': device_id, 'lat': coords['lat'], 'long': coords['long']}
    for device_id, coords in combined_devices.items()
]

df = pd.DataFrame(records)
# Point geometry is (x, y) = (long, lat); built in one vectorised call.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['long'], df['lat']),
    crs="EPSG:4326",
)

dbx_helper.write_shp(gdf, dbx_helper.clean_input_path, f'node_locations/{city}/gdf', 'nodes')


File 'nodes.pickle' successfully uploaded to Dropbox path: '/input/clean/node_locations/Patna/nodes.pickle'
Shapefile written to temporary directory: /var/folders/w2/spn01gpx39d_btp2kmjt17jh0000gn/T/tmpamjruw37
File 'nodes.shp' successfully uploaded to Dropbox at '/input/clean/node_locations/Patna/gdf/nodes.shp'
File 'nodes.shx' successfully uploaded to Dropbox at '/input/clean/node_locations/Patna/gdf/nodes.shx'
File 'nodes.dbf' successfully uploaded to Dropbox at '/input/clean/node_locations/Patna/gdf/nodes.dbf'
File 'nodes.prj' successfully uploaded to Dropbox at '/input/clean/node_locations/Patna/gdf/nodes.prj'
File 'nodes.cpg' successfully uploaded to Dropbox at '/input/clean/node_locations/Patna/gdf/nodes.cpg'


In [None]:
# Show the GeoDataFrame of combined nodes (devices plus extra nodes) as this cell's output.
gdf

### Fetch historical weather data

In [None]:
import pandas as pd
import requests_cache
from openmeteo_requests import Client
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = Client(session=retry_session)

url = "https://archive-api.open-meteo.com/v1/archive"

# Single source of truth: requested API variable -> output column name.
# The response exposes the hourly variables by position, in the order they
# were requested, so the request list and the extraction loop below are both
# derived from this one ordered mapping and cannot drift apart.
hourly_variables = {
    "temperature_2m": "temperature",
    "wind_speed_10m": "wind_speed",
    "wind_direction_10m": "wind_direction",
    "relative_humidity_2m": "humidity",
    "precipitation": "precipitation",
    "soil_temperature_0_to_7cm": "soil_temperature",
    "soil_moisture_0_to_7cm": "soil_moisture",
    "surface_pressure": "pressure",
    "cloud_cover": "cloud_cover",
}

# Base params shared by every request; coordinates are added per node.
params = {
    "start_date": "2024-05-01",
    "end_date": "2025-02-28",
    "hourly": list(hourly_variables),
}

for node_name, coords in combined_devices.items():
    print(node_name)

    # Build a fresh dict per request instead of mutating the shared base params.
    request_params = {**params, "latitude": coords["lat"], "longitude": coords["long"]}

    # Query the API (assuming each call returns a list with one response)
    responses = openmeteo.weather_api(url, params=request_params)
    response = responses[0]

    # Coordinates reported back by the API for this request.
    api_lat = response.Latitude()
    api_long = response.Longitude()

    hourly = response.Hourly()

    # Timestamps: Time()/TimeEnd() are epoch seconds and Interval() is the step
    # in seconds; inclusive="left" leaves the end timestamp out of the range.
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )
    }

    # One column per requested variable, read by its position in the request.
    for position, column_name in enumerate(hourly_variables.values()):
        hourly_data[column_name] = hourly.Variables(position).ValuesAsNumpy()

    # Create a DataFrame for the current node
    df = pd.DataFrame(data=hourly_data)

    # Add metadata columns from both the node dictionary and the API response
    df["node"] = node_name
    df["device_lat"] = coords["lat"]
    df["device_long"] = coords["long"]
    df["om_lat"] = api_lat
    df["om_long"] = api_long

    # write to dropbox (one CSV per node)
    dbx_helper.write_csv(df, dbx_helper.clean_input_path, f'node_level_weather_data/historical_weather/{city}', f"{city}_{node_name}.csv")

    # Pause between requests — presumably to stay within API rate limits; confirm.
    time.sleep(3)

In [None]:
city = 'Patna'

base_path = dbx_helper.clean_input_path
directory = f'node_level_weather_data/historical_weather/{city}'
combined_name = f'{city}_historical_weather_data.csv'

# Build the Dropbox folder path with forward slashes explicitly; os.path.join
# would insert backslashes when the notebook is run on Windows.
full_path = "/".join([str(base_path).rstrip("/"), directory])

# Skip a combined CSV if one is present in the per-node folder, so that
# re-running this cell cannot concatenate the combined data with its own inputs.
# NOTE(review): assumes list_files_in_folder returns file names — confirm.
files = [
    file for file in (dbx_helper.list_files_in_folder(full_path) or [])
    if not str(file).endswith(combined_name)
]
if not files:
    # pd.concat([]) would raise a less helpful "No objects to concatenate".
    raise ValueError(f"No per-node weather files found in '{full_path}'.")

dfs = [dbx_helper.read_csv(base_path, directory, file) for file in files]
combined_df = pd.concat(dfs, ignore_index=True)

# The combined file is written one level above the per-node folder.
dbx_helper.write_csv(combined_df, base_path, 'node_level_weather_data/historical_weather', combined_name)

### Fetch historical weather forecasts

In [None]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
# Use the module imported in the first cell so this cell does not depend on
# an import made inside an earlier cell.
openmeteo = openmeteo_requests.Client(session=retry_session)

url = "https://historical-forecast-api.open-meteo.com/v1/forecast"

# Single source of truth: requested API variable -> output column name.
# The response exposes the hourly variables by position, in the order they
# were requested, so the request list and the extraction loop below are both
# derived from this one ordered mapping and cannot drift apart.
hourly_variables = {
    "temperature_2m": "temperature",
    "wind_speed_10m": "wind_speed",
    "wind_direction_10m": "wind_direction",
    "relative_humidity_2m": "humidity",
    "precipitation": "precipitation",
    "soil_temperature_0cm": "soil_temperature",
    "soil_moisture_0_to_1cm": "soil_moisture",
    "surface_pressure": "pressure",
    "cloud_cover": "cloud_cover",
}

# Base params shared by every request; coordinates are added per node.
params = {
    "start_date": "2024-05-01",
    "end_date": "2025-02-28",
    "hourly": list(hourly_variables),
}

for node_name, coords in combined_devices.items():
    print(node_name)

    # Build a fresh dict per request instead of mutating the shared base params.
    request_params = {**params, "latitude": coords["lat"], "longitude": coords["long"]}

    # Query the API (assuming each call returns a list with one response)
    responses = openmeteo.weather_api(url, params=request_params)
    response = responses[0]

    # Coordinates reported back by the API for this request.
    api_lat = response.Latitude()
    api_long = response.Longitude()

    hourly = response.Hourly()

    # Timestamps: Time()/TimeEnd() are epoch seconds and Interval() is the step
    # in seconds; inclusive="left" leaves the end timestamp out of the range.
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )
    }

    # One column per requested variable, read by its position in the request.
    for position, column_name in enumerate(hourly_variables.values()):
        hourly_data[column_name] = hourly.Variables(position).ValuesAsNumpy()

    # Create a DataFrame for the current node
    df = pd.DataFrame(data=hourly_data)

    # Add metadata columns from both the node dictionary and the API response
    df["node"] = node_name
    df["device_lat"] = coords["lat"]
    df["device_long"] = coords["long"]
    df["om_lat"] = api_lat
    df["om_long"] = api_long

    # write to dropbox (one CSV per node)
    dbx_helper.write_csv(df, dbx_helper.clean_input_path, f'node_level_weather_data/historical_weather_forecasts/{city}', f"{city}_{node_name}.csv")

    # Pause between requests — presumably to stay within API rate limits; confirm.
    time.sleep(3)

In [None]:
city = 'Patna'

base_path = dbx_helper.clean_input_path
directory = f'node_level_weather_data/historical_weather_forecasts/{city}'
combined_name = f'{city}_historical_weather_forecast_data.csv'

# Build the Dropbox folder path with forward slashes explicitly; os.path.join
# would insert backslashes when the notebook is run on Windows.
full_path = "/".join([str(base_path).rstrip("/"), directory])

# Skip a combined CSV if one is present in the per-node folder, so that
# re-running this cell cannot concatenate the combined data with its own inputs.
# NOTE(review): assumes list_files_in_folder returns file names — confirm.
files = [
    file for file in (dbx_helper.list_files_in_folder(full_path) or [])
    if not str(file).endswith(combined_name)
]
if not files:
    # pd.concat([]) would raise a less helpful "No objects to concatenate".
    raise ValueError(f"No per-node forecast files found in '{full_path}'.")

dfs = [dbx_helper.read_csv(base_path, directory, file) for file in files]
combined_df = pd.concat(dfs, ignore_index=True)

# The combined file is written one level above the per-node folder.
dbx_helper.write_csv(combined_df, base_path, 'node_level_weather_data/historical_weather_forecasts', combined_name)

Files in folder '/input/clean/node_level_weather_data/historical_weather_forecasts/Patna':
Size of the CSV file: 58.64 MB
File 'Patna_historical_weather_forecast_data.csv' successfully uploaded to Dropbox path: '/input/clean/node_level_weather_data/historical_weather_forecasts/Patna/Patna_historical_weather_forecast_data.csv'
