In [71]:
import json
import math
from datetime import date, datetime
from pathlib import Path

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import psycopg2
from geoalchemy2 import Geometry
from shapely.geometry import Point, box
from sqlalchemy import URL, create_engine, inspect, text as sql_text
from sqlalchemy.orm import sessionmaker

from pyagnps import climate


In [6]:
# Full candidate list, kept for reference:
# thucs_to_process = {'0593', '0594', '0595', '1148', '1149', '1150'}
thucs_to_process = {'0593'}

In [7]:
# NLDAS-2 grid centroid layer (the file name indicates EPSG:4326). Each feature
# carries an `nldas2_grid_ID` property that is used further down as the station id.
path_grid = Path('../../inputs/climate/NLDAS2_GRID_CENTROIDS_epsg4326.gpkg')
nldas2_grid = gpd.read_file(path_grid)

In [8]:
# THUC layer; its `tophucid` column is used below to select the area of interest.
path_thucs = Path('../../inputs/thucs/tophuc_S_M_40000_closed_holes_with_container_thuc_merged_bbox_area_first_kept.gpkg')
thucs = gpd.read_file(path_thucs)

In [9]:
# Rows of the THUC layer whose id is '0593' (only the first geometry is buffered below).
my_thuc = thucs.loc[thucs['tophucid'].isin(['0593'])]

# Buffer by sqrt(2)/2 * 0.125 degrees, i.e. half the diagonal of one NLDAS-2 grid
# cell, so that every grid point likely to be needed is captured.
buffered_geom = my_thuc.geometry.iloc[0].buffer(math.sqrt(2) / 2 * 0.125)

# Wrap the buffered geometry (not a bounding box) in a one-row GeoDataFrame
# sharing the THUC layer's CRS so it can take part in a spatial join.
buffered_thuc = gpd.GeoDataFrame({'geometry': buffered_geom}, index=[0], crs=my_thuc.crs)

# Keep only the grid points that fall within the buffered THUC.
contained_stations = gpd.sjoin(nldas2_grid, buffered_thuc, predicate='within', how='inner')

# Interactive map: THUC outline first, selected grid points drawn on the same map.
thuc_map = my_thuc.explore(name='THUC')
stations_map = contained_stations.explore(m=thuc_map, name="Stations", color='red')
folium.LayerControl().add_to(stations_map)

stations_map

In [10]:
def open_creds_dict(path_to_json_creds):
    """
    Load a credentials dictionary from a JSON file.
    * Inputs:
    - path_to_json_creds: str or path-like pointing to a JSON file
    * Output:
    - credentials: the object decoded from the file (a dict for the
      credential files used in this notebook)
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(path_to_json_creds, "r", encoding="utf-8") as f:
        return json.load(f)

# DATABASE SETUP
path_to_creds_aims = Path("../../inputs/db_credentials.json")
path_to_creds_menderes = Path("../../inputs/db_credentials_menderes.json")

# One credentials dict per target database, keyed by a short name.
# 'aims' and 'menderes' are read from JSON files; 'docker' is defined inline.
# NOTE(review): the 'docker' password is hard-coded here -- confirm it is only
# ever a throwaway local test database before sharing this notebook.
creds = dict(
    aims=open_creds_dict(path_to_creds_aims),
    menderes=open_creds_dict(path_to_creds_menderes),
    docker=dict(
        user='postgres',
        password='postgres_pass',
        host='localhost',
        port='5432',
        database='test_db',
    ),
)

In [11]:
def url_object(db):
    """Build the SQLAlchemy connection URL for the credentials stored under creds[db]."""
    db_creds = creds[db]
    return URL.create(
        "postgresql",
        username=db_creds['user'],
        password=db_creds['password'],
        host=db_creds['host'],
        port=db_creds['port'],
        database=db_creds['database'],
    )


# Target the local docker database.
db_url = url_object('docker')
engine = create_engine(db_url)

# Long-lived streaming connection (stream_results=True); it is never closed in this notebook.
conn = engine.connect().execution_options(stream_results=True)

In [12]:
# table = 'thuc_0593_annagnps_cell_ids'
# # table = 'thuc_0593_annagnps_reach_ids' 

# gdf = gpd.read_postgis(sql_text(f"SELECT * FROM {table}"), conn, geom_col="geom")

# table = 'thuc_0593_annagnps_cell_data_section'
# df = pd.read_sql(sql_text(f"SELECT * FROM {table}"), conn)
# df

# gdf = gdf.merge(df, left_on='dn', right_on='cell_id', how='outer')

Initialize table in the database

In [149]:
# DDL for the NLDAS-2 climate table: one row per (station_id, date), turned into
# a TimescaleDB hypertable on `date` with compression segmented by station.
query = """
CREATE TABLE climate_nldas2 (
    station_id TEXT,
    date DATE,
    month INT2,
    day INT2,
    year INT2,
    max_air_temperature FLOAT4,
    min_air_temperature FLOAT4,
    precip FLOAT4,
    dew_point FLOAT4,
    sky_cover FLOAT4,
    wind_speed FLOAT4,
    wind_direction FLOAT4,
    solar_radiation FLOAT4,
    storm_type_id TEXT,
    potential_et FLOAT4,
    actual_et FLOAT4,
    actual_ei FLOAT4,
    input_units_code INT2,
    geom GEOMETRY(Point, 4326),
    PRIMARY KEY (station_id, date)
);

-- Convert the table into a hypertable
SELECT create_hypertable('climate_nldas2', 'date');

-- Enable compression
ALTER TABLE climate_nldas2 SET (timescaledb.compress, timescaledb.compress_orderby = 'date', timescaledb.compress_segmentby = 'station_id');

-- Set up chunking
SELECT set_chunk_time_interval('climate_nldas2', INTERVAL '1 month');

"""

# engine.begin() commits when the block exits without error. A plain
# engine.connect() with no explicit commit() rolls the DDL back on close
# under SQLAlchemy 2.x, leaving no table behind.
with engine.begin() as connection:
    connection.execute(sql_text(query))


Query Climate

In [9]:
from pyagnps import climate

In [23]:
def prepare_annagnps_climate_for_db(clm, station_id, xgrid, ygrid):
    """
    Prepare climate data for insertion into the climate_nldas2 table
    * Inputs:
    - clm: pandas.DataFrame in AnnAGNPS format (left unmodified)
    - station_id: str
    - xgrid: float longitude in EPSG:4326
    - ygrid: float latitude in EPSG:4326
    * Output:
    - gdf_clm: GeoDataFrame in EPSG:4326 with lower-case column names, a
      constant `station_id` column and a `geom` geometry column holding the
      same Point(xgrid, ygrid) on every row
    """
    # Work on a copy: the caller's frame must keep its original column names
    # and must not silently gain a `station_id` column.
    clm_db = clm.copy()
    clm_db.columns = clm_db.columns.str.lower()
    clm_db['station_id'] = station_id

    station_point = Point(xgrid, ygrid)
    gdf_clm = gpd.GeoDataFrame(clm_db, geometry=[station_point] * len(clm_db), crs="EPSG:4326")

    # Rename the active geometry column to match the `geom` column of the table.
    gdf_clm = gdf_clm.rename_geometry('geom')

    return gdf_clm

In [55]:
# Development run: build the climate frame for the first grid point only
# (the loop is cut short by the `break` at the end of the first iteration).
for row in nldas2_grid.iterfeatures():
    x, y = row['geometry']['coordinates']
    station_id = str(row['properties']['nldas2_grid_ID'])

    clm_annagnps = climate.ClimateAnnAGNPSCoords(
        coords=(x, y),
        start="1980-01-01",
        end="1981-07-31",
        date_mode="local",
    )
    clm = clm_annagnps.query_nldas2_generate_annagnps_climate_daily(float_format='%.2f')

    # Database-ready GeoDataFrame for this station.
    gdf_clm = prepare_annagnps_climate_for_db(clm, station_id, x, y)

    print(x, y, station_id)
    break

-124.9375 52.9375 103473


In [191]:
clm

Unnamed: 0_level_0,Month,Day,Year,Max_Air_Temperature,Min_Air_Temperature,Precip,Dew_Point,Sky_Cover,Wind_Speed,Wind_Direction,Solar_Radiation,Storm_Type_ID,Potential_ET,Actual_ET,Actual_EI,Input_Units_Code
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1980-01-01,1,1,1980,12.050000,0.760000,0.0000,-0.681253,,2.231398,103.211571,128.114166,,2.2029,,,1
1980-01-02,1,2,1980,14.380000,1.230000,0.0000,-0.861304,,2.056890,292.809906,117.851501,,2.0488,,,1
1980-01-03,1,3,1980,9.920000,5.310000,15.6493,1.186351,,3.250901,230.432938,41.063084,,1.1566,,,1
1980-01-04,1,4,1980,8.170000,0.610000,0.3036,1.029495,,3.409409,153.014587,130.072006,,2.3345,,,1
1980-01-05,1,5,1980,9.670000,2.450000,0.3595,-3.143534,,1.656260,179.003555,116.239250,,2.3259,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1981-07-27,7,27,1981,33.880001,25.500000,3.5917,22.780249,,2.851631,166.485825,214.270996,,6.5142,,,1
1981-07-28,7,28,1981,33.680000,25.500000,0.1320,22.250149,,2.902539,44.719715,213.845413,,6.7388,,,1
1981-07-29,7,29,1981,33.380001,26.670000,2.3658,22.837992,,1.894577,123.436813,181.741089,,5.5451,,,1
1981-07-30,7,30,1981,35.139999,25.540001,0.4994,20.735432,,2.093864,220.184219,264.637848,,8.5752,,,1


In [22]:
# Useless because we're using the nldas2_grid directly

# def get_nearest_nldas2_grid_id_xy(x, y, nldas2_grid):
#     nearest_idx = list(nldas2_grid.sindex.nearest(Point(x,y), return_all=False, max_distance=0.125))[1] # Max distance 1/8th of a degree
#     point = nldas2_grid[nldas2_grid.index.isin(nearest_idx)]
#     grid_id = point.iloc[0].nldas2_grid_ID
#     xgrid, ygrid = point.iloc[0].geometry.xy
#     xgrid, ygrid = xgrid[0], ygrid[0]
#     return xgrid, ygrid, grid_id


# x, y = -89.8125 + 1/16, 32.3125 + 1/16
# x, y = -91.81, 32.407

# xgrid, ygrid, grid_id = get_nearest_nldas2_grid_id_xy(x, y, nldas2_grid)

# print(f"Queried point: x = {x}, y = {y}")
# print(f"Nearest NLDAS-2 Grid ID: {grid_id}, x = {xgrid}, y = {ygrid}")

In [196]:

def insert_climate_nldas2(gdf_clm, table="climate_nldas2"):
    """
    Append a climate GeoDataFrame to a PostGIS table.
    * Inputs:
    - gdf_clm: GeoDataFrame to write; its index is written as a column (index=True)
    - table: str name of the destination table (default "climate_nldas2")
    * Notes:
    - Uses the notebook-level `engine`; rows are appended to the existing table.
    """
    # Honour the `table` argument instead of always writing to "climate_nldas2".
    gdf_clm.to_postgis(table, engine, if_exists="append", index=True)


In [35]:
# Write a function that tests if there is data for a specific station

def climate_table_has_station(station_name, engine, table="climate_nldas2"):
    """
    Check whether a table holds rows for a station and report their date span.
    * Inputs:
    - station_name: station identifier; compared as text against `station_id`
    - engine: SQLAlchemy engine used to open a connection
    - table: str name of the table to query (default "climate_nldas2")
    * Output:
    - (True, min_date, max_date) when at least one row exists for the station
    - (False, None, None) otherwise
    """
    # The station id is sent as a bound parameter, never spliced into the SQL.
    # A table name cannot be bound, so `table` is still interpolated and must
    # come from trusted code.
    query = f"""
        SELECT MIN(date) AS min_date, MAX(date) AS max_date
        FROM {table}
        WHERE station_id = :station_id
        GROUP BY station_id
    """
    with engine.connect() as connection:
        result = connection.execute(sql_text(query), {"station_id": str(station_name)})
        # Fetch the row itself rather than trusting rowcount, which is not a
        # reliable row count for SELECT statements.
        row = result.first()

    # GROUP BY yields no row at all when the station is absent.
    if row is None:
        return False, None, None

    return True, row[0], row[1]

In [72]:
def filter_climate_data(gdf_clm, min_date, max_date):
    """Returns a copy of the rows of gdf_clm whose index lies strictly outside
    the closed interval [min_date, max_date].

    gdf_clm: GeoDataFrame (or DataFrame) indexed by date
    min_date: datetime.date or None
    max_date: datetime.date or None

    When both bounds are None (no interval to exclude, e.g. the station has no
    rows in the database yet) every row is returned.
    """
    # Without this guard both bounds become NaT, every comparison is False and
    # the result would silently be empty.
    if min_date is None and max_date is None:
        return gdf_clm.copy()

    lower_bound = np.datetime64(min_date)
    upper_bound = np.datetime64(max_date)

    # Keep rows dated before the interval starts or after it ends.
    is_outside = (gdf_clm.index < lower_bound) | (gdf_clm.index > upper_bound)

    return gdf_clm[is_outside].copy()

In [73]:
# Example date interval used to exercise filter_climate_data.
# Built directly as datetime.date values.
min_date = date(1980, 1, 2)
max_date = date(1980, 1, 3)

In [74]:
# filter_climate_data takes (gdf_clm, min_date, max_date); it has no station id
# argument, so passing one raises a TypeError.
filtered_gdf_clm = filter_climate_data(gdf_clm, min_date, max_date)
filtered_gdf_clm

Unnamed: 0_level_0,month,day,year,max_air_temperature,min_air_temperature,precip,dew_point,sky_cover,wind_speed,wind_direction,solar_radiation,storm_type_id,potential_et,actual_et,actual_ei,input_units_code,station_id,geom
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1980-01-01,1,1,1980,0.840000,-3.010000,1.4198,-12.191728,,3.344670,136.672729,24.986666,,0.0000,,,1,103473,POINT (-124.93750 52.93750)
1980-01-04,1,4,1980,-4.690000,-11.010000,1.0267,-19.019001,,3.296375,110.886429,21.386667,,0.2411,,,1,103473,POINT (-124.93750 52.93750)
1980-01-05,1,5,1980,-11.550000,-25.340000,0.3188,-24.253515,,3.303887,177.709717,34.036709,,0.1602,,,1,103473,POINT (-124.93750 52.93750)
1980-01-06,1,6,1980,-10.270000,-21.549999,0.1486,-26.147823,,3.123646,99.463112,25.682625,,0.2167,,,1,103473,POINT (-124.93750 52.93750)
1980-01-07,1,7,1980,-9.460000,-19.709999,1.5281,-21.881842,,3.817544,208.816544,30.411583,,0.3014,,,1,103473,POINT (-124.93750 52.93750)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1981-07-27,7,27,1981,20.070000,11.200000,2.5071,0.716387,,2.843656,105.153351,285.197205,,5.4618,,,1,103473,POINT (-124.93750 52.93750)
1981-07-28,7,28,1981,13.800000,9.100000,8.9115,4.330667,,4.145553,163.145233,212.592163,,3.8862,,,1,103473,POINT (-124.93750 52.93750)
1981-07-29,7,29,1981,17.700001,9.610000,2.5588,8.646531,,4.306325,144.884430,267.003632,,5.4705,,,1,103473,POINT (-124.93750 52.93750)
1981-07-30,7,30,1981,17.120001,10.700000,0.0000,5.425813,,3.482095,109.948387,305.060547,,6.0498,,,1,103473,POINT (-124.93750 52.93750)


In [62]:
gdf_clm

Unnamed: 0_level_0,month,day,year,max_air_temperature,min_air_temperature,precip,dew_point,sky_cover,wind_speed,wind_direction,solar_radiation,storm_type_id,potential_et,actual_et,actual_ei,input_units_code,station_id,geom
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1980-01-01,1,1,1980,0.840000,-3.01,1.4198,-12.191728,,3.344670,136.672729,24.986666,,0.0000,,,1,103473,POINT (-124.93750 52.93750)
1980-01-02,1,2,1980,0.760000,-6.03,1.9128,-13.230651,,5.422105,199.760956,26.706041,,0.4934,,,1,103473,POINT (-124.93750 52.93750)
1980-01-03,1,3,1980,-0.990000,-5.70,5.7354,-13.108167,,5.346076,113.304489,12.304916,,0.4743,,,1,103473,POINT (-124.93750 52.93750)
1980-01-04,1,4,1980,-4.690000,-11.01,1.0267,-19.019001,,3.296375,110.886429,21.386667,,0.2411,,,1,103473,POINT (-124.93750 52.93750)
1980-01-05,1,5,1980,-11.550000,-25.34,0.3188,-24.253515,,3.303887,177.709717,34.036709,,0.1602,,,1,103473,POINT (-124.93750 52.93750)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1981-07-27,7,27,1981,20.070000,11.20,2.5071,0.716387,,2.843656,105.153351,285.197205,,5.4618,,,1,103473,POINT (-124.93750 52.93750)
1981-07-28,7,28,1981,13.800000,9.10,8.9115,4.330667,,4.145553,163.145233,212.592163,,3.8862,,,1,103473,POINT (-124.93750 52.93750)
1981-07-29,7,29,1981,17.700001,9.61,2.5588,8.646531,,4.306325,144.884430,267.003632,,5.4705,,,1,103473,POINT (-124.93750 52.93750)
1981-07-30,7,30,1981,17.120001,10.70,0.0000,5.425813,,3.482095,109.948387,305.060547,,6.0498,,,1,103473,POINT (-124.93750 52.93750)


In [45]:
from datetime import datetime
# import datetime

In [60]:
type(min_date)

datetime.date

In [51]:
# climate_table_has_station('2719000000', engine)
hasstation, min_date, max_date = climate_table_has_station('27194', engine)

# The value returned here is a datetime.date, so the parsed reference is reduced
# to a date with .date(); comparing it to a datetime.datetime raises TypeError.
min_date < datetime.strptime("1980-02-01", "%Y-%m-%d").date()

True

In [53]:
gdf_clm.dtypes

NameError: name 'gdf_clm' is not defined

In [50]:
min_date

datetime.date(1980, 1, 1)

In [None]:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[48], line 4
      1 # climate_table_has_station('2719000000', engine)
      2 hasstation, min_date, max_date = climate_table_has_station('27194', engine)
----> 4 min_date < datetime.strptime("1980-01-01", "%Y-%m-%d") #.strftime("%Y-%m-%d") < datetime.datetime("1980-01-01") #.strftime("%Y-%m-%d")

TypeError: can't compare datetime.datetime to datetime.date