In [42]:
from pathlib import Path
import re, json

from tqdm.auto import tqdm

import pandas as pd

import pyagnps

from sqlalchemy import URL

In [48]:
# Root folder that is searched recursively for the climate chunk files.
rods_folder = Path("C:/Users/Luc/Desktop/nldas_temp")
# JSON file read to obtain the database connection settings.
path_to_creds = Path("C:/Users/Luc/projects/pyagnps/inputs/db_credentials.json")

# File extension of the chunk files; interpolated into the glob pattern.
# NOTE(review): this name shadows the built-in `format()` for the rest of the
# notebook, and both paths above are machine-specific absolute paths.
format = "parquet"
# Database table that is checked for dates already available per station.
db_table_name = "climate_nldas2"

In [49]:
# Recursively collect every file under rods_folder whose name matches
# "climate_daily*chunk*.<format>"; the result is a list of Path objects.
all_chunks = list(rods_folder.glob(f"**/climate_daily*chunk*.{format}"))

In [50]:
def open_creds_dict(path_to_json_creds):
    """Load the database credentials stored in a JSON file.

    Parameters
    ----------
    path_to_json_creds : str or os.PathLike
        Location of the JSON credentials file.

    Returns
    -------
    object
        The deserialized JSON document (a dict for a JSON object).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # Without an explicit encoding, open() uses the platform locale (for
    # example cp1252 on Windows), which corrupts non-ASCII characters in a
    # UTF-8 encoded credentials file. JSON files are UTF-8 by convention.
    with open(path_to_json_creds, "r", encoding="utf-8") as f:
        return json.load(f)

# Read the connection settings once, then assemble the database URL from them.
creds = open_creds_dict(path_to_creds)

db_url = URL.create(
    "postgresql",
    username=creds["user"],
    password=creds["password"],
    host=creds["host"],
    port=creds["port"],
    database=creds["database"],
)

In [15]:
# Load the first discovered chunk file to inspect its layout.
# NOTE(review): raises IndexError when no chunk file was found.
df = pd.read_parquet(all_chunks[0])

In [51]:
# Preview the first rows of the sample chunk.
df.head()

Unnamed: 0_level_0,lon,lat,Month,Day,Year,Max_Air_Temperature,Min_Air_Temperature,Precip,Dew_Point,Sky_Cover,Wind_Speed,Wind_Direction,Solar_Radiation,Storm_Type_ID,Potential_ET,Actual_ET,Actual_EI,Input_Units_Code
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-01,-74.9375,48.0625,1,1,2020,-1.540009,-4.649994,1.0682,-3.714569,,2.397885,77.159096,56.581211,,0.3525,,,1
2020-01-02,-74.9375,48.0625,1,2,2020,-3.109985,-8.769989,3.1634,-4.910004,,3.473577,8.547076,60.403004,,0.3279,,,1
2020-01-03,-74.9375,48.0625,1,3,2020,-0.390015,-3.109985,1.3394,-1.6521,,3.967525,52.565678,47.090755,,0.328,,,1
2020-01-04,-74.9375,48.0625,1,4,2020,-1.790009,-9.670013,0.9862,-7.440002,,2.991296,175.502121,63.626331,,0.5155,,,1
2020-01-05,-74.9375,48.0625,1,5,2020,-7.290009,-15.670013,0.3302,-13.548767,,3.541662,161.848557,71.422424,,0.5455,,,1


In [52]:
# Group the chunk files by station: {station_id (str): [chunk paths]}.
# The station id is the text between "climate_daily_" and the next underscore.
station_id_pattern = re.compile(r'climate_daily_(.*?)_')

stations = {}

for chunk in all_chunks:
    # Match against the file name only, so that a parent directory whose name
    # happens to contain "climate_daily_..._" is never taken for the station id.
    match = station_id_pattern.search(chunk.name)
    if match is None:
        # The glob that produced all_chunks is looser than this pattern; fail
        # with an explicit message instead of a bare IndexError.
        raise ValueError(f"Cannot extract a station id from chunk file name: {chunk}")
    stations.setdefault(match.group(1), []).append(chunk)

In [53]:
# Empty set, by its name meant to track stations that were already handled.
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
processed_stations = set()

In [54]:
def engine_creator():
    """Create a database engine for ``db_url`` through pyagnps.

    ``db_url`` is looked up when the function is called, and the pool is
    requested with ``max_connections=20``.
    """
    return pyagnps.climate.create_engine_with_pool(db_url, max_connections=20)

In [58]:
engine = engine_creator()

for station_id, chunk_files in tqdm(stations.items(), desc="Writing chunks to database", ascii=True):

    # Stitch all chunk files of this station into a single frame.
    df_station = pd.concat([pd.read_parquet(file, engine='pyarrow') for file in chunk_files])

    # The station coordinates are taken from the first row of the frame.
    lon, lat = df_station.iloc[0].lon.item(), df_station.iloc[0].lat.item()

    # Process for database insertion: ask pyagnps which dates inside this
    # station's time span still need handling, grouped into periods
    # (helper semantics assumed from their names — TODO confirm).
    available_dates = pyagnps.climate.get_available_dates_for_station(station_id, engine, table=db_table_name)
    missing_dates = pyagnps.climate.get_missing_dates(available_dates, df_station.index.min(), df_station.index.max())
    continuous_periods = pyagnps.climate.find_continuous_periods(missing_dates)

    # Collect the prepared frame of every period and concatenate once after the
    # loop; concatenating inside the loop re-copies the accumulated rows on
    # every iteration.
    period_frames = []
    for period in continuous_periods:
        start, end = period[0], period[-1]
        df_period = df_station[(df_station.index >= start) & (df_station.index <= end)]

        if len(df_period) == 0:
            continue

        # Kept as a named variable: a later cell displays gdf_clm_period.
        gdf_clm_period = pyagnps.climate.prepare_annagnps_climate_for_db(df_period, station_id, lon, lat)
        period_frames.append(gdf_clm_period)

    # None signals "nothing to insert for this station".
    gdf_clm = pd.concat(period_frames) if period_frames else None

    if gdf_clm is not None:
        # Dry run: the real insertion (and deletion of the chunk files) is
        # disabled below.
        print("pretending to insert climate data for", station_id)
        # try:
        #     pyagnps.climate.insert_climate_nldas2(gdf_clm, engine, table=db_table_name)
        #     for chunk in chunk_files:
        #         chunk.unlink()
        # except Exception as e:
        #     print(e)
        # pyagnps.climate.insert_climate_nldas2(gdf_clm, engine, table="climate_nldas2")

# Clean up temporary directory
# NOTE(review): shutil is not imported and output_dir_temp is not defined in the visible cells.
# shutil.rmtree(output_dir_temp)

Writing chunks to database:   0%|          | 0/28 [00:00<?, ?it/s]

pretending to insert climate data for 85777
pretending to insert climate data for 85785
pretending to insert climate data for 85793
pretending to insert climate data for 85801
pretending to insert climate data for 85809
pretending to insert climate data for 85817
pretending to insert climate data for 85825
pretending to insert climate data for 89489
pretending to insert climate data for 89497
pretending to insert climate data for 89505
pretending to insert climate data for 89513
pretending to insert climate data for 89521
pretending to insert climate data for 89529
pretending to insert climate data for 89537


KeyboardInterrupt: 

In [56]:
# Display the frame prepared for the last period handled by the loop above.
# NOTE(review): this relies on a variable left over from that loop, so the loop
# cell must have run (at least partially) before this one.
gdf_clm_period

Unnamed: 0_level_0,month,day,year,max_air_temperature,min_air_temperature,precip,dew_point,sky_cover,wind_speed,wind_direction,solar_radiation,storm_type_id,potential_et,actual_et,actual_ei,input_units_code,station_id,geom
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-05,1,5,2020,-7.839996,-15.399994,0.5128,-13.448334,,4.959896,163.60965,37.802086,,0.4945,,,1,96961,POINT (-68.9375 51.0625)
2020-01-06,1,6,2020,-17.600006,-25.339996,0.0344,-21.99501,,0.211346,251.493622,64.113129,,0.1211,,,1,96961,POINT (-68.9375 51.0625)
2020-01-07,1,7,2020,-11.730011,-24.220001,0.4893,-17.052094,,2.628879,348.934052,52.6898,,0.3158,,,1,96961,POINT (-68.9375 51.0625)
2020-01-08,1,8,2020,-9.450012,-19.470001,2.1315,-13.799988,,0.055586,213.690109,51.834126,,0.4094,,,1,96961,POINT (-68.9375 51.0625)
2020-01-09,1,9,2020,-10.709991,-23.819992,0.0998,-21.327927,,5.363928,144.912445,65.293083,,0.3385,,,1,96961,POINT (-68.9375 51.0625)
2020-01-10,1,10,2020,-12.779999,-25.259995,4.8128,-17.306244,,3.911921,344.157715,19.052336,,0.3537,,,1,96961,POINT (-68.9375 51.0625)
2020-01-11,1,11,2020,-3.450012,-12.779999,4.0976,-9.609589,,2.276002,134.918396,41.764336,,0.6271,,,1,96961,POINT (-68.9375 51.0625)
2020-01-12,1,12,2020,-12.540009,-23.949997,0.1326,-21.842484,,4.031723,167.810928,66.266716,,0.3547,,,1,96961,POINT (-68.9375 51.0625)
2020-01-13,1,13,2020,-19.149994,-29.429993,0.9656,-23.411667,,1.060036,177.679688,64.508125,,0.2231,,,1,96961,POINT (-68.9375 51.0625)
2020-01-14,1,14,2020,-17.619995,-25.789993,0.1842,-22.396652,,1.169425,344.458405,71.322548,,0.1444,,,1,96961,POINT (-68.9375 51.0625)
