In [7]:
import numpy as np
import pandas as pd
import os
import dask.dataframe as dd
import warnings
warnings.filterwarnings('ignore')

In [8]:
def prepare_AORC_PS_NDVI_df(PS_daily_NDVI_df, AORC_with_7_vars):
    PS_daily_NDVI_df_day = PS_daily_NDVI_df[['longitude', 'latitude', 'NDVI']]

    dask_df = dd.from_pandas(PS_daily_NDVI_df_day, npartitions=1)

    # Duplicate rows
    duplicated_df = dd.concat([dask_df] * 13).compute()

    unique_dates = PS_daily_NDVI_df_day.index.unique()

    hourly_index = []
    for date_ in unique_dates:
        for hour in range(6, 19):  # Hours from 6 AM to 6 PM
            for _ in range(len(PS_daily_NDVI_df_day)):  # For each location
                hourly_index.append(date_ + pd.Timedelta(hours=hour))

    # Assign the new hourly index
    duplicated_df['datetime'] = hourly_index
    PS_hourly_NDVI_df = duplicated_df.set_index('datetime').sort_index()

    AORC_PS_NDVI_date_hourly_df = pd.merge(AORC_with_7_vars, PS_hourly_NDVI_df, left_index=True, right_index=True, how='inner')

    return AORC_PS_NDVI_date_hourly_df

### US_HWB_2017

In [10]:
daily_PS_NDVI_US_HWB_2017_entirefield = pd.read_csv('daily_PS_NDVI_US_HWB_2017_entirefield.csv')
daily_PS_NDVI_US_HWB_2017_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_US_HWB_2017_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_US_HWB_2017_entirefield.set_index('Date', inplace=True)

US_HWB_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_HWB_2017_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

In [11]:
for gs_date in daily_PS_NDVI_US_HWB_2017_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_US_HWB_2017_entirefield.loc[gs_date].copy()
    US_HWB_hourly_AORC_gs_date = US_HWB_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_HWB_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(os.path.join(os.getcwd(), 'US_HWB',f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_HWB.csv'))

In [40]:
US_HWB_hourly_AORC_PS_NDVI_csv_files_directiory = os.path.join(os.path.dirname(os.getcwd()),
                                                               '8_Data_prepration_prediction_phase', 'US_HWB')
for csv_file in os.listdir(US_HWB_hourly_AORC_PS_NDVI_csv_files_directiory):
    gs_date = csv_file[28:38]
    print(gs_date)
    AORC_PS_NDVI_gs_date_hourly_df = pd.read_csv(os.path.join(US_HWB_hourly_AORC_PS_NDVI_csv_files_directiory, csv_file), index_col=0,parse_dates=[0])

    print(AORC_PS_NDVI_gs_date_hourly_df['Downward Short-Wave Radiation Flux'].max())
    print(AORC_PS_NDVI_gs_date_hourly_df['Downward Short-Wave Radiation Flux'].min())

2017-04-18
834.7
0.0
2017-04-19
404.0
0.0
2017-04-20
665.8
0.0
2017-04-21
746.9
0.0
2017-04-22
576.2
0.0
2017-04-23
858.60004
0.0
2017-04-24
782.10004
0.0
2017-04-25


KeyboardInterrupt: 

In [38]:
AORC_PS_NDVI_gs_date_hourly_df['Downward Short-Wave Radiation Flux'].max()

Unnamed: 0,Total Precipitation,Air Temperature,Specific Humidity,Downward Long-Wave Radiation Flux,Downward Short-Wave Radiation Flux,Air Pressure,Wind Speed,Total_Precipitation_12h_sum,Total_Precipitation_3h_sum,Specific_Humidity_3h_mean,longitude,latitude,NDVI
2017-04-18 06:00:00,0.0,7.85,0.0048,252.8,237.1,98.56,2.137756,0.0,0.0,0.004367,-77.847760,40.863611,0.714136
2017-04-18 06:00:00,0.0,7.85,0.0048,252.8,237.1,98.56,2.137756,0.0,0.0,0.004367,-77.847724,40.863610,0.711310
2017-04-18 06:00:00,0.0,7.85,0.0048,252.8,237.1,98.56,2.137756,0.0,0.0,0.004367,-77.847689,40.863609,0.709120
2017-04-18 06:00:00,0.0,7.85,0.0048,252.8,237.1,98.56,2.137756,0.0,0.0,0.004367,-77.847653,40.863608,0.710367
2017-04-18 06:00:00,0.0,7.85,0.0048,252.8,237.1,98.56,2.137756,0.0,0.0,0.004367,-77.847618,40.863608,0.713594
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-04-18 18:00:00,0.0,16.55,0.0048,297.2,0.0,98.33,3.130495,0.0,0.0,0.004700,-77.850696,40.858746,0.291687
2017-04-18 18:00:00,0.0,16.55,0.0048,297.2,0.0,98.33,3.130495,0.0,0.0,0.004700,-77.850660,40.858745,0.290125
2017-04-18 18:00:00,0.0,16.55,0.0048,297.2,0.0,98.33,3.130495,0.0,0.0,0.004700,-77.850625,40.858744,0.281425
2017-04-18 18:00:00,0.0,16.55,0.0048,297.2,0.0,98.33,3.130495,0.0,0.0,0.004700,-77.850589,40.858743,0.261428


### US_GBF_2024

In [12]:
daily_PS_NDVI_GBF_2024_entirefield = pd.read_csv('daily_PS_NDVI_GBF_2024_entirefield.csv')
daily_PS_NDVI_GBF_2024_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_GBF_2024_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_GBF_2024_entirefield.set_index('Date', inplace=True)

US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_UC1_UC2_2024_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

In [16]:
for gs_date in daily_PS_NDVI_GBF_2024_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_GBF_2024_entirefield.loc[gs_date].copy()
    US_GBF_hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    gs_date_PS_NDVI_df = gs_date_PS_NDVI_df.rename(columns={'daily_NDVI':'NDVI'})
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_GBF_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(os.path.join(os.getcwd(), 'US_GBF', f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_GBF.csv'))

KeyError: '2024-10-09'

### For Specific dates

In [28]:
NDVI_csv_dir = os.path.join(os.getcwd(), 'US_GBF_PS_NDVI', '2024')
US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_UC1_UC2_2024_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for PS_ndvi_csv in os.listdir(NDVI_csv_dir): # a directory with imputed PS NDVI csvs from "PS_NDVI_per_day" ipynb
    if PS_ndvi_csv.endswith('csv'):
        gs_date = PS_ndvi_csv[21:29]
        gs_date_PS_NDVI_df = pd.read_csv(os.path.join(NDVI_csv_dir, PS_ndvi_csv))
        gs_date_PS_NDVI_df['Date'] = pd.to_datetime(gs_date_PS_NDVI_df['Date'], format='%Y%m%d')
        gs_date_PS_NDVI_df.set_index('Date', inplace=True)

        hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[pd.to_datetime(gs_date,format='%Y%m%d').strftime('%Y-%m-%d')].copy()
        AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, hourly_AORC_gs_date)[['Air Temperature', 'relative_humidity',
                                                                                                           'Downward Short-Wave Radiation Flux',
                                                                                                           'longitude', 'latitude', 'NDVI']]
                                                                                                           
        AORC_PS_NDVI_gs_date_hourly_df.to_csv(os.path.join(NDVI_csv_dir, 'AORC_Agg', 
                                                       f'Hourly_AORC_PS_NDVI_per_Day_{pd.to_datetime(gs_date,format="%Y%m%d").strftime("%Y-%m-%d")}_US_GBF.csv'))

KeyboardInterrupt: 

In [None]:
NDVI_csv_dir = os.path.join(os.getcwd(), 'US_HWB_PS_NDVI', '2017')
US_HWB_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_HWB_2017_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for PS_ndvi_csv in os.listdir(NDVI_csv_dir): # a directory with imputed PS NDVI csvs from "PS_NDVI_per_day" ipynb
    if PS_ndvi_csv.endswith('csv'):
        gs_date = PS_ndvi_csv[21:29]
        gs_date_PS_NDVI_df = pd.read_csv(os.path.join(NDVI_csv_dir, PS_ndvi_csv))
        gs_date_PS_NDVI_df['Date'] = pd.to_datetime(gs_date_PS_NDVI_df['Date'], format='%Y%m%d')
        gs_date_PS_NDVI_df.set_index('Date', inplace=True)

        hourly_AORC_gs_date = US_HWB_hourly_AORC.loc[pd.to_datetime(gs_date,format='%Y%m%d').strftime('%Y-%m-%d')].copy()
        AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, hourly_AORC_gs_date)[['Air Temperature', 'relative_humidity',
                                                                                                           'Downward Short-Wave Radiation Flux',
                                                                                                           'longitude', 'latitude', 'NDVI']]
        AORC_PS_NDVI_gs_date_hourly_df.to_csv(os.path.join(NDVI_csv_dir, 'AORC_Agg', 
                                                       f'Hourly_AORC_PS_NDVI_per_Day_{pd.to_datetime(gs_date,format="%Y%m%d").strftime("%Y-%m-%d")}_US_HWB.csv'))

#### dates

In [4]:
Date_of_image = '20240824'
daily_PS_NDVI_GBF_2024_entirefield = pd.read_csv(f'daily_PS_NDVI_US_GBF_{Date_of_image}_entirefield.csv')
daily_PS_NDVI_GBF_2024_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_GBF_2024_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_GBF_2024_entirefield.set_index('Date', inplace=True)

US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_UC1_UC2_2024_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for gs_date in daily_PS_NDVI_GBF_2024_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_GBF_2024_entirefield.loc[gs_date].copy()
    US_GBF_hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    #gs_date_PS_NDVI_df = gs_date_PS_NDVI_df.rename(columns={'daily_NDVI':'NDVI'})
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_GBF_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_GBF.csv')

In [6]:
Date_of_image = '20240613'
daily_PS_NDVI_GBF_2024_entirefield = pd.read_csv(f'daily_PS_NDVI_US_GBF_{Date_of_image}_entirefield.csv')
daily_PS_NDVI_GBF_2024_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_GBF_2024_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_GBF_2024_entirefield.set_index('Date', inplace=True)

US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_UC1_UC2_2024_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for gs_date in daily_PS_NDVI_GBF_2024_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_GBF_2024_entirefield.loc[gs_date].copy()
    US_GBF_hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    #gs_date_PS_NDVI_df = gs_date_PS_NDVI_df.rename(columns={'daily_NDVI':'NDVI'})
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_GBF_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_GBF.csv')

In [5]:
Date_of_image = '20220710'
daily_PS_NDVI_GBF_2024_entirefield = pd.read_csv(f'daily_PS_NDVI_US_GBF_{Date_of_image}_entirefield.csv')
daily_PS_NDVI_GBF_2024_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_GBF_2024_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_GBF_2024_entirefield.set_index('Date', inplace=True)

US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_UC1_UC2_2019-2022_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for gs_date in daily_PS_NDVI_GBF_2024_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_GBF_2024_entirefield.loc[gs_date].copy()
    US_GBF_hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    #gs_date_PS_NDVI_df = gs_date_PS_NDVI_df.rename(columns={'daily_NDVI':'NDVI'})
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_GBF_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_GBF.csv')

In [3]:
Date_of_image = '20220803'
daily_PS_NDVI_GBF_2024_entirefield = pd.read_csv(f'daily_PS_NDVI_US_GBF_{Date_of_image}_entirefield.csv')
daily_PS_NDVI_GBF_2024_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_GBF_2024_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_GBF_2024_entirefield.set_index('Date', inplace=True)

US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_UC1_UC2_2019-2022_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for gs_date in daily_PS_NDVI_GBF_2024_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_GBF_2024_entirefield.loc[gs_date].copy()
    US_GBF_hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    #gs_date_PS_NDVI_df = gs_date_PS_NDVI_df.rename(columns={'daily_NDVI':'NDVI'})
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_GBF_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_GBF.csv')

In [29]:
Date_of_image = '20170517'
daily_PS_NDVI_GBF_2024_entirefield = pd.read_csv(f'daily_PS_NDVI_US_HWB_{Date_of_image}_entirefield.csv')
daily_PS_NDVI_GBF_2024_entirefield['Date'] = pd.to_datetime(daily_PS_NDVI_GBF_2024_entirefield['Date'], format='%Y%m%d')
daily_PS_NDVI_GBF_2024_entirefield.set_index('Date', inplace=True)

US_GBF_hourly_AORC = pd.read_csv(os.path.join(os.path.dirname(os.getcwd()),'4_AORC', 'US_HWB_2017_filtered_7_vars_AORC_MOD.csv'),
                                index_col = 'time', parse_dates=['time'])

for gs_date in daily_PS_NDVI_GBF_2024_entirefield.index.unique():
    gs_date_PS_NDVI_df = daily_PS_NDVI_GBF_2024_entirefield.loc[gs_date].copy()
    US_GBF_hourly_AORC_gs_date = US_GBF_hourly_AORC.loc[gs_date.strftime('%Y-%m-%d')].copy()
    #gs_date_PS_NDVI_df = gs_date_PS_NDVI_df.rename(columns={'daily_NDVI':'NDVI'})
    
    AORC_PS_NDVI_gs_date_hourly_df = prepare_AORC_PS_NDVI_df(gs_date_PS_NDVI_df, US_GBF_hourly_AORC_gs_date)
    
    #AORC_PS_NDVI_gs_date_hourly_df['Downward Long-Wave Radiation Flux'].hist()
    AORC_PS_NDVI_gs_date_hourly_df.to_csv(f'Hourly_AORC_PS_NDVI_per_Day_{gs_date.strftime("%Y-%m-%d")}_US_HWB.csv')