In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import copy
import math

## AORC data is used only for prediction; model training uses the tower meteorological variables

In [7]:
def specific_to_relative_humidity(specific_humidity, temperature, pressure):
    """
    Convert specific humidity to relative humidity.

    The computation is element-wise, so every argument may be a scalar, a
    NumPy array, or a pandas Series (inputs broadcast against each other).

    Parameters:
    specific_humidity (float, ndarray or Series): Specific humidity in kg/kg.
    temperature (float, ndarray or Series): Temperature in degrees Celsius.
    pressure (float, ndarray or Series): Atmospheric pressure in kPa.

    Returns:
    float, ndarray or Series: Relative humidity in percentage. The result is
    not clipped, so supersaturated inputs yield values above 100.
    """

    # Actual vapor pressure (e) in hPa; the factor 10 converts kPa to hPa.
    # 0.622 is the water-vapor / dry-air molecular weight ratio and
    # 0.378 = 1 - 0.622.
    e = (specific_humidity * pressure * 10) / (0.622 + 0.378 * specific_humidity)

    # Saturation vapor pressure (es) in hPa from the Magnus-type formula with
    # Bolton (1980) coefficients. np.exp (instead of math.exp) keeps the
    # function usable on whole arrays / Series as well as on scalars.
    es = 6.112 * np.exp((17.67 * temperature) / (temperature + 243.5))

    # Relative humidity (RH) as a percentage of saturation.
    relative_humidity = (e / es) * 100

    return relative_humidity

#### US-UC1 & US-UC2

In [8]:
# Load the AORC forcing extracted for the Rider 18 location (US-UC1 / US-UC2).
# The latitude/longitude columns are not used below and are dropped on load.
AORC_Rider18 = pd.read_csv(os.path.join(os.getcwd(), 'Data', 'Rider_Rider 18_combined_aorc_data.csv')).drop(['latitude', 'longitude'], axis=1)

AORC_Rider18['time'] = pd.to_datetime(AORC_Rider18['time'])
AORC_Rider18 = AORC_Rider18.set_index('time')
# Shift every timestamp back by 5 hours.
# NOTE(review): presumably converts UTC to local time (UTC-5) — confirm
# against the time convention of the tower data.
AORC_Rider18.index = AORC_Rider18.index - pd.DateOffset(hours=5)

# Define the time windows
growing_seasons = [
    ('2019-05-20', '2019-09-11'),
    ('2020-05-27', '2020-09-25'),
    ('2021-05-20', '2021-11-16'),
    ('2022-06-08', '2022-12-08')
]

# Keep only the growing-season rows inside a padded 05:00-19:00 window.
# The explicit .copy() makes the result an independent frame, so the column
# assignments below do not write into a slice (avoids SettingWithCopyWarning).
AORC_Rider18_filtered = (
    pd.concat([AORC_Rider18.loc[start:end] for (start, end) in growing_seasons])
    .between_time('5:00', '19:00')
    .copy()
)

# Column conversions
# Subtract 273.15 so the temperature is in degrees Celsius, the unit
# specific_to_relative_humidity expects (input presumably in Kelvin).
AORC_Rider18_filtered['Air Temperature'] = AORC_Rider18_filtered['Air Temperature'] - 273.15

# Scalar wind speed from the horizontal wind components.
AORC_Rider18_filtered['Wind Speed'] = np.sqrt(AORC_Rider18_filtered['U-Component of Wind']**2 + AORC_Rider18_filtered['V-Component of Wind']**2)

# Scale pressure to kPa, the unit specific_to_relative_humidity expects
# (input presumably in Pa — TODO confirm).
AORC_Rider18_filtered['Pressure'] = AORC_Rider18_filtered['Pressure']/1000

# Evaluate the humidity conversion row by row. Iterating the three columns
# in lockstep gives the same values as DataFrame.apply(axis=1) without
# building a Series per row, and also works on an empty frame.
AORC_Rider18_filtered['relative_humidity'] = [
    specific_to_relative_humidity(q, t, p)
    for q, t, p in zip(
        AORC_Rider18_filtered['Specific Humidity'],
        AORC_Rider18_filtered['Air Temperature'],
        AORC_Rider18_filtered['Pressure'],
    )
]

# Keep exactly the seven output variables in their final order; this also
# discards the wind components and the specific humidity.
AORC_Rider18_filtered = AORC_Rider18_filtered[['Total Precipitation', 'Air Temperature', 'relative_humidity',
                                               'Downward Long-Wave Radiation Flux', 'Downward Short-Wave Radiation Flux', 'Pressure', 'Wind Speed']]

# Alignment shift to match the tower data hourly aggregation. The offset is
# currently 0 hours, so the index is left unchanged.
AORC_Rider18_filtered.index = AORC_Rider18_filtered.index - pd.DateOffset(hours=0)

# Final window: keep timestamps from 06:00 to 18:00 (both inclusive).
AORC_Rider18_filtered = AORC_Rider18_filtered.between_time('6:00', '18:00')

In [9]:
# Write the processed 2019-2022 US-UC1/US-UC2 forcing to a CSV file in the
# current working directory (the time index is written as the first column).
AORC_Rider18_filtered.to_csv(
    path_or_buf='US_UC1_UC2_2019-2022_filtered_7_vars_AORC_MOD.csv'
)

#### US-UC1 & US-UC2 2024

In [3]:
# Load the 2024 AORC forcing extracted for the Rider 18 location
# (US-UC1 / US-UC2). The latitude/longitude columns are not used below.
AORC_Rider18_2024 = pd.read_csv(os.path.join(os.getcwd(), 'Data', 'Rider_Rider_18_Full 2024 AORC data.csv')).drop(['latitude', 'longitude'], axis=1)

AORC_Rider18_2024['time'] = pd.to_datetime(AORC_Rider18_2024['time'])
AORC_Rider18_2024 = AORC_Rider18_2024.set_index('time')
# Shift every timestamp back by 5 hours.
# NOTE(review): presumably converts UTC to local time (UTC-5) — confirm
# against the time convention of the tower data.
AORC_Rider18_2024.index = AORC_Rider18_2024.index - pd.DateOffset(hours=5)

# Define the time windows
growing_seasons = [('2024-05-25', '2024-10-08')]

# Keep only the growing-season rows inside a padded 05:00-19:00 window.
# The explicit .copy() makes the result an independent frame, so the column
# assignments below do not write into a slice (avoids SettingWithCopyWarning).
AORC_Rider18_2024_filtered = (
    pd.concat([AORC_Rider18_2024.loc[start:end] for (start, end) in growing_seasons])
    .between_time('5:00', '19:00')
    .copy()
)

# Column conversions
# Subtract 273.15 so the temperature is in degrees Celsius, the unit
# specific_to_relative_humidity expects (input presumably in Kelvin).
AORC_Rider18_2024_filtered['Air Temperature'] = AORC_Rider18_2024_filtered['Air Temperature'] - 273.15

# Scalar wind speed from the horizontal wind components.
AORC_Rider18_2024_filtered['Wind Speed'] = np.sqrt(AORC_Rider18_2024_filtered['U-Component of Wind']**2 + AORC_Rider18_2024_filtered['V-Component of Wind']**2)

# Scale pressure to kPa, the unit specific_to_relative_humidity expects
# (input presumably in Pa — TODO confirm).
AORC_Rider18_2024_filtered['Pressure'] = AORC_Rider18_2024_filtered['Pressure']/1000

# Evaluate the humidity conversion row by row. Iterating the three columns
# in lockstep gives the same values as DataFrame.apply(axis=1) without
# building a Series per row, and also works on an empty frame.
AORC_Rider18_2024_filtered['relative_humidity'] = [
    specific_to_relative_humidity(q, t, p)
    for q, t, p in zip(
        AORC_Rider18_2024_filtered['Specific Humidity'],
        AORC_Rider18_2024_filtered['Air Temperature'],
        AORC_Rider18_2024_filtered['Pressure'],
    )
]

# Keep exactly the seven output variables in their final order; this also
# discards the wind components and the specific humidity.
AORC_Rider18_2024_filtered = AORC_Rider18_2024_filtered[['Total Precipitation', 'Air Temperature', 'relative_humidity',
                                                         'Downward Long-Wave Radiation Flux', 'Downward Short-Wave Radiation Flux', 'Pressure', 'Wind Speed']]

# Alignment shift to match the tower data hourly aggregation. The offset is
# currently 0 hours, so the index is left unchanged.
AORC_Rider18_2024_filtered.index = AORC_Rider18_2024_filtered.index - pd.DateOffset(hours=0)

# Final window: keep timestamps from 06:00 to 18:00 (both inclusive).
AORC_Rider18_2024_filtered = AORC_Rider18_2024_filtered.between_time('6:00', '18:00')

In [4]:
# Write the processed 2024 US-UC1/US-UC2 forcing to a CSV file in the
# current working directory (the time index is written as the first column).
AORC_Rider18_2024_filtered.to_csv(
    path_or_buf='US_UC1_UC2_2024_filtered_7_vars_AORC_MOD.csv'
)

#### HWB

In [5]:
# Load the AORC forcing extracted for the US-HWB site. The latitude/longitude
# columns are not used below and are dropped on load.
AORC_HWB = pd.read_csv(os.path.join(os.getcwd(), 'Data', 'US-HWB_combined_aorc_data.csv')).drop(['latitude', 'longitude'], axis=1)

AORC_HWB['time'] = pd.to_datetime(AORC_HWB['time'])
AORC_HWB = AORC_HWB.set_index('time')
# Shift every timestamp back by 5 hours.
# NOTE(review): presumably converts UTC to local time (UTC-5) — confirm
# against the time convention of the tower data.
AORC_HWB.index = AORC_HWB.index - pd.DateOffset(hours=5)

# Define the time windows
growing_seasons = [('2017-04-15', '2017-09-30')]

# Keep only the growing-season rows inside a padded 05:00-19:00 window.
# The explicit .copy() makes the result an independent frame, so the column
# assignments below do not write into a slice (avoids SettingWithCopyWarning).
AORC_HWB_filtered = (
    pd.concat([AORC_HWB.loc[start:end] for (start, end) in growing_seasons])
    .between_time('5:00', '19:00')
    .copy()
)

# Column conversions
# Subtract 273.15 so the temperature is in degrees Celsius, the unit
# specific_to_relative_humidity expects (input presumably in Kelvin).
AORC_HWB_filtered['Air Temperature'] = AORC_HWB_filtered['Air Temperature'] - 273.15

# Scalar wind speed from the horizontal wind components.
AORC_HWB_filtered['Wind Speed'] = np.sqrt(AORC_HWB_filtered['U-Component of Wind']**2 + AORC_HWB_filtered['V-Component of Wind']**2)

# Scale pressure to kPa, the unit specific_to_relative_humidity expects
# (input presumably in Pa — TODO confirm).
AORC_HWB_filtered['Pressure'] = AORC_HWB_filtered['Pressure']/1000

# Evaluate the humidity conversion row by row. Iterating the three columns
# in lockstep gives the same values as DataFrame.apply(axis=1) without
# building a Series per row, and also works on an empty frame.
AORC_HWB_filtered['relative_humidity'] = [
    specific_to_relative_humidity(q, t, p)
    for q, t, p in zip(
        AORC_HWB_filtered['Specific Humidity'],
        AORC_HWB_filtered['Air Temperature'],
        AORC_HWB_filtered['Pressure'],
    )
]

# Keep exactly the seven output variables in their final order; this also
# discards the wind components and the specific humidity.
AORC_HWB_filtered = AORC_HWB_filtered[['Total Precipitation', 'Air Temperature', 'relative_humidity',
                                       'Downward Long-Wave Radiation Flux', 'Downward Short-Wave Radiation Flux', 'Pressure', 'Wind Speed']]

# Alignment shift to match the tower data hourly aggregation. The offset is
# currently 0 hours, so the index is left unchanged.
AORC_HWB_filtered.index = AORC_HWB_filtered.index - pd.DateOffset(hours=0)

# Final window: keep timestamps from 06:00 to 18:00 (both inclusive).
AORC_HWB_filtered = AORC_HWB_filtered.between_time('6:00', '18:00')

In [6]:
# Write the processed 2017 US-HWB forcing to a CSV file in the current
# working directory (the time index is written as the first column).
AORC_HWB_filtered.to_csv(
    path_or_buf='US_HWB_2017_filtered_7_vars_AORC_MOD.csv'
)

In [31]:
# Display the processed US-HWB frame as the cell's output for a visual check.
AORC_HWB_filtered

Unnamed: 0_level_0,Total Precipitation,Air Temperature,relative_humidity,Downward Long-Wave Radiation Flux,Downward Short-Wave Radiation Flux,Pressure,Wind Speed
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-04-15 06:00:00,0.0,11.25000,67.355278,299.20000,217.00000,98.37,4.512206
2017-04-15 07:00:00,0.0,14.15002,59.570013,301.30002,368.00000,98.34,4.804165
2017-04-15 08:00:00,0.0,16.25000,53.713915,299.00000,520.10004,98.28,5.323533
2017-04-15 09:00:00,0.0,18.05000,51.677977,329.60000,639.90000,98.21,5.824947
2017-04-15 10:00:00,0.0,20.15002,47.288998,331.20000,734.60004,98.14,5.322593
...,...,...,...,...,...,...,...
2017-09-30 14:00:00,0.0,15.35000,47.983552,295.20000,441.50000,98.46,6.747592
2017-09-30 15:00:00,0.0,15.25000,47.394103,323.20000,263.60000,98.48,6.040695
2017-09-30 16:00:00,0.0,14.15002,48.929981,323.90000,127.80000,98.48,5.035871
2017-09-30 17:00:00,0.0,11.05000,61.185669,316.60000,0.00000,98.52,4.031129
