In [1]:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import requests
from config import eia_key

In [2]:
# Download hourly demand for respondent NY from the EIA v2 API, one page at a time.
# Paging continues until the API returns a short (or empty) page, so the loop does
# not depend on a hard-coded total row count and cannot silently truncate the data
# if the requested window grows.
page_length = 5000  # rows requested per call (same value the request always used)
base_url = (
    'https://api.eia.gov/v2/electricity/rto/region-data/data/'
    '?frequency=hourly&data[0]=value&facets[respondent][]=NY&facets[type][]=D'
    '&start=2019-01-01T00&end=2024-08-01T00'
    '&sort[0][column]=period&sort[0][direction]=desc'
)

offset = 0
df = []  # list of per-page DataFrames; concatenated by the following cell
while True:
    url = f'{base_url}&offset={offset}&length={page_length}&api_key={eia_key}'
    reply = requests.get(url, timeout=60)
    # Fail with the HTTP error itself rather than a KeyError on an error payload.
    reply.raise_for_status()
    rows = reply.json()['response']['data']
    if not rows:
        break  # nothing left to fetch
    df.append(pd.DataFrame(rows))
    if len(rows) < page_length:
        break  # a short page is the last page
    offset += page_length

if not df:
    raise RuntimeError('EIA API returned no rows for the requested window')
    

In [3]:
# Stack the per-page frames collected in `df` into one table; ignore_index=True
# discards each page's own row labels and renumbers the rows consecutively.
data = pd.concat(objs=df, ignore_index=True)

In [4]:
# Keep only the timestamp and the reading, under clearer column names, and parse
# the timestamp. `infer_datetime_format` is deprecated in pandas 2.x (the format is
# inferred by default), so it is no longer passed.
# NOTE(review): the API may deliver `value` as strings - confirm the dtype of
# `demand` and convert with pd.to_numeric before doing arithmetic on it.
demand_hourly = (
    data[['period', 'value']]
    .rename(columns={'period': 'date', 'value': 'demand'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)


In [8]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Open-Meteo client built on a cached session ('.cache', expire_after=-1) that is
# wrapped with retries (5 attempts, backoff factor 0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition. Variables are read back by position below, so the index
# passed to Variables() must follow the order of the "hourly" entry.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=40.7143,
    longitude=-74.006,
    start_date="2019-01-01",
    end_date="2024-08-01",
    hourly="temperature_2m",
    timezone="auto",
)
responses = openmeteo.weather_api(url, params=params)

# Only one location was requested, so only the first response is processed.
response = responses[0]

# Pull the hourly block and its single requested variable (index 0).
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

# Rebuild the timestamp axis from the reported start, end and step (all in
# seconds); the end point itself is excluded via inclusive="left".
first_stamp = pd.to_datetime(hourly.Time(), unit="s", utc=True)
last_stamp = pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True)
step = pd.Timedelta(seconds=hourly.Interval())
hourly_data = {
    "date": pd.date_range(start=first_stamp, end=last_stamp, freq=step, inclusive="left"),
}
hourly_data["temperature"] = hourly_temperature_2m

hourly_temperature_dataframe = pd.DataFrame(data=hourly_data)


In [9]:

# Drop the UTC marker while keeping the same clock values, and keep the column as
# real timestamps instead of formatting it to strings that then have to be parsed
# again. pd.to_datetime(..., utc=True) leaves an already-UTC column unchanged and
# also accepts a naive or string column, so this cell can be re-run safely.
hourly_temperature_dataframe['date'] = (
    pd.to_datetime(hourly_temperature_dataframe['date'], utc=True).dt.tz_localize(None)
)

In [10]:
import pandas as pd

# Join temperature and demand on their shared hourly timestamp, save the result
# to dataset.csv and print it.

# Parse the key (a no-op when it is already datetime), snap it to the top of the
# hour and order each frame by time. 'h' is used instead of the 'H' alias, which
# pandas 2.2 deprecates. The frames are reassigned rather than mutated in place,
# so re-running this cell gives the same result.
demand_hourly = (
    demand_hourly
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.floor('h'))
    .sort_values('date')
)
hourly_temperature_dataframe = (
    hourly_temperature_dataframe
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.floor('h'))
    .sort_values('date')
)

# Inner join: only hours present in both sources are kept.
df = pd.merge(hourly_temperature_dataframe, demand_hourly, on='date', how='inner')
# The row index is written as the first, unnamed CSV column (to_csv default).
df.to_csv('dataset.csv')
# Display the combined dataframe
print(df)


                     date  temperature demand
0     2019-01-01 04:00:00     7.872500  16613
1     2019-01-01 05:00:00     8.672500  15774
2     2019-01-01 06:00:00     9.172500  15053
3     2019-01-01 07:00:00    10.822500  14481
4     2019-01-01 08:00:00    12.572500  13927
...                   ...          ...    ...
48928 2024-07-31 20:00:00    31.822498  26673
48929 2024-07-31 21:00:00    30.722500  27069
48930 2024-07-31 22:00:00    25.972500  27552
48931 2024-07-31 23:00:00    25.722500  27566
48932 2024-08-01 00:00:00    24.372499  27194

[48933 rows x 3 columns]
