# Demand Data Collection

In [None]:
import requests
import pandas as pd

## Electricity Zones by Regional Transmission Organization (RTO)

## MISO (Midcontinent Independent System Operator)
- **(0001)** Zone 1
- **(0004)** Zone 4
- **(0006)** Zone 6
- **(0027)** Zones 2 and 7
- **(0035)** Zones 3 and 5
- **(8910)** Zones 8, 9 and 10

## ISNE (ISO New England)
- **(4001)** Maine
- **(4002)** New Hampshire
- **(4003)** Vermont
- **(4004)** Connecticut
- **(4005)** Rhode Island
- **(4006)** Southeast Mass.
- **(4007)** Western/Central Mass.
- **(4008)** Northeast Mass.

## PNM (Public Service Company of New Mexico)
- **(ACMA)** City of Acoma Pueblo
- **(CYGA)** PNM-CYGA
- **(Frep)** Frep
- **(Jica)** Jicarilla Apache Nation
- **(KAFB)** KAFB
- **(KCEC)** KCEC
- **(LAC)** Los Alamos County
- **(NTUA)** Navajo Tribal Utility Authority
- **(PNM)** PNM System Firm Load
- **(TSGT)** TSGT

## PJM (PJM Interconnection)
- **(AE)** Atlantic Electric zone
- **(AEP)** American Electric Power zone
- **(AP)** Allegheny Power zone
- **(ATSI)** American Transmission Systems, Inc. zone
- **(BC)** Baltimore Gas & Electric zone
- **(CE)** Commonwealth Edison zone
- **(DAY)** Dayton Power & Light zone
- **(DEOK)** Duke Energy Ohio/Kentucky zone
- **(DOM)** Dominion Virginia Power zone
- **(DPL)** Delmarva Power & Light zone
- **(DUQ)** Duquesne Lighting Company zone
- **(EKPC)** East Kentucky Power Cooperative zone
- **(JC)** Jersey Central Power & Light zone
- **(ME)** Metropolitan Edison zone
- **(PE)** PECO Energy zone
- **(PEP)** Potomac Electric Power zone
- **(PL)** Pennsylvania Power zone
- **(PN)** Pennsylvania Electric zone
- **(PS)** Public Service Electric & Gas of New Jersey zone
- **(RECO)** Rockland Electric (East) zone

## ERCO (ERCOT - Electric Reliability Council of Texas)
- **(COAS)** Coast
- **(EAST)** East
- **(FWES)** Far West
- **(NCEN)** North Central
- **(NRTH)** North
- **(SCEN)** South Central
- **(SOUT)** South
- **(WEST)** West

## SWPP (Southwest Power Pool)
- **(CSWS)** AEPW American Electric Power West
- **(EDE)** Empire District Electric Company
- **(GRDA)** Grand River Dam Authority
- **(INDN)** Independence Power & Light
- **(KACY)** Kansas City Board of Public Utilities
- **(KCPL)** Kansas City Power & Light
- **(LES)** Lincoln Electric System
- **(MPS)** KCP&L Greater Missouri Operations
- **(NPPD)** Nebraska Public Power District
- **(OKGE)** Oklahoma Gas and Electric Co.
- **(OPPD)** Omaha Public Power District
- **(SECI)** Sunflower Electric
- **(SPRM)** City of Springfield
- **(SPS)** Southwestern Public Service Company
- **(WAUE)** Western Area Power Upper Great Plains East
- **(WFEC)** Western Farmers Electric Cooperative
- **(WR)** Westar Energy

## CISO (California ISO)
- **(PGAE)** Pacific Gas and Electric
- **(SCE)** Southern California Edison
- **(SDGE)** San Diego Gas and Electric
- **(VEA)** Valley Electric Association

## NYIS (New York Independent System Operator)
- **(ZONA)** West
- **(ZONB)** Genesee
- **(ZONC)** Central
- **(ZOND)** North
- **(ZONE)** Mohawk Valley
- **(ZONF)** Capital
- **(ZONG)** Hudson Valley
- **(ZONH)** Millwood
- **(ZONI)** Dunwoodie
- **(ZONJ)** New York City
- **(ZONK)** Long Island


In [49]:
import os

# NOTE(review): this key is committed in the notebook. Set EIA_API_KEY in the
# environment to override it, and consider rotating the literal fallback.
DEMAND_API_KEY = os.environ.get("EIA_API_KEY", "f8tGzRmnyw6dJyy1PyS49REmg1qrT2isvVi8i9mt")

def get_demand_data(subba, start_date, end_date):
    """Fetch demand rows for one sub-BA zone from the EIA v2 API.

    Args:
        subba: Zone code sent as the ``facets[subba][]`` filter (e.g. "EAST").
        start_date: Value of the ``start`` query parameter; the notebook
            passes "YYYY-MM-DD" strings.
        end_date: Value of the ``end`` query parameter, same format.

    Returns:
        A DataFrame built from ``response["response"]["data"]`` with the
        'subba', 'parent' and 'parent-name' columns removed. When the request
        does not return HTTP 200, or the payload holds no rows, an empty
        DataFrame with the usual column names is returned so callers can
        still rename and merge it.
    """
    demand_url = "https://api.eia.gov/v2/electricity/rto/region-sub-ba-data/data/?api_key={0}&data[]=value&facets[subba][]={1}&start={2}&end={3}".format(DEMAND_API_KEY, subba, start_date, end_date)

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(demand_url, timeout=30)

    # Column names as they appear in a successful response after the drop below.
    no_rows = pd.DataFrame(columns=["period", "subba-name", "value", "value-units"])

    if response.status_code != 200:
        print(f"Failed to retrieve data: {response.status_code}")
        return no_rows

    # NOTE(review): the EIA API appears to cap the rows returned per request;
    # long ranges may need offset/length paging -- confirm against the API docs.
    rows = response.json()["response"]["data"]
    if not rows:
        # An empty list would give a column-less frame and make drop() raise.
        return no_rows

    df_demand = pd.DataFrame(rows)
    return df_demand.drop(columns=['subba', 'parent', 'parent-name'], errors="ignore")

# Sample pull: Texas ERCO East zone between two consecutive dates.
subba, start_date, end_date = "EAST", "2024-09-27", "2024-09-28"

df_demand = get_demand_data(subba, start_date, end_date)


In [50]:
df_demand.head()

Unnamed: 0,period,subba-name,value,value-units
0,2024-09-27T05,ERCO - East,1546,megawatthours
1,2024-09-27T04,ERCO - East,1698,megawatthours
2,2024-09-27T03,ERCO - East,1841,megawatthours
3,2024-09-27T02,ERCO - East,1948,megawatthours
4,2024-09-27T01,ERCO - East,2040,megawatthours


# Weather data collection

In [75]:
import os

# NOTE(review): this key is committed in the notebook. Set WWO_API_KEY in the
# environment to override it, and consider rotating the literal fallback.
WEATHER_API_KEY = os.environ.get("WWO_API_KEY", '820479673a8444f69ac162421242809')

def get_weather_data(location, start_date, end_date):
    """Fetch past weather for a location from the World Weather Online API.

    Args:
        location: Value of the ``q`` parameter; the notebook passes a
            "latitude,longitude" string.
        start_date: Value of the ``date`` parameter.
        end_date: Value of the ``enddate`` parameter.

    Returns:
        A DataFrame built from ``response["data"]["weather"]`` (one row per
        entry of that list). When the request does not return HTTP 200, or the
        payload carries no weather entries, an empty DataFrame with 'date' and
        'hourly' columns is returned instead.
    """
    weather_url = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx?key={0}&q={1}&format=json&date={2}&enddate={3}&tp=1".format(WEATHER_API_KEY, location, start_date, end_date)

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(weather_url, timeout=30)

    # 'date' and 'hourly' are the two columns process_weather_data reads.
    no_rows = pd.DataFrame(columns=["date", "hourly"])

    if response.status_code != 200:
        print(f"Failed to retrieve data: {response.status_code}")
        return no_rows

    # A 200 response can still lack the weather list; report it rather than
    # failing with a KeyError.
    daily_entries = response.json().get("data", {}).get("weather")
    if not daily_entries:
        print("Failed to retrieve data: response contains no weather entries")
        return no_rows

    return pd.DataFrame(daily_entries)
    
def process_weather_data(df_weather):
    """Flatten per-day weather rows into one row per hour.

    Args:
        df_weather: DataFrame with a 'date' column and an 'hourly' column whose
            cells are lists of per-hour dicts. Each dict needs a 'time' key
            such as "0", "100" or "2300".

    Returns:
        A DataFrame with one row per hourly dict. 'datetime' comes first,
        formatted "<date>T<HH>", followed by the remaining hourly fields. The
        columns 'time', 'tempC', 'windspeedKmph', 'weatherIconUrl',
        'weatherDesc', 'winddirDegree' and 'winddir16Point' are removed when
        present. If there is nothing to process, the result is an empty
        DataFrame with only a 'datetime' column. The input's hourly dicts are
        not modified.
    """
    dropped_columns = ['time', 'tempC', 'windspeedKmph', 'weatherIconUrl', 'weatherDesc', 'winddirDegree', 'winddir16Point']

    # A failed download may hand over something that is not a populated frame.
    if not isinstance(df_weather, pd.DataFrame) or df_weather.empty:
        return pd.DataFrame(columns=['datetime'])

    records = []
    for date, hourly_data in zip(df_weather["date"], df_weather["hourly"]):
        for hour in hourly_data:
            # 'time' is "0", "100", ... "2300"; pad to HHMM and keep the hour.
            hour_of_day = str(hour["time"]).zfill(4)[:2]
            # Build a new dict so the caller's hourly entries stay untouched.
            records.append({**hour, "datetime": f"{date}T{hour_of_day}"})

    if not records:
        return pd.DataFrame(columns=['datetime'])

    df_processed = pd.DataFrame(records)
    cols = ['datetime'] + [col for col in df_processed.columns if col != 'datetime']
    return df_processed[cols].drop(columns=dropped_columns, errors="ignore")
    
    
# For this API, start date and end date should fall in the same year.
location = "30.2672,-97.7431"  # austin
start_date, end_date = '27-SEP-2024', '28-SEP-2024'

df_weather = get_weather_data(location, start_date, end_date)
df_processed = process_weather_data(df_weather)
     

In [76]:
df_weather.head()

Unnamed: 0,date,astronomy,maxtempC,maxtempF,mintempC,mintempF,avgtempC,avgtempF,totalSnow_cm,sunHour,uvIndex,hourly
0,2024-09-27,"[{'sunrise': '07:23 AM', 'sunset': '07:21 PM',...",33,92,19,67,26,79,0.0,12.0,7,"[{'time': '0', 'tempC': '23', 'tempF': '74', '..."
1,2024-09-28,"[{'sunrise': '07:23 AM', 'sunset': '07:19 PM',...",35,96,21,69,27,81,0.0,12.0,7,"[{'time': '0', 'tempC': '25', 'tempF': '76', '..."


In [77]:
df_processed.head()

Unnamed: 0,datetime,tempF,windspeedMiles,weatherCode,precipMM,precipInches,humidity,visibility,visibilityMiles,pressure,...,HeatIndexF,DewPointC,DewPointF,WindChillC,WindChillF,WindGustMiles,WindGustKmph,FeelsLikeC,FeelsLikeF,uvIndex
0,2024-09-27T00,74,4,113,0.0,0.0,50,10,6,1010,...,77,12,54,23,74,7,11,25,77,1
1,2024-09-27T01,73,3,113,0.0,0.0,52,10,6,1010,...,75,12,54,23,73,6,10,24,75,1
2,2024-09-27T02,71,3,113,0.0,0.0,55,10,6,1010,...,73,12,54,22,71,7,11,22,71,1
3,2024-09-27T03,70,4,113,0.0,0.0,57,10,6,1010,...,71,12,54,21,70,9,14,21,70,1
4,2024-09-27T04,69,5,113,0.0,0.0,59,10,6,1010,...,70,12,54,21,69,10,16,21,69,1


# Merging demand and weather

In [45]:
# [latitude, longitude] lookup point for each ERCO zone code; these are the
# coordinates sent to the weather API when building the merged dataset.
zones_texas = {
    code: [latitude, longitude]
    for code, latitude, longitude in (
        ("COAS", 29.7604, -95.3698),   # Houston
        ("EAST", 32.3513, -94.7404),   # Approximate center
        ("FWES", 31.7619, -106.4850),  # El Paso (not in ERCOT, for reference)
        ("NCEN", 32.7767, -96.7970),   # Dallas
        ("NRTH", 33.5007, -101.6568),  # Approximate center
        ("SCEN", 30.2672, -97.7431),   # Austin
        ("SOUT", 29.4241, -98.4936),   # San Antonio
        ("WEST", 31.8457, -102.3676),  # Approximate center
    )
}

# [latitude, longitude] lookup point for each ISO New England zone code.
zones_new_england = {
    code: [latitude, longitude]
    for code, latitude, longitude in (
        ("4001", 45.2538, -69.4455),  # Maine
        ("4002", 43.1939, -71.5724),  # New Hampshire
        ("4003", 44.5588, -72.5778),  # Vermont
        ("4004", 41.6032, -72.7290),  # Connecticut
        ("4005", 41.5801, -71.4774),  # Rhode Island
        ("4006", 41.9084, -70.7245),  # Southeast Mass.
        ("4007", 42.1172, -72.5399),  # Western/Central Mass.
        ("4008", 42.3601, -71.0589),  # Northeast Mass.
    )
}

In [101]:
from datetime import datetime

def split_dates_yearwise(start_date, end_date):
    """Break a date range into consecutive pieces that each stay in one year.

    Args:
        start_date: Range start as a "YYYY-MM-DD" string.
        end_date: Range end as a "YYYY-MM-DD" string.

    Returns:
        A list of ``[piece_start, piece_end]`` string pairs, one per calendar
        year touched by the range. The first piece starts at ``start_date``,
        the last ends at ``end_date``, and every boundary in between falls on
        January 1st / December 31st. The list is empty when the start year is
        after the end year.
    """
    first_year = datetime.strptime(start_date, "%Y-%m-%d").year
    last_year = datetime.strptime(end_date, "%Y-%m-%d").year

    return [
        [
            # Only the first piece keeps the caller's start date.
            start_date if year == first_year else f"{year}-01-01",
            # Only the last piece keeps the caller's end date.
            end_date if year >= last_year else f"{year}-12-31",
        ]
        for year in range(first_year, last_year + 1)
    ]

def generate_dataset(zones, start_date, end_data):
    """Build a merged weather + demand DataFrame for every zone.

    The range is split into per-year pieces with ``split_dates_yearwise``.
    Demand and weather are fetched for each piece and inner-joined on the
    'datetime' column, so rows missing from either source are dropped.

    Args:
        zones: Mapping of zone code -> [latitude, longitude]. A plain iterable
            of zone codes is still accepted; coordinates are then looked up in
            ``zones_texas``, as before.
        start_date: Range start as a "YYYY-MM-DD" string.
        end_data: Range end as a "YYYY-MM-DD" string (parameter name kept for
            existing callers).

    Returns:
        A dict mapping each zone code to its DataFrame: the per-piece merges
        concatenated in order, with a 'zone' column holding the zone code.
    """
    dates = split_dates_yearwise(start_date, end_data)
    # Use the coordinates supplied by the caller rather than always zones_texas.
    coordinates_by_zone = zones if isinstance(zones, dict) else zones_texas
    df_map = {}

    for zone in zones:
        print(zone)
        city_location = ','.join(map(str, coordinates_by_zone[zone]))
        date_df_list = []

        for start, end in dates:
            print(start, end)

            df_demand = get_demand_data(zone, start, end)

            # Weather must cover the same per-year piece as demand; the API is
            # documented in this notebook as needing both dates in one year.
            df_weather = get_weather_data(city_location, start, end)
            df_weather = process_weather_data(df_weather)

            # Demand calls its timestamp 'period'; align it with the weather key.
            df_demand = df_demand.rename(columns={'period': 'datetime'})
            df_merged_dataset = pd.merge(df_weather, df_demand, on='datetime', how='inner')
            df_merged_dataset['zone'] = zone
            date_df_list.append(df_merged_dataset)

        df_map[zone] = pd.concat(date_df_list, ignore_index=True)
    return df_map

# Range that crosses a year boundary, so it is fetched as two per-year pieces.
start_date, end_data = "2023-12-25", "2024-01-02"
df_map = generate_dataset(zones=zones_texas, start_date=start_date, end_data=end_data)





COAS
2023-12-25 2023-12-31
2024-01-01 2024-01-02


In [100]:
df_map["NRTH"].head()

Unnamed: 0,datetime,tempF,windspeedMiles,weatherCode,precipMM,precipInches,humidity,visibility,visibilityMiles,pressure,...,WindChillF,WindGustMiles,WindGustKmph,FeelsLikeC,FeelsLikeF,uvIndex,subba-name,value,value-units,zone
0,2023-12-25T00,39,16,113,0.0,0.0,60,10,6,1018,...,30,23,37,-1,30,1,ERCO - North,1342,megawatthours,NRTH
1,2023-12-25T01,38,16,113,0.0,0.0,62,10,6,1019,...,28,23,37,-2,28,1,ERCO - North,1379,megawatthours,NRTH
2,2023-12-25T02,36,16,113,0.0,0.0,63,10,6,1019,...,27,23,37,-3,27,1,ERCO - North,1382,megawatthours,NRTH
3,2023-12-25T03,35,17,113,0.0,0.0,62,10,6,1020,...,26,24,39,-4,26,1,ERCO - North,1393,megawatthours,NRTH
4,2023-12-25T04,35,16,113,0.0,0.0,61,10,6,1020,...,25,23,38,-4,25,1,ERCO - North,1404,megawatthours,NRTH


In [95]:
# Stack every zone's frame, order the rows by timestamp, and write one CSV.
all_zone_frames = pd.concat(df_map.values(), ignore_index=True)
df_combined = all_zone_frames.sort_values(by='datetime')
df_combined.to_csv('merged_zones_weather_demand_data.csv', index=False)