In [4]:
import requests
from requests.auth import HTTPBasicAuth
import xmltodict
import pandas as pd
from datetime import datetime, timedelta

# Function to fetch data for a specific day with authentication
def fetch_data_for_day(date, username, password, timeout=30):
    """Fetch the ISO-NE generation fuel mix records for a single day.

    Args:
        date: Day to query, inserted into the URL as-is (callers pass
            ``YYYYMMDD`` strings).
        username: Account name for HTTP basic authentication.
        password: Password for HTTP basic authentication.
        timeout: Seconds to wait for the server before giving up. Requests
            waits indefinitely when no timeout is given, which would stall
            a long multi-day run on a single hung connection.

    Returns:
        A list of ``GenFuelMix`` record dicts, or ``None`` when the request
        fails, the server answers with a non-200 status, or the response
        contains no ``GenFuelMix`` records.
    """
    # API endpoint with the specific date
    url = f'https://webservices.iso-ne.com/api/v1.1/genfuelmix/day/{date}'

    # Make the API request with basic authentication. Network-level errors
    # are reported like HTTP errors so one bad day does not abort the run.
    try:
        response = requests.get(
            url,
            auth=HTTPBasicAuth(username, password),
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for {date}. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data for {date}. Status code: {response.status_code}")
        return None

    # Parse the XML response using xmltodict
    data_dict = xmltodict.parse(response.content)

    # An empty <GenFuelMixes/> element parses to None, and the element may
    # be absent altogether; treat both as "no records" instead of raising.
    records = (data_dict.get('GenFuelMixes') or {}).get('GenFuelMix')
    if records is None:
        print(f"No GenFuelMix records returned for {date}.")
        return None

    # xmltodict yields a single dict (not a list) when an element occurs
    # only once; normalise so callers can always iterate over records.
    if isinstance(records, dict):
        records = [records]

    return records

# Function to aggregate data for multiple days and store in a list
def aggregate_data(data_list, aggregated_data):
    """Accumulate per-fuel generation from *data_list* into *aggregated_data*.

    Args:
        data_list: Iterable of record dicts, each with ``'BeginDate'``,
            ``'FuelCategory'`` and ``'GenMw'`` keys. A single record dict is
            also accepted and treated as a one-element list.
        aggregated_data: Dict keyed by ``BeginDate`` that is updated in
            place. Each value is a row dict holding one running total per
            known fuel category plus the ``'BeginDate'`` itself.

    Returns:
        None. Results are written into *aggregated_data*.

    Raises:
        KeyError: If a record lacks one of the required keys.
        ValueError, TypeError: If ``GenMw`` cannot be converted to ``float``.
    """
    # List of all possible fuel types for the columns
    fuel_categories = ('Coal', 'Hydro', 'Natural Gas', 'Nuclear', 'Oil',
                       'Other', 'Landfill Gas', 'Refuse', 'Solar', 'Wind', 'Wood')

    # A lone record would otherwise be iterated key by key.
    if isinstance(data_list, dict):
        data_list = [data_list]

    # Loop through the data and aggregate based on 'BeginDate'
    for entry in data_list:
        begin_date = entry['BeginDate']
        fuel_category = entry['FuelCategory']
        gen_mw = float(entry['GenMw'])

        # Initialize the row for a new timestamp if it doesn't exist
        row = aggregated_data.get(begin_date)
        if row is None:
            row = {fuel: 0 for fuel in fuel_categories}
            row['BeginDate'] = begin_date
            aggregated_data[begin_date] = row

        # Check against the fuel list, not the row's keys: the row also
        # holds 'BeginDate', and adding MW to that string would raise.
        if fuel_category in fuel_categories:
            row[fuel_category] += gen_mw

# Function to save the aggregated data to a single CSV file
def save_all_data_to_csv(aggregated_data, output_filename):
    """Write the aggregated rows to *output_filename* as one CSV file.

    Args:
        aggregated_data: Dict mapping a key to a row dict; each row dict
            supplies the ``'BeginDate'`` value and the per-fuel totals.
        output_filename: Path of the CSV file to create or overwrite.

    Returns:
        None. Prints a confirmation message once the file is written.
    """
    # Desired column order in the output file
    column_order = ['BeginDate', 'Coal', 'Hydro', 'Natural Gas', 'Nuclear', 'Oil',
                    'Other', 'Landfill Gas', 'Refuse', 'Solar', 'Wind', 'Wood']

    # Convert the aggregated data to a DataFrame (one row per dict key)
    df = pd.DataFrame.from_dict(aggregated_data, orient='index')

    # reindex instead of df[column_order]: when nothing was aggregated the
    # frame has no columns and plain selection raises KeyError, whereas
    # reindex still yields the expected header (missing columns become NaN).
    df = df.reindex(columns=column_order)

    # Save the DataFrame to a CSV file; the dict keys are not written
    # because BeginDate is already a regular column.
    df.to_csv(output_filename, index=False)
    print(f"All data saved to {output_filename}")

# Main function to automate queries for a range of dates and store all data in one CSV
def automate_queries_with_auth_and_single_output(start_date, end_date, username, password, output_filename):
    """Query every day from *start_date* to *end_date* and write one CSV.

    Both dates are ``YYYYMMDD`` strings and the range is inclusive. Each
    day is fetched with the given credentials; days that return nothing are
    skipped. All collected rows are written to *output_filename* once the
    loop finishes.
    """
    day_format = "%Y%m%d"
    first_day = datetime.strptime(start_date, day_format)
    last_day = datetime.strptime(end_date, day_format)

    # Accumulator shared by every day in the range
    aggregated_data = {}

    # Inclusive day count; zero or negative when the range is reversed,
    # in which case the loop body never runs.
    total_days = (last_day - first_day).days + 1
    for offset in range(total_days):
        day_str = (first_day + timedelta(days=offset)).strftime(day_format)

        # Fetch the day's records and fold them into the accumulator
        records = fetch_data_for_day(day_str, username, password)
        if records:
            aggregate_data(records, aggregated_data)

    # Everything is collected; write the single output file
    save_all_data_to_csv(aggregated_data, output_filename)

# Example usage
# Credentials are read from the environment rather than stored in the
# source, so they cannot leak through version control or a shared notebook.
# Any password that was previously committed here should be rotated.
import os

username = os.environ.get('ISO_NE_USERNAME')
password = os.environ.get('ISO_NE_PASSWORD')
if not username or not password:
    raise SystemExit(
        "Set the ISO_NE_USERNAME and ISO_NE_PASSWORD environment variables "
        "before running."
    )

start_date = '20231001'
end_date = '20241001'
output_filename = 'genfuelmix_aggregatedyear.csv'

automate_queries_with_auth_and_single_output(start_date, end_date, username, password, output_filename)


All data saved to genfuelmix_aggregatedyear.csv
