# Plot the Load Characteristics of Data Centers

In [None]:
# Start by importing the packages we need:
import os
import datetime

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

from glob import glob
from matplotlib import pyplot 
from mpl_toolkits.axes_grid1 import make_axes_locatable


## Set the Directory Structure

In [None]:
# Identify the data input and output directories. Each path ends with a
# trailing slash because the plotting functions below build file names by
# appending to these strings.

# Directory holding the data center load inputs (e.g., the EPRI projections):
dc_load_input_dir = '/Users/burl878/Documents/Code/code_repos/data_center_loads/data/'

# Directory holding the TELL output files:
tell_output_dir = '/Users/burl878/Documents/IMMM/Data_Centers/Data/TELL_Output/'

# Directory that the figures are written to:
image_output_dir = '/Users/burl878/Documents/Code/code_repos/data_center_loads/figures/'


## Create a Function to Plot the Distribution of Data Center Loads by Balancing Authority


In [None]:
def plot_ba_dc_load_distributions(im3_scenario: str, epri_scenario: str, year: str, tell_output_dir: str, image_output_dir: str, image_resolution: int, save_images: bool = False) -> None:
    """
    Plot the annual data center load, the annual non data center load, and the data center
    percentage of the total annual load for every balancing authority (BA) in a TELL output file.

    The input file is read from <tell_output_dir>/<im3_scenario>/<year>/TELL_BA_Loads_<short scenario>.csv
    and must contain the columns 'BA_Code', 'Scaled_TELL_BA_Load_MWh', and 'Scaled_TELL_BA_Load_with_DC_MWh'.
    Each of the three panels is sorted in ascending order of the quantity it shows.

    :param im3_scenario:        Name of the IM3 scenario sub-directory (also used in the titles and file name)
    :param epri_scenario:       One of 'Low-Growth Scenario', 'Moderate-Growth Scenario',
                                'High-Growth Scenario', or 'Higher-Growth Scenario'
    :param year:                Year sub-directory to read from (also used in the titles)
    :param tell_output_dir:     Top-level directory containing the TELL output files
    :param image_output_dir:    Directory the figure is written to when save_images is true
    :param image_resolution:    Resolution (dpi) passed to plt.savefig
    :param save_images:         If true, save the figure as a .png file

    :raises ValueError:         If epri_scenario is not one of the four supported scenario names
    """

    # Set the short scenario name based on the EPRI scenario. Fail early with a clear message
    # instead of hitting an unbound variable further down when the name is not recognized:
    epri_scenario_short_names = {'Low-Growth Scenario': 'low_growth',
                                 'Moderate-Growth Scenario': 'moderate_growth',
                                 'High-Growth Scenario': 'high_growth',
                                 'Higher-Growth Scenario': 'higher_growth'}
    if epri_scenario not in epri_scenario_short_names:
        raise ValueError('Unknown EPRI scenario "' + str(epri_scenario) + '". Expected one of: ' +
                         ', '.join(epri_scenario_short_names))
    epri_scenario_short = epri_scenario_short_names[epri_scenario]

    # The year is used to build paths and titles so make sure it is a string:
    year = str(year)

    # Read in the load time series:
    tell_ba_df = pd.read_csv(os.path.join(tell_output_dir, im3_scenario, year, 'TELL_BA_Loads_' + epri_scenario_short + '.csv'))

    # Calculate the hourly data center loads:
    tell_ba_df['DC_Load_MWh'] = tell_ba_df['Scaled_TELL_BA_Load_with_DC_MWh'] - tell_ba_df['Scaled_TELL_BA_Load_MWh']

    # Sum the non data center and data center loads into annual total loads by BA (one row per BA)
    # and convert that value from MWh to TWh. "sort=False" keeps the BAs in the order they appear in the file:
    annual_mwh_df = tell_ba_df.groupby('BA_Code', sort=False)[['Scaled_TELL_BA_Load_MWh', 'DC_Load_MWh']].sum()
    plotting_df = (annual_mwh_df / 1000000).reset_index()
    plotting_df = plotting_df.rename(columns={'Scaled_TELL_BA_Load_MWh': 'Non_DC_Annual_Load_TWh',
                                              'DC_Load_MWh': 'DC_Annual_Load_TWh'})

    # Calculate the percentage of the total annual load that is due to data centers:
    annual_load_twh = plotting_df['Non_DC_Annual_Load_TWh'] + plotting_df['DC_Annual_Load_TWh']
    plotting_df['DC_Percentage'] = 100*(plotting_df['DC_Annual_Load_TWh'].div(annual_load_twh)).round(4)

    # Create an x-axis the length of the dataframe to be used in plotting:
    x_axis = np.arange(len(plotting_df))

    # Define the subplot position, column, y-axis label, title prefix, and panel label for each panel:
    panels = [(311, 'DC_Annual_Load_TWh', 'Data Center Annual Load [TWh]', 'Data Center Loads: ', 'a)'),
              (312, 'Non_DC_Annual_Load_TWh', 'Non Data Center Annual Load [TWh]', 'All Other Loads: ', 'b)'),
              (313, 'DC_Percentage', 'Percentage of Total [%]', 'Data Center Percentage of Total Loads: ', 'c)')]
    title_suffix = im3_scenario + ', EPRI ' + epri_scenario + ', ' + year

    # Make the plot:
    plt.figure(figsize=(20, 30))
    plt.rcParams['font.size'] = 18

    for subplot_position, column, y_label, title_prefix, panel_label in panels:
        # Sort once per panel so the bars and their tick labels come from the same ordering:
        sorted_df = plotting_df.sort_values(by=[column], ascending=True)

        plt.subplot(subplot_position)
        plt.grid(True)
        plt.bar(x_axis, sorted_df[column], 0.75)
        plt.xticks(x_axis, sorted_df['BA_Code'], rotation=90)
        plt.ylabel(y_label, fontsize=18)
        plt.title((title_prefix + title_suffix), fontsize=21)
        plt.title(panel_label, loc='left', fontsize=18)

    # If the "save_images" flag is set to true then save the plot to a .png file.
    # NOTE: the file name does not include the year, so figures for different years overwrite each other:
    if save_images:
        plt.savefig(os.path.join(image_output_dir, 'BA_DC_Loads_' + im3_scenario + '_' + epri_scenario_short + '.png'), dpi=image_resolution, bbox_inches='tight')


## Make the Plot


In [None]:
# Plot the BA-level load distributions for one scenario and year and save the figure:
plot_ba_dc_load_distributions(
    im3_scenario='rcp85hotter_ssp3',
    epri_scenario='Higher-Growth Scenario',
    year='2030',
    tell_output_dir=tell_output_dir,
    image_output_dir=image_output_dir,
    image_resolution=300,
    save_images=True,
)


## Create a Function to Plot the Load Time Series by BA


In [None]:
def plot_ba_load_time_series(im3_scenario: str, epri_scenario: str, ba: str, year: str, tell_output_dir: str, image_output_dir: str, image_resolution: int, save_images: bool = False) -> None:
    """
    Plot the hourly load time series for a single balancing authority (BA): the load with data centers,
    the original load, and the data center load (top panel), and the hourly percentage of the total
    load that is due to data centers (bottom panel).

    The input file is read from <tell_output_dir>/<im3_scenario>/<year>/TELL_BA_Loads_<short scenario>.csv
    and must contain the columns 'BA_Code', 'Time_UTC' (formatted as 'YYYY-MM-DD HH:MM:SS'),
    'Scaled_TELL_BA_Load_MWh', and 'Scaled_TELL_BA_Load_with_DC_MWh'.

    :param im3_scenario:        Name of the IM3 scenario sub-directory (also used in the titles and file name)
    :param epri_scenario:       One of 'Low-Growth Scenario', 'Moderate-Growth Scenario',
                                'High-Growth Scenario', or 'Higher-Growth Scenario'
    :param ba:                  Code of the BA to plot; must match a value in the 'BA_Code' column
    :param year:                Year sub-directory to read from (also used in the titles)
    :param tell_output_dir:     Top-level directory containing the TELL output files
    :param image_output_dir:    Directory the figure is written to when save_images is true
    :param image_resolution:    Resolution (dpi) passed to plt.savefig
    :param save_images:         If true, save the figure as a .png file

    :raises ValueError:         If epri_scenario is not one of the four supported scenario names
                                or if the input file has no rows for the requested BA
    """

    # Set the short scenario name based on the EPRI scenario. Fail early with a clear message
    # instead of hitting an unbound variable further down when the name is not recognized:
    epri_scenario_short_names = {'Low-Growth Scenario': 'low_growth',
                                 'Moderate-Growth Scenario': 'moderate_growth',
                                 'High-Growth Scenario': 'high_growth',
                                 'Higher-Growth Scenario': 'higher_growth'}
    if epri_scenario not in epri_scenario_short_names:
        raise ValueError('Unknown EPRI scenario "' + str(epri_scenario) + '". Expected one of: ' +
                         ', '.join(epri_scenario_short_names))
    epri_scenario_short = epri_scenario_short_names[epri_scenario]

    # The year is used to build paths and titles so make sure it is a string:
    year = str(year)

    # Read in the load time series:
    input_file = os.path.join(tell_output_dir, im3_scenario, year, 'TELL_BA_Loads_' + epri_scenario_short + '.csv')
    tell_ba_df = pd.read_csv(input_file)

    # Subset to just the data for the BA you want to plot. Take an explicit copy so that the
    # new columns added below are written to an independent dataframe rather than a slice:
    tell_ba_df = tell_ba_df.loc[tell_ba_df['BA_Code'] == ba].copy()

    # Stop if the BA is not in the file rather than silently producing an empty figure:
    if tell_ba_df.empty:
        raise ValueError('No rows found for BA "' + str(ba) + '" in ' + input_file)

    # Calculate the hourly data center loads:
    tell_ba_df['DC_Load_MWh'] = tell_ba_df['Scaled_TELL_BA_Load_with_DC_MWh'] - tell_ba_df['Scaled_TELL_BA_Load_MWh']

    # Convert the time to a datetime variable:
    tell_ba_df['Time_UTC'] = pd.to_datetime(tell_ba_df['Time_UTC'], format='%Y-%m-%d %H:%M:%S')

    # Calculate the hourly load fraction due to data centers:
    tell_ba_df['DC_Percentage'] = 100*(tell_ba_df['DC_Load_MWh'].div(tell_ba_df['Scaled_TELL_BA_Load_with_DC_MWh'])).round(4)

    # Make the plot:
    plt.figure(figsize=(24, 20))
    plt.rcParams['font.size'] = 18

    # Top panel: the three hourly load time series:
    plt.subplot(211)
    plt.grid(True)
    plt.plot(tell_ba_df['Time_UTC'], tell_ba_df['Scaled_TELL_BA_Load_with_DC_MWh'], color='r', linestyle='-', label='TELL Loads with Data Centers', linewidth=1)
    plt.plot(tell_ba_df['Time_UTC'], tell_ba_df['Scaled_TELL_BA_Load_MWh'], color='k', linestyle='-', label='Original TELL Load', linewidth=1)
    plt.plot(tell_ba_df['Time_UTC'], tell_ba_df['DC_Load_MWh'], color='b', linestyle='-', label='Data Center Loads', linewidth=1)
    plt.legend(loc='upper left', prop={'size': 14})
    plt.xlabel('')
    plt.ylabel('Hourly Load [MWh]')
    plt.title((ba + ' Load Time Series in ' + year + ': ' + im3_scenario + ', EPRI ' + epri_scenario))
    plt.title('a)', loc='left', fontsize=16)

    # Bottom panel: the hourly data center percentage of the total load:
    plt.subplot(212)
    plt.grid(True)
    plt.plot(tell_ba_df['Time_UTC'], tell_ba_df['DC_Percentage'], color='k', linestyle='-', label='Data Center Load Percentage', linewidth=1)
    plt.legend(loc='upper left', prop={'size': 14})
    plt.xlabel('')
    plt.ylabel('Percentage of Total [%]')
    plt.title((ba + ' Data Center Percentage of Total Loads in ' + year + ': ' + im3_scenario + ', EPRI ' + epri_scenario))
    plt.title('b)', loc='left', fontsize=16)

    # If the "save_images" flag is set to true then save the plot to a .png file.
    # NOTE: the file name does not include the year, so figures for different years overwrite each other:
    if save_images:
        plt.savefig(os.path.join(image_output_dir, ba + '_Time_Series_' + im3_scenario + '_' + epri_scenario_short + '.png'), dpi=image_resolution, bbox_inches='tight')


In [None]:
# Plot the hourly load time series for a single BA and save the figure:
plot_ba_load_time_series(
    im3_scenario='rcp85hotter_ssp3',
    epri_scenario='Higher-Growth Scenario',
    ba='PJM',
    year='2030',
    tell_output_dir=tell_output_dir,
    image_output_dir=image_output_dir,
    image_resolution=300,
    save_images=True,
)


## Plot the Data Center Load Impacts by State

In [None]:
def plot_state_distributions(im3_scenario: str, epri_scenario: str, dc_load_input_dir: str, tell_output_dir: str, image_output_dir: str, image_resolution: int, save_images: bool = False) -> pd.DataFrame:
    """
    Compare the state-level data center loads in the TELL output against the EPRI projections for 2030.

    The top panel is a scatter plot of the EPRI versus TELL annual data center loads by state and the
    bottom panel is a scatter plot of the EPRI versus TELL data center percentage of total loads by state.
    Both panels include a 1:1 reference line.

    The TELL data is read from <tell_output_dir>/<im3_scenario>/2030/TELL_State_Summary_Data_<short scenario>.csv
    and must contain the columns 'State_Name', 'Total_DC_Load_MWh', and 'Loads_With_DC_TWh'. The EPRI data is
    read from 'Sheet1' of <dc_load_input_dir>/EPRI_2024_Projections.xlsx.

    :param im3_scenario:        Name of the IM3 scenario sub-directory (also used in the titles and file name)
    :param epri_scenario:       One of 'Low-Growth Scenario', 'Moderate-Growth Scenario',
                                'High-Growth Scenario', or 'Higher-Growth Scenario'
    :param dc_load_input_dir:   Directory containing the EPRI projections Excel file
    :param tell_output_dir:     Top-level directory containing the TELL output files
    :param image_output_dir:    Directory the figure is written to when save_images is true
    :param image_resolution:    Resolution (dpi) passed to plt.savefig
    :param save_images:         If true, save the figure as a .png file

    :return:                    The TELL state-level dataframe left-merged with the EPRI values. States
                                without a matching EPRI row have their EPRI columns set to 0.

    :raises ValueError:         If epri_scenario is not one of the four supported scenario names
    """

    # Set the short scenario name and the EPRI percentage column based on the EPRI scenario. Fail early with
    # a clear message instead of hitting an unbound variable further down when the name is not recognized:
    scenario_settings = {'Low-Growth Scenario': ('low_growth', '% of State Consumed.1'),
                         'Moderate-Growth Scenario': ('moderate_growth', '% of State Consumed.2'),
                         'High-Growth Scenario': ('high_growth', '% of State Consumed.3'),
                         'Higher-Growth Scenario': ('higher_growth', '% of State Consumed.4')}
    if epri_scenario not in scenario_settings:
        raise ValueError('Unknown EPRI scenario "' + str(epri_scenario) + '". Expected one of: ' +
                         ', '.join(scenario_settings))
    epri_scenario_short, epri_percentage_column = scenario_settings[epri_scenario]

    # The EPRI load column name embeds the full scenario name:
    epri_load_column = '2030 Load ' + epri_scenario + ' (MWh/y)'

    # Read in the state-level TELL summary data:
    state_df = pd.read_csv(os.path.join(tell_output_dir, im3_scenario, '2030', 'TELL_State_Summary_Data_' + epri_scenario_short + '.csv'))

    # Calculate the load fraction due to data centers:
    state_df['DC_Percentage'] = 100*((state_df['Total_DC_Load_MWh'] / 1000000).div(state_df['Loads_With_DC_TWh'])).round(4)

    # Read in the data from the Excel file:
    epri_df = pd.read_excel(os.path.join(dc_load_input_dir, 'EPRI_2024_Projections.xlsx'), sheet_name='Sheet1')

    # Subset the data by scenario and clean up the naming:
    epri_df = epri_df[['State', epri_load_column, epri_percentage_column]].rename(columns={'State': 'State_Name',
                                                                                            epri_load_column: 'EPRI_DC_Load_MWh',
                                                                                            epri_percentage_column: 'EPRI_DC_Percentage'})

    # Align the units (the EPRI percentage is presumably stored as a fraction in the spreadsheet - TODO confirm):
    state_df['Total_DC_Load_TWh'] = state_df['Total_DC_Load_MWh'] / 1000000
    epri_df['EPRI_DC_Load_TWh'] = epri_df['EPRI_DC_Load_MWh'] / 1000000
    epri_df['EPRI_DC_Percentage'] = epri_df['EPRI_DC_Percentage']*100

    # Merge the TELL and data center dataframes together based on common state names:
    state_df = state_df.merge(epri_df, on=['State_Name'], how='left')

    # Replace NaN values with 0. The TWh column is filled as well because it is the one
    # that is plotted and it would otherwise disagree with the MWh column:
    for epri_column in ['EPRI_DC_Load_MWh', 'EPRI_DC_Load_TWh', 'EPRI_DC_Percentage']:
        state_df[epri_column] = state_df[epri_column].fillna(0)

    # Set the axis limits from the larger of the EPRI and TELL values so that no points are cut off:
    load_axis_max = 1.02*state_df[['EPRI_DC_Load_TWh', 'Total_DC_Load_TWh']].max().max()
    percentage_axis_max = 1.02*state_df[['EPRI_DC_Percentage', 'DC_Percentage']].max().max()

    # Make the plot:
    plt.figure(figsize=(24, 20))
    plt.rcParams['font.size'] = 18

    # Top panel: annual data center loads with a 1:1 line spanning the full axis range:
    plt.subplot(211)
    plt.grid(True)
    plt.plot([0, load_axis_max], [0, load_axis_max], 'k', linewidth=1, label='1:1')
    plt.scatter(state_df['EPRI_DC_Load_TWh'], state_df['Total_DC_Load_TWh'], s=50, c='blue')
    plt.xlim(0, load_axis_max)
    plt.ylim(0, load_axis_max)
    plt.xlabel('EPRI Data Center Loads [TWh]')
    plt.ylabel('TELL Data Center Loads [TWh]')
    plt.title(('State Data Center Loads Comparison with EPRI in 2030: ' + im3_scenario + ', EPRI ' + epri_scenario))
    plt.title('a)', loc='left', fontsize=16)

    # Bottom panel: data center percentage of total loads with a 1:1 line spanning the full axis range:
    plt.subplot(212)
    plt.grid(True)
    plt.plot([0, percentage_axis_max], [0, percentage_axis_max], 'k', linewidth=1, label='1:1')
    plt.scatter(state_df['EPRI_DC_Percentage'], state_df['DC_Percentage'], s=50, c='blue')
    plt.xlim(0, percentage_axis_max)
    plt.ylim(0, percentage_axis_max)
    plt.xlabel('EPRI Data Center Percentage of Total Loads [%]')
    plt.ylabel('TELL Data Center Percentage of Total Loads [%]')
    plt.title(('State Data Center Percent of Total Loads in 2030: ' + im3_scenario + ', EPRI ' + epri_scenario))
    plt.title('b)', loc='left', fontsize=16)

    # If the "save_images" flag is set to true then save the plot to a .png file:
    if save_images:
        plt.savefig(os.path.join(image_output_dir, 'State_Distributions_2030_' + im3_scenario + '_' + epri_scenario_short + '.png'), dpi=image_resolution, bbox_inches='tight')

    return state_df


In [None]:
# Plot the state-level comparison against EPRI, save the figure, and keep the merged dataframe:
output_df = plot_state_distributions(
    im3_scenario='rcp85hotter_ssp3',
    epri_scenario='Higher-Growth Scenario',
    dc_load_input_dir=dc_load_input_dir,
    tell_output_dir=tell_output_dir,
    image_output_dir=image_output_dir,
    image_resolution=300,
    save_images=True,
)

# Display the merged state-level dataframe in the notebook:
output_df
