In [36]:
import pandas as pd
import numpy as np
import glob
import re
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from datetime import datetime, timedelta

%reload_ext autoreload
%autoreload 2

#### Downscale LADWP hourly generation to the Harbor region using the Harbor peak as a fraction of the system peak.
#### The downscaled series is to be used for the max excess energy calculation.

In [37]:
# Harbor peak as a fraction of the LADWP system peak; used further down to scale
# the system-wide 'Total_Generation_(MWh)' column.
# NOTE(review): the source of the 0.073 value is not shown in this notebook -- confirm.
harbor_fraction_system_peak = 0.073

In [38]:
# Read every CSV in the folder of EIA LADWP hourly net generation data (by energy
# source) and concatenate them into one DataFrame.
path = 'data/grid_emissions/LADWP_hourly_gen_by_source'
# glob returns matches in arbitrary, OS-dependent order; sort so the row order of
# the concatenated frame is reproducible between runs and machines.
filenames = sorted(glob.glob(path + "/*.csv"))
if not filenames:
    # Without this guard pd.concat([]) fails with "No objects to concatenate",
    # which hides the real cause (wrong working directory / missing data).
    raise FileNotFoundError("No CSV files found in '" + path + "'")

dfs = [pd.read_csv(filename) for filename in filenames]

# Concatenate into one dataframe.
hourly_gen_by_source = pd.concat(dfs, ignore_index=True)

# Replace each run of whitespace in the column names with an underscore.
# regex=True must be explicit: from pandas 2.0 the default is a literal match, so
# the names would stay unchanged and later column lookups would fail.
hourly_gen_by_source.columns = hourly_gen_by_source.columns.str.replace(r'\s+', '_', regex=True)

In [39]:
# Split 'Timestamp_(Hour_Ending)' at its first space into the date and the remainder.
# `n` is passed by keyword: passing it positionally was deprecated in pandas 1.4
# and raises a TypeError in pandas 2.x.
date_and_hour = (
    hourly_gen_by_source['Timestamp_(Hour_Ending)']
    .str.split(' ', n=1, expand=True)
    .rename(columns={0: 'date', 1: 'hour_ending'})
)
hourly_gen_by_source = hourly_gen_by_source.join(date_and_hour)

# Drop rows that are exact duplicates (all columns equal) and renumber the index.
hourly_gen_by_source = hourly_gen_by_source.drop_duplicates(ignore_index=True)

# Split the remainder at its first space into the hour number and the am/pm part.
hour_and_am_pm = (
    hourly_gen_by_source['hour_ending']
    .str.split(' ', n=1, expand=True)
    .rename(columns={0: 'hour_ends', 1: 'am/pm'})
)
hourly_gen_by_source = hourly_gen_by_source.join(hour_and_am_pm)

# Drop the last four characters of the am/pm part; later cells compare the
# result against 'a.m.' and 'p.m.'.
# NOTE(review): presumably this removes a trailing time-zone tag such as ' PST' --
# confirm against the raw files.
hourly_gen_by_source['am/pm'] = hourly_gen_by_source['am/pm'].str[:-4]
hourly_gen_by_source['date'] = pd.to_datetime(hourly_gen_by_source['date'])
hourly_gen_by_source['hour_ends'] = pd.to_numeric(hourly_gen_by_source['hour_ends'])

In [40]:
# Fill hours whose total generation is missing with the values from the same
# clock hour (same 'hour_ends' and 'am/pm') on the previous day. Both the total
# and the coal value of the row are overwritten with the previous day's values.
total_col = 'Total_Generation_(MWh)'
coal_col = 'Coal_Generation_(MWh)'

# Snapshot the labels of the rows to fill before modifying the frame; rows filled
# earlier in the loop can then serve as the "previous day" for later rows.
missing_index = hourly_gen_by_source.index[hourly_gen_by_source[total_col].isna()]
for index in missing_index:
    date = hourly_gen_by_source.loc[index, 'date']
    date_previous = date - timedelta(days=1)
    time = hourly_gen_by_source.loc[index, 'hour_ends']
    am_pm = hourly_gen_by_source.loc[index, 'am/pm']

    previous_day_inds = hourly_gen_by_source['date'] == date_previous
    time_inds = hourly_gen_by_source['hour_ends'] == time
    am_pm_inds = hourly_gen_by_source['am/pm'] == am_pm
    # Look the previous-day row up once and read both values from it.
    previous_rows = hourly_gen_by_source.loc[
        previous_day_inds & time_inds & am_pm_inds, [total_col, coal_col]
    ]
    if len(previous_rows) != 1:
        # .item() would raise the same exception type, but without saying which
        # hour could not be filled.
        raise ValueError(
            "Expected exactly one row for {} hour {} {} to fill row {}, found {}".format(
                date_previous, time, am_pm, index, len(previous_rows)
            )
        )
    previous_gen = previous_rows[total_col].item()
    previous_coal = previous_rows[coal_col].item()
    # Report rows that are being filled with a zero value.
    if previous_gen == 0:
        print(index)
    if previous_coal == 0:
        print(index)

    hourly_gen_by_source.loc[index, total_col] = previous_gen
    hourly_gen_by_source.loc[index, coal_col] = previous_coal

# Set negative coal generation values to zero in one masked assignment.
negative_coal = hourly_gen_by_source[coal_col] < 0
hourly_gen_by_source.loc[negative_coal, coal_col] = 0

In [41]:
# Convert the 12-hour 'hour_ends' + 'am/pm' labels into a single 'hour' column
# (hour_ends on a 1-24 scale, minus one), then keep rows dated 2019-01-01 or later.
pm_inds = hourly_gen_by_source['am/pm'] == 'p.m.'
am_inds = hourly_gen_by_source['am/pm'] == 'a.m.'

# p.m. rows: every hour label below 12 is shifted by 12; a label of 12 is kept.
hourly_gen_by_source_pm = hourly_gen_by_source[pm_inds].reset_index()
shift_by_twelve = hourly_gen_by_source_pm['hour_ends'] < 12
hourly_gen_by_source_pm.loc[shift_by_twelve, 'hour_ends'] += 12

# a.m. rows: a label of 12 is moved to the previous date and relabelled as 24.
hourly_gen_by_source_am = hourly_gen_by_source[am_inds].reset_index()
ends_at_twelve_am = hourly_gen_by_source_am['hour_ends'] == 12
hourly_gen_by_source_am.loc[ends_at_twelve_am, 'date'] -= timedelta(days=1)
hourly_gen_by_source_am.loc[ends_at_twelve_am, 'hour_ends'] = 24

hourly_gen_reformatted = pd.concat(
    [hourly_gen_by_source_am, hourly_gen_by_source_pm],
    ignore_index=True,
)

# 'hour' is the hour-ending label minus one.
hourly_gen_reformatted['hour'] = hourly_gen_reformatted['hour_ends'] - 1
# Remove the intermediate parsing columns and the old index kept by reset_index().
parsing_columns = ['index', 'hour_ending', 'hour_ends', 'am/pm', 'Timestamp_(Hour_Ending)']
hourly_gen_reformatted = (
    hourly_gen_reformatted
    .drop(columns=parsing_columns)
    .sort_values(by=['date', 'hour'])
)
from_2019_onward = hourly_gen_reformatted['date'] >= '2019-01-01'
hourly_gen_by_source_reformatted = hourly_gen_reformatted[from_2019_onward]

In [42]:
# Display the column labels of the reformatted frame.
hourly_gen_by_source_reformatted.columns

Index(['BA_Code', 'Total_Generation_(MWh)', 'Wind_Generation_(MWh)',
       'Solar_Generation_(MWh)', 'Hydro_Generation_(MWh)',
       'Other_Generation_(MWh)', 'Natural_gas_Generation_(MWh)',
       'Coal_Generation_(MWh)', 'date', 'hour'],
      dtype='object')

In [43]:
# Keep only the total generation together with its date and hour.
columns_to_keep = ['Total_Generation_(MWh)', 'date', 'hour']
hourly_gen_by_source_reformatted = hourly_gen_by_source_reformatted[columns_to_keep]

In [45]:
# Replace the existing index with a fresh 0..n-1 index; drop=True discards the
# old index instead of keeping it as a column.
hourly_gen_by_source_reformatted = hourly_gen_by_source_reformatted.reset_index(drop=True)

In [48]:
# Scale the total generation by harbor_fraction_system_peak.
# NOTE: the column is overwritten with its scaled value, so running this cell
# more than once applies the factor repeatedly.
total_generation = hourly_gen_by_source_reformatted['Total_Generation_(MWh)']
hourly_gen_by_source_reformatted['Total_Generation_(MWh)'] = total_generation.mul(harbor_fraction_system_peak)

In [51]:
# Write the downscaled hourly generation to CSV. With to_csv's default settings
# the DataFrame index is written as well, as an unnamed first column.
hourly_gen_by_source_reformatted.to_csv('data/ladwp_hourly_gen_downscaled.csv')