In [12]:
# Start by importing the `tell` package and information about your operating system:
import os 
import tell

import numpy as np
import pandas as pd

from pandas import DataFrame


In [6]:
# Locate the parent of the current working directory (note: despite its name, "current_dir" is the
# directory one level above the working directory), then derive the data and image subdirectories:
current_dir = os.path.dirname(os.getcwd())
tell_data_dir = os.path.join(current_dir, 'tell_data')
tell_image_dir = os.path.join(tell_data_dir, 'visualizations')

# Create both subdirectories if they are missing. "exist_ok=True" turns the call into a no-op when the
# directory is already there, so no separate existence check (which could race with another process)
# is needed:
os.makedirs(tell_data_dir, exist_ok=True)
os.makedirs(tell_image_dir, exist_ok=True)

# Input data are looked up under the same data directory:
data_input_dir = tell_data_dir

# Set the output directory based on the "data_input_dir" variable:
output_dir = os.path.join(data_input_dir, 'tell_quickstarter_data', 'outputs', 'ba_service_territory')


In [7]:
# Year whose EIA-861 files are processed:
target_year = 2019

# State and county FIPS code lookup table:
fips_file = os.path.join(data_input_dir, 'tell_raw_data', 'state_and_county_fips_codes.csv')

# The three EIA-861 workbooks live in the same per-year folder
# (<data_input_dir>/tell_raw_data/EIA_861/<target_year>/), so their paths are built together:
service_area_file, sales_ult_file, bal_auth_file = (
    os.path.join(data_input_dir, 'tell_raw_data', 'EIA_861', f'{target_year}', workbook)
    for workbook in (
        f'Service_Territory_{target_year}.xlsx',
        f'Sales_Ult_Cust_{target_year}.xlsx',
        f'Balancing_Authority_{target_year}.xlsx',
    )
)


In [13]:
# Step through the "prepare_data" stage:

# Read in the state and county FIPS code .csv file:
df_fips = pd.read_csv(fips_file)

# Read in the raw data from the EIA-861 Excel spreadsheets. The service territory sheet is read from
# 'Counties' when the workbook has a sheet of that name and from 'Counties_States' otherwise. The sheet
# names are checked explicitly rather than with a bare "except", which would also hide unrelated
# failures (missing file, keyboard interrupt, ...):
with pd.ExcelFile(service_area_file) as service_area_book:
    service_area_sheet = 'Counties' if 'Counties' in service_area_book.sheet_names else 'Counties_States'
    df_states = pd.read_excel(service_area_book, sheet_name=service_area_sheet)
df_ult = pd.read_excel(sales_ult_file, sheet_name='States', skiprows=2)
df_ba = pd.read_excel(bal_auth_file)

# Lower-case the county names, keep only the text before " county" or " parish", and remove
# apostrophes. The ".str" accessor passes missing names through as NaN instead of raising:
df_fips['county_lower'] = (
    df_fips['county_name']
    .str.lower()
    .str.split(' county').str[0]
    .str.split(' parish').str[0]
    .str.replace("'", "", regex=False)
)

# Make the state abbreviations lower case:
df_fips['state_abbreviation'] = df_fips['state_abbreviation'].str.lower()

# Make the "State" column of the service territory table lower case as well:
df_states['State'] = df_states['State'].str.lower()

# Lower-case the county names of the service territory table and remove apostrophes:
df_states['county_lower'] = df_states['County'].str.lower().str.replace("'", "", regex=False)

# Create a unified <state_abbrev>_<county_lower> key to merge by:
df_fips['fips_key'] = df_fips['state_abbreviation'] + '_' + df_fips['county_lower']
df_states['states_key'] = df_states['State'] + '_' + df_states['county_lower']

# Filter the "df_ult" and "df_ba" dataframes to only the columns we need. The BA code column is
# accepted under either of the two spellings 'BA_CODE' and 'BA Code' and is always exposed as
# 'BA Code':
ba_code_column = 'BA_CODE' if 'BA_CODE' in df_ult.columns else 'BA Code'
df_ult = df_ult[['Utility Number', 'Utility Name', ba_code_column]].rename(columns={ba_code_column: 'BA Code'})
df_ba = df_ba[['BA Code', 'BA ID', 'Balancing Authority Name']]


In [18]:
# Step through the "filter_one" stage:

# Left-join the FIPS table onto the service territory table via the <state>_<county> keys and store
# the utility number as a float (presumably to match the dtype of the same column in "df_ult" —
# TODO confirm):
df_states_fips = (
    df_states
    .merge(df_fips, how='left', left_on='states_key', right_on='fips_key')
    .astype({'Utility Number': float})
)

# Attach the utility information by utility number.
# NOTE(review): the displayed result contains repeated identical rows, which suggests "df_ult" holds
# several rows per utility — confirm whether de-duplication is wanted before this merge.
df_fips_ult = df_states_fips.merge(df_ult, how='left', on='Utility Number')

# Attach the balancing authority information by BA code:
df_valid = df_fips_ult.merge(df_ba, how='left', on='BA Code')

# Rows without a FIPS match have no value in 'county_lower_y' (the FIPS-side county name). Collect
# them separately, without that helper column:
df_nan = df_valid.loc[df_valid['county_lower_y'].isna()].drop(columns=['county_lower_y'])

# Keep only the matched rows and drop the columns that are no longer needed:
df_valid = (
    df_valid
    .loc[df_valid['county_lower_y'].notna()]
    .drop(columns=['county_lower_y', 'Utility Name_y'])
)

df_fips_ult

Unnamed: 0,Data Year,Utility Number,Utility Name_x,Short Form,State,County,county_lower_x,states_key,state_name,state_abbreviation,state_FIPS,county_name,county_FIPS,county_lower_y,fips_key,Utility Name_y,BA Code
0,2019,34.0,City of Abbeville - (SC),Y,sc,Abbeville,abbeville,sc_abbeville,South Carolina,sc,45000.0,Abbeville County,45001.0,abbeville,sc_abbeville,City of Abbeville - (SC),DUK
1,2019,55.0,City of Aberdeen - (MS),,ms,Monroe,monroe,ms_monroe,Mississippi,ms,28000.0,Monroe County,28095.0,monroe,ms_monroe,City of Aberdeen - (MS),TVA
2,2019,59.0,City of Abbeville - (LA),,la,Vermilion,vermilion,la_vermilion,Louisiana,la,22000.0,Vermilion Parish,22113.0,vermilion,la_vermilion,City of Abbeville - (LA),MISO
3,2019,84.0,A & N Electric Coop,,md,Somerset,somerset,md_somerset,Maryland,md,24000.0,Somerset County,24039.0,somerset,md_somerset,A & N Electric Coop,PJM
4,2019,84.0,A & N Electric Coop,,md,Somerset,somerset,md_somerset,Maryland,md,24000.0,Somerset County,24039.0,somerset,md_somerset,A & N Electric Coop,PJM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19448,2019,61432.0,Monterey Bay Community Power,,ca,Monterey,monterey,ca_monterey,California,ca,6000.0,Monterey County,6053.0,monterey,ca_monterey,Monterey Bay Community Power,CISO
19449,2019,61432.0,Monterey Bay Community Power,,ca,San Benito,san benito,ca_san benito,California,ca,6000.0,San Benito County,6069.0,san benito,ca_san benito,Monterey Bay Community Power,CISO
19450,2019,61432.0,Monterey Bay Community Power,,ca,Santa Cruz,santa cruz,ca_santa cruz,California,ca,6000.0,Santa Cruz County,6087.0,santa cruz,ca_santa cruz,Monterey Bay Community Power,CISO
19451,2019,62107.0,City of Industry,,ca,Los Angeles,los angeles,ca_los angeles,California,ca,6000.0,Los Angeles County,6037.0,los angeles,ca_los angeles,City of Industry,CISO
