## Built Domain: Energy Transmission Metric Calculation
* % of power lines that are underground

In [1]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# Pull the reprojected California Energy Commission GeoPackage file(s) from the AWS S3 bucket
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/built_environment/utilities/ca_energy_commission/'

# Project helper from scripts.utils.file_helpers; presumably downloads the .gpkg
# files found under aws_dir into the working directory — confirm against the helper
pull_gpkg_from_directory(bucket_name, aws_dir)

In [None]:
# Load the transmission line layer and reproject it to EPSG:4269 in one step,
# then report how many line features were read.
transmission_lines = gpd.read_file('built_cec_transmission_lines.gpkg').to_crs(crs=4269)
print(len(transmission_lines))

In [None]:
# Quick visual check: map the lines, colored by their 'Type' value
transmission_lines.plot(column="Type")

In [None]:
# List the available columns to decide which ones to keep
transmission_lines.columns

### Want to keep line type, its length, and its geospatial positioning

In [None]:
# Work on an independent copy holding only the columns the metric needs
columns_keep = ['Type', 'Length_Mil', 'geometry']
filtered_transmission_lines = transmission_lines[columns_keep].copy()

# Normalize the line type codes to upper case so later comparisons are consistent
upper_case_type = filtered_transmission_lines['Type'].str.upper()
filtered_transmission_lines.loc[:, 'Type'] = upper_case_type

# Keep only rows that have a line type
has_type = filtered_transmission_lines['Type'].notna()
filtered_transmission_lines = filtered_transmission_lines[has_type]
filtered_transmission_lines

In [None]:
# Inspect the distinct line types that remain after cleaning
unique_lines = filtered_transmission_lines['Type'].unique()
unique_lines

### Separate the data into two dataframes
* overhead lines
* underground lines (with underwater lines included)

In [None]:
# Create the overhead_lines DataFrame with 'OH' entries
overhead_lines = filtered_transmission_lines[filtered_transmission_lines['Type'] == 'OH']

# Create the underground_lines DataFrame with 'UG' and 'UW' entries
underground_lines = filtered_transmission_lines[filtered_transmission_lines['Type'].isin(['UG', 'UW'])]

print("Overhead Lines:")
print(overhead_lines)
print("\nUnderground Lines:")
print(underground_lines)

In [None]:
# read in CA census tiger file
tract_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"

ca_tract_boundaries = gpd.read_file(tract_shp_dir)
# need to rename columns so we don't have any duplicates in the final geodatabase
column_names = ca_tract_boundaries.columns
# new_column_names = ["USCB_"+column for column in column_names if column != "geometry"]
ca_tract_boundaries = ca_tract_boundaries.rename(columns={'GEOID':"TRACT"})
ca_tract_boundaries = ca_tract_boundaries.to_crs(crs=4269) 
ca_tract_boundaries.head()

### Spatial join the tract boundaries with each of the line types, starting with overhead
* sum the Length_Mil per line within a census tract
* included below this cell is code to check line lengths per tract before and after summing

In [None]:
# sjoin with tracts
joined_overhead_lines = ca_tract_boundaries.sjoin(overhead_lines).reset_index()
summed_overhead_lines = joined_overhead_lines.groupby('TRACT')['Length_Mil'].agg(['sum']).reset_index()
summed_overhead_lines = pd.merge(summed_overhead_lines,ca_tract_boundaries,on="TRACT", how='right')
summed_overhead_lines = summed_overhead_lines[['TRACT', 'sum']]
summed_overhead_lines = summed_overhead_lines.rename(columns={'TRACT':'census_tract', 'sum':'total_overhead_length_mi'})
summed_overhead_lines

## Check rows within a specified tract before and after summing

In [None]:
# Tract used for the before/after comparison
tract = '06001400100'

# Individual joined rows for this tract (before summing)
in_tract = joined_overhead_lines['TRACT'] == tract
filtered_df = joined_overhead_lines.loc[in_tract, ['TRACT', 'Length_Mil']]

print(f'overhead transmission line rows for tract: {tract}')
print(filtered_df)
print('')

# The single summed row for the same tract (after summing)
in_tract = summed_overhead_lines['census_tract'] == tract
filtered_df = summed_overhead_lines.loc[in_tract, ['census_tract', 'total_overhead_length_mi']]

print(f'sum of milage for overhead transmission line rows for tract: {tract}')
print(filtered_df)

### Now spatial join with underground lines (far fewer rows than overhead lines)

In [None]:
# sjoin with tracts
joined_underground_lines = ca_tract_boundaries.sjoin(underground_lines).reset_index()
summed_underground_lines = joined_underground_lines.groupby('TRACT')['Length_Mil'].agg(['sum']).reset_index()
summed_underground_lines = pd.merge(summed_underground_lines,ca_tract_boundaries,on="TRACT", how='right')
summed_underground_lines = summed_underground_lines[['TRACT', 'sum']]
summed_underground_lines = summed_underground_lines.rename(columns={'TRACT':'census_tract', 'sum':'total_underground_length_mi'})
summed_underground_lines

### Merge the above and below ground spatially joined datasets together so we have the total mileage of each line type per census tract

In [None]:
# Combine the per-tract overhead and underground totals into one table, matched on census_tract
all_transmission_lines = summed_overhead_lines.merge(summed_underground_lines, on='census_tract')
all_transmission_lines

### Only about 506 rows (out of 9129 before sjoin) have underground values; most are NaN, which wouldn't allow a calculation to be made
* NaN values were replaced with 0
* a new column was made that is the sum of above and below ground mileage per tract
* another new column was made that calculates the percent of a tract's transmission lines that are below ground

In [None]:
# Fill NaN values with 0 across the entire DataFrame
all_transmission_lines = all_transmission_lines.fillna(0)

# Calculate the total length of transmission lines
all_transmission_lines['total_length_mi'] = all_transmission_lines['total_overhead_length_mi'] + all_transmission_lines['total_underground_length_mi']

# Calculate the percentage of underground transmission lines
# Handle division by zero by setting to 0 if total_length_mi is 0
all_transmission_lines['percent_underground_transmission_lines'] = 0
mask = all_transmission_lines['total_length_mi'] != 0
all_transmission_lines.loc[mask, 'percent_underground_transmission_lines'] = (
    all_transmission_lines['total_underground_length_mi'] / all_transmission_lines['total_length_mi']
) * 100

# Replace any remaining NaN values in 'percent_underground_transmission_lines' with 0
all_transmission_lines['percent_underground_transmission_lines'] = all_transmission_lines['percent_underground_transmission_lines'].replace({np.nan: 0})

# Print the DataFrame to verify the changes
all_transmission_lines

### Visualizing non 0 values

In [None]:
# Keep only tracts where some share of the transmission lines is underground.
# NaNs were already replaced with 0 upstream, so this counts non-zero values
# (the previous label described them as "non nan", which did not match the filter).
percent_above_zero = all_transmission_lines[all_transmission_lines['percent_underground_transmission_lines'] > 0]
print('number of tracts with non-zero percentages:', len(percent_above_zero))
percent_above_zero.head(5)

In [17]:
# save final df to csv for upload
all_transmission_lines.to_csv('built_energy_transmission_lines_metric.csv', index=False)

### Function Call

In [101]:
@append_metadata
def transmission_line_upload(input_csv, export=False, varname=''):
    '''
    Uploads prepared transmission lines metric csv to S3 bucket. The metric is:
    % of power lines that are underground

    Data for this energy transmission metric was sourced from California Energy Commission at:
    https://cecgis-caenergy.opendata.arcgis.com/datasets/CAEnergy::california-electric-transmission-lines-1/about

    Methods
    -------
    Relevant columns to Cal-CRAI's metric were isolated.
    Overhead transmission lines were separated from underground and underwater lines.
    The separated types of lines were spatially joined with California 2021 census tract shape files.
    Resulting files were merged together, with total overhead and below ground line lengths summed together per census tract.
    Percentage underground lines were calculated per tract by dividing summed underground/underwater line length by total line length.
    
    Parameters
    ----------
    input_csv: string
        csv transmission lines data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI transmission line metric to AWS
        True = will upload resulting df containing CAL CRAI transmission line metric to AWS
    varname: string
        not used in this function body; presumably consumed by the append_metadata decorator (confirm in scripts/utils/write_metadata)

    Script
    ------
    built_transmission_lines.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    The local csv is deleted at the end of the call whether or not it was uploaded.
    '''
    # NOTE(review): the docstring sections and the printed lines below may be read by
    # append_metadata — confirm before restructuring either.
    print('Data transformation: data cleaned by isolating relevant columns.')
    print('Data transformation: data was spatially joined to California census boudaries after separating overhead and below ground lines.')
    print('Data transformation: new columns calculated percentage underground/underwater lines per census tract.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # Report success only after the upload call has returned; previously this
        # message was printed when export was False, i.e. when nothing was uploaded.
        print(f'{input_csv} uploaded to AWS.')
    else:
        print(f'{input_csv} not uploaded to AWS.')

    # Clean up the local copy of the csv (kept as in the original: runs for both export values)
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [96]:
# Local csv written earlier in this notebook by all_transmission_lines.to_csv(...)
built_energy_csv='built_energy_transmission_lines_metric.csv'
# NOTE(review): `var` is assigned here but never passed on; the call below uses
# varname='test' instead — confirm which variable name is intended for the metadata.
var = 'built_cec_transmission_lines'

# Run the upload step with export enabled
transmission_line_upload(built_energy_csv, export=True, varname='test')

In [None]:
# Display the final metric DataFrame (the in-memory copy; independent of the csv file on disk)
all_transmission_lines