In [1]:
import pygsheets # use 'pip install pygsheets'
import numpy
import datetime

import pandas as pd
import geopandas as gpd
from geopandas.tools import overlay

import shapely

In [2]:
# Display the installed geopandas version so the environment used for this run is recorded
gpd.__version__

'0.9.0'

In [3]:
# Authorize against the Google Sheets API with the local client-secret file
credentials_directory = '/Users/baird/Dropbox/_google-api/'
gc = pygsheets.authorize(client_secret=f'{credentials_directory}client_secret.json')

# Open the source workbook; worksheets are addressed by position
# (index 1 is the "Gas Pipelines" tab, i.e. the second worksheet)
spreadsheet = gc.open_by_key('1foPLE6K-uqFlaYgLPAUxzeXfDO5wOOqE7tibNHeqTek')
gas_pipes = spreadsheet[1].get_as_df()
oil_pipes = spreadsheet[3].get_as_df()
owners = spreadsheet[2].get_as_df()

# The gas sheet's WKTFormat column is discarded right after loading;
# the equivalent drop for the oil sheet is currently disabled.
gas_pipes = gas_pipes.drop(columns='WKTFormat')
#oil_pipes = oil_pipes.drop('WKTFormat', axis=1)

In [55]:
# Statuses to keep; matched against the lower-cased 'Status' column (modify as desired)
status_in_dev = [
    'proposed',
    'construction',
    'shelved',
    'operating',
    'mothballed',
    'cancelled',
    'retired',
    'idle',
]

# 'Route' cell values that stand for "no route geometry available"
no_route_options = [
    'Unavailable',
    'Capacity expansion only',
    'Bidirectionality upgrade only',
    'Short route to add later',
    'N/A',
    '',
]

# keep only the gas pipelines whose status appears in status_in_dev
gas_pipes = gas_pipes.loc[gas_pipes['Status'].str.lower().isin(status_in_dev)]

In [56]:
# Discard oil rows that have no ProjectID, then those explicitly marked "don't add"
oil_pipes = (
    oil_pipes
    .dropna(subset=['ProjectID'])
    .loc[lambda frame: frame['ProjectID'] != "don't add"]
)

In [57]:
def _parse_gfit_points(elements):
    '''
    Parse a list of "a,b" coordinate strings into (float(b), float(a)) tuples.

    Parsing stops at the first element that cannot be converted.

    Returns a pair (points, bad_element): the tuples parsed before the
    failure, and the offending element (None when every element parsed).
    '''
    points = []
    for element in elements:
        fields = element.split(',')
        try:
            # The two fields are swapped: field 1 becomes x and field 0 becomes y
            # (presumably GFIT lists latitude first -- TODO confirm).
            points.append((float(fields[1]), float(fields[0])))
        except (IndexError, ValueError):
            # IndexError: no comma in the element; ValueError: non-numeric text
            return points, element
    return points, None


def convert_gfit_to_linestring(coord_str, pipeline_name, segment_name):
    '''
    Takes string from GFIT column of coordinates for a single pipeline,
    converts that string into Shapely LineString or MultiLinestring.

    Format: points are separated by ':' and each point is two comma-separated
    numbers; branches of a branched pipeline are separated by ';'.

    Parameters:
        coord_str: the route text for one pipeline segment.
        pipeline_name, segment_name: only used in the diagnostic messages.

    Returns:
        - a LineString when coord_str has no ';' (single, unbranched route);
        - a MultiLineString (one LineString per usable branch) when it has ';';
        - an empty LineString when coord_str is not a string, contains no ':',
          or yields fewer than two parseable points. Previously these inputs
          raised (UnboundLocalError, or an error from the LineString constructor).

    Conversion is best-effort: problems are printed, never raised. When an
    element fails to parse, the points parsed before it are still used.
    '''
    # A line needs at least two ':'-separated points; anything else is unusable.
    if not isinstance(coord_str, str) or ':' not in coord_str:
        print(f"No usable route for {pipeline_name}; {segment_name}; route: {coord_str!r}")
        return shapely.geometry.LineString()

    if ';' not in coord_str:
        # simple geometry; no branching
        points, bad_element = _parse_gfit_points(coord_str.split(':'))
        if bad_element is not None:
            print(f"Exception for {pipeline_name}; {segment_name}; element: {bad_element}") # for db
        if len(points) < 2:
            # too few points to build a line; report it instead of letting shapely raise
            print(f"Exception for {pipeline_name}; coord_list_tuples: {points}") # for db
            return shapely.geometry.LineString()
        return shapely.geometry.LineString(points)

    # branched pipeline: one LineString per ';'-separated branch
    branch_lines = []
    for branch_str in coord_str.split(';'):
        points, bad_element = _parse_gfit_points(branch_str.split(':'))
        if bad_element is not None:
            print(f"Exception for {pipeline_name}; segment {segment_name}; element: {bad_element}") # for db
        if len(points) < 2:
            # unusable branch: report it and leave it out of the result
            print(f"Exception for {pipeline_name}; coord_list_tuples: {points}") # for db
            continue
        branch_lines.append(shapely.geometry.LineString(points))

    return shapely.geometry.MultiLineString(branch_lines)

In [60]:
def convert_all_pipelines(df):
    """
    Apply the conversion function to all pipelines in the dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Route', 'PipelineName' and 'SegmentName' columns.
        It is modified in place: a 'geometry' column is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same `df` object. Rows whose 'Route' contains coordinate text hold
        the value returned by convert_gfit_to_linestring; every other row keeps
        the empty-string placeholder.
    """
    # create geometry column with empty strings (placeholder for rows without a route)
    df['geometry'] = ''

    # Keep only rows whose Route contains a comma or a colon.
    # The character class is required: `',' or ':'` is a Python expression that
    # evaluates to just ',', so the colon was never actually tested.
    # na=False maps missing / non-string Route cells to False so the mask stays
    # purely boolean and can be used for indexing.
    has_route = df['Route'].str.contains('[,:]', regex=True, na=False)

    for row in df.index[has_route]:
        df.at[row, 'geometry'] = convert_gfit_to_linestring(
            df.at[row, 'Route'],
            df.at[row, 'PipelineName'],
            df.at[row, 'SegmentName'],
        )

    return df

In [61]:
# Build a GeoDataFrame of gas pipeline routes (geometry values are shapely objects)

# (1) keep only rows whose Route is not one of the "no route" placeholders.
#     The copy is taken AFTER filtering: copying first and filtering second
#     leaves a frame pandas still flags as a slice, which triggers
#     SettingWithCopyWarning when convert_all_pipelines adds the geometry column.
to_convert = gas_pipes[~gas_pipes['Route'].isin(no_route_options)].copy()

# (2) convert all pipelines, then renumber the rows 0..n-1
gas_pipes_wkt = convert_all_pipelines(to_convert)
gas_pipes_wkt = gas_pipes_wkt.reset_index(drop=True)

# (3) store in a GeoDataFrame, declare the coordinates as EPSG:4326,
#     and derive a second copy reprojected to EPSG:4087
gas_pipes_wkt_gdf = gpd.GeoDataFrame(gas_pipes_wkt, geometry=gas_pipes_wkt['geometry'])
gas_pipes_wkt_gdf = gas_pipes_wkt_gdf.set_crs('epsg:4326')
gas_pipes_wkt_gdf_4087 = gas_pipes_wkt_gdf.to_crs('epsg:4087')

In [62]:
# Write the routes GeoDataFrame to a date-stamped GeoJSON file (relative path)
now_string = datetime.date.today().isoformat()
filename = f'routes_gdf_{now_string}.geojson'
gas_pipes_wkt_gdf.to_file(filename, driver='GeoJSON')
print('saved as', filename)

saved as routes_gdf_2021-11-08.geojson
