# this code does not append pipelines without routes (it skips them)

In [19]:
import pygsheets # use 'pip install pygsheets'
import numpy
import datetime

import pandas
import geopandas
#import geopandas.tools import overlay

import shapely

In [20]:
# Authorize against Google Sheets with the locally stored client secret,
# then open the pipelines workbook by its spreadsheet key.
credentials_directory = '/Users/baird/Dropbox/_google-api/'
client_secret_path = credentials_directory + 'client_secret.json'
gc = pygsheets.authorize(client_secret=client_secret_path)
spreadsheet = gc.open_by_key('1foPLE6K-uqFlaYgLPAUxzeXfDO5wOOqE7tibNHeqTek')

# Read each worksheet into a DataFrame; reading starts at cell A2.
gas_sheet = spreadsheet.worksheet('title', 'Gas pipelines')
gas_pipes = gas_sheet.get_as_df(start='A2')
oil_sheet = spreadsheet.worksheet('title', 'Oil/NGL pipelines')
oil_pipes = oil_sheet.get_as_df(start='A2')

# Keep only the columns the two sheets have in common so they can be
# concatenated later. Both differences are computed before either frame
# is modified.
columns_not_in_oil = list(set(gas_pipes.columns).difference(oil_pipes.columns))
columns_not_in_gas = list(set(oil_pipes.columns).difference(gas_pipes.columns))
gas_pipes.drop(columns=columns_not_in_oil, inplace=True)
oil_pipes.drop(columns=columns_not_in_gas, inplace=True)

# define whether you want just oil, just gas, or both

In [21]:
# Choose which pipelines to process: 'oil', 'gas', or 'oil-and-gas'.
#fuel_type = 'oil'
#fuel_type = 'gas'
fuel_type = 'oil-and-gas'

if fuel_type=='oil':
    pipes_orig = oil_pipes
elif fuel_type=='gas':
    pipes_orig = gas_pipes
elif fuel_type=='oil-and-gas':
    pipes_orig = pandas.concat([oil_pipes, gas_pipes], ignore_index=True)
else:
    # Fail here with a clear message instead of a NameError on pipes_orig
    # further down when fuel_type is mistyped.
    raise ValueError(
        f"Unknown fuel_type {fuel_type!r}; expected 'oil', 'gas' or 'oil-and-gas'"
    )

# Cells holding '--' become NaN. In the single-fuel cases pipes_orig is the
# same object as oil_pipes / gas_pipes, so that frame is modified as well.
pipes_orig.replace('--', numpy.nan, inplace=True)

In [22]:
# Trim surrounding whitespace from every route string.
pipes_orig['Route'] = pipes_orig['Route'].str.strip()

# Discard rows whose route is "N/A" or empty (which would be empty rows),
# and rows explicitly flagged as having no route.
keep_row = (
    (pipes_orig['Route'] != 'N/A')
    & (pipes_orig['Route'] != '')
    & (pipes_orig['RouteAccuracy'] != 'no route')
)
pipes_orig = pipes_orig[keep_row]

# Placeholder texts that appear in 'Route' when no coordinates are given.
missing_route_options = [
    'Unavailable',
    'Capacity expansion only',
    'Bidirectionality upgrade only',
    'Short route (< 100 km)',
]

# Split into rows with a placeholder route and rows with anything else.
has_placeholder_route = pipes_orig['Route'].isin(missing_route_options)
pipes_noroute_df = pipes_orig[has_placeholder_route].copy()
pipes_withroute_df = pipes_orig[~has_placeholder_route].copy()

In [23]:
def _parse_gfit_pairs(pair_strings):
    """Parse "a,b" strings into (b, a) float tuples, stopping at the first bad one.

    Returns ``(coords, bad_element)``: ``coords`` holds the tuples parsed so
    far and ``bad_element`` is the string that could not be parsed, or
    ``None`` when every element parsed.
    """
    coords = []
    for pair in pair_strings:
        try:
            parts = pair.split(',')
            # The two numbers are swapped: second becomes x, first becomes y
            # (the input appears to be "lat,lon" -- confirm against the sheet).
            coords.append((float(parts[1]), float(parts[0])))
        except (IndexError, ValueError):
            return coords, pair
    return coords, None


def _build_linestring(coords, pipeline_name):
    """Return a LineString for ``coords``, or None (after reporting) on failure."""
    try:
        return shapely.geometry.LineString(coords)
    except Exception:
        # Deliberately broad: the exception type raised for an invalid
        # coordinate list (e.g. a single point) depends on the shapely version.
        print(f"Exception for {pipeline_name}; coord_list_tuples: {coords}") # for db
        return None


def convert_gfit_to_linestring(coord_str, pipeline_name, segment_name):
    """
    Convert a GFIT route string for a single pipeline into a Shapely geometry.

    Points are separated by ':' and written as two comma-separated numbers;
    ';' separates the branches of a branched pipeline.

    Parameters:
        coord_str: the route string.
        pipeline_name, segment_name: only used in diagnostic messages.

    Returns:
        A LineString when ``coord_str`` has no ';', otherwise a
        MultiLineString with one LineString per branch. Malformed data is
        reported with ``print`` rather than raised: parsing of a line stops
        at the first bad element, and a route with no usable line yields an
        empty LineString.
    """
    if ':' not in coord_str:
        # Fewer than two points: previously this fell through both branches
        # and raised UnboundLocalError on the return statement.
        print(f"Exception for {pipeline_name}; {segment_name}; no ':'-separated coordinates in route: {coord_str}") # for db
        return shapely.geometry.LineString([])

    if ';' not in coord_str:
        # simple geometry; no branching
        coords, bad_element = _parse_gfit_pairs(coord_str.split(':'))
        if bad_element is not None:
            print(f"Exception for {pipeline_name}; {segment_name}; element: {bad_element}") # for db
        line = _build_linestring(coords, pipeline_name)
        return line if line is not None else shapely.geometry.LineString([])

    # branched pipeline: one LineString per ';'-separated branch
    branch_lines = []
    for branch in coord_str.split(';'):
        coords, bad_element = _parse_gfit_pairs(branch.split(':'))
        if bad_element is not None:
            print(f"Exception for {pipeline_name}; segment {segment_name}; element: {bad_element}") # for db
        line = _build_linestring(coords, pipeline_name)
        if line is not None:
            branch_lines.append(line)

    return shapely.geometry.MultiLineString(branch_lines)

In [24]:
def convert_all_pipelines(df):
    """
    Apply the conversion function to all pipelines in the dataframe.

    Adds a 'geometry' column to ``df`` in place and returns the same object.
    Rows whose 'Route' contains ',' or ':' get the geometry returned by
    ``convert_gfit_to_linestring``; every other row (including a missing
    route) keeps the empty string ''.

    Reads the columns 'Route', 'PipelineName' and 'SegmentName'.
    """
    # create geometry column with empty strings
    df['geometry'] = ''

    # Keep only rows that look like coordinate lists. The previous pattern
    # `',' or ':'` evaluated to just ',', and a NaN route produced a NaN in
    # the mask, which pandas refuses to index with; na=False treats a
    # missing route as "no route".
    mask_route = df['Route'].str.contains(r'[,:]', na=False)

    for row in df.index[mask_route.to_numpy()]:
        df.at[row, 'geometry'] = convert_gfit_to_linestring(
            df.at[row, 'Route'],
            df.at[row, 'PipelineName'],
            df.at[row, 'SegmentName'],
        )

    return df

In [25]:
# Build a GeoDataFrame holding the converted route geometries.

# (1) work on a copy: the conversion adds a 'geometry' column to its argument
to_convert = pipes_withroute_df.copy()

# (2) convert every route, then renumber the rows from 0
pipes_wkt_df = convert_all_pipelines(to_convert).reset_index(drop=True)

# (3) wrap in a GeoDataFrame, label the coordinates as EPSG:4326,
#     and derive a second frame reprojected to EPSG:4087
pipes_wkt_gdf = geopandas.GeoDataFrame(
    pipes_wkt_df, geometry=pipes_wkt_df['geometry']
).set_crs('epsg:4326')
pipes_wkt_gdf_4087 = pipes_wkt_gdf.to_crs('epsg:4087')

# save geodataframe as Excel file with just ProjectID, geometry

In [26]:
# Display only the ProjectID and geometry columns (the two columns exported below).
pipes_wkt_gdf[['ProjectID','geometry']]

Unnamed: 0,ProjectID,geometry
0,P3210,"LINESTRING (-102.12172 31.95284, -101.59546 31..."
1,P1495,"MULTILINESTRING ((85.06487 45.62948, 84.82499 ..."
2,P2672,"LINESTRING (-102.15200 31.97061, -101.13689 31..."
3,P3165,"LINESTRING (-96.64601 28.97849, -96.54925 28.6..."
4,P3166,"LINESTRING (-95.43374 28.94926, -95.03306 28.5..."
...,...,...
3699,P5314,"LINESTRING (112.18784 35.68862, 111.72024 35.7..."
3700,P5315,"LINESTRING (111.71690 35.73930, 111.37151 35.6..."
3701,P5316,"LINESTRING (116.86021 39.11826, 116.20927 39.0..."
3702,p5317,"LINESTRING (115.65608 39.02033, 115.47343 38.8..."


In [27]:
# Write ProjectID + geometry to a date-stamped Excel file (relative path).
now_string = f"{datetime.datetime.now():%Y-%m-%d}"
filename = f"{fuel_type}-routes-wkt-format-{now_string}.xlsx"
export_columns = ['ProjectID', 'geometry']
pipes_wkt_gdf[export_columns].to_excel(filename, index=False)
print('saved as', filename)

saved as oil-and-gas-routes-wkt-format-2023-04-20.xlsx


In [28]:
# Write ProjectID + geometry to a date-stamped CSV file (relative path).
now_string = f"{datetime.datetime.now():%Y-%m-%d}"
filename = f"{fuel_type}-routes-wkt-format-{now_string}.csv"
export_columns = ['ProjectID', 'geometry']
pipes_wkt_gdf[export_columns].to_csv(filename, index=False)
print('saved as', filename)

saved as oil-and-gas-routes-wkt-format-2023-04-20.csv
