In [2]:
import pygsheets # use 'pip install pygsheets'
import numpy
import datetime

import pandas
import geopandas
from geopandas.tools import overlay

pandas.set_option('display.max_rows', 100)

import shapely

# terminal data

## import

In [1]:
# NOTE(review): absolute, user-specific path -- confirm before running on another machine
credentials_directory = '/Users/baird/Dropbox/_google-api/'
gc = pygsheets.authorize(
    client_secret=credentials_directory+'client_secret.json'
)
spreadsheet = gc.open_by_key('1tcS6Wd-Wp-LTDpLzFgJY_RSNDnbyubW3J_9HKIAys4A')

# each tab is looked up by its title and read into its own DataFrame
(terms_df_orig,
 terms_dict_df,
 terms_acronyms_df,
 terms_copyright_df) = (
    spreadsheet.worksheet('title', tab_title).get_as_df()
    for tab_title in ('Terminals', 'Data dictionary', 'Acronyms', 'Copyright')
)

NameError: name 'pygsheets' is not defined

## clean up

In [14]:
# remove oil export terminals
terms_df_orig = terms_df_orig[terms_df_orig['Type1']!='Oil']
# remove anything without a wiki page
terms_df_orig = terms_df_orig[terms_df_orig['Wiki']!='']

In [15]:
# data-dictionary rows flagged for the data release, ordered by release
# column order and reduced to the variable name and its definition
terms_dict_df_include = (
    terms_dict_df
    .loc[lambda frame: frame['IncludeWithDataRelease'] == 'Yes']
    .sort_values('DataReleaseColumnOrder', ascending=True)
    .loc[:, ['VariableName', 'Definition']]
)

In [16]:
# placeholder values that are checked against the Latitude/Longitude columns
no_lonlat_options = ['Unknown', 'TBD']

## clean up more, create shapely points

In [17]:
# build GeoDataFrames for the terminals: a point geometry where both
# coordinates are usable, an empty point otherwise

# (1) split the terminals on whether either coordinate is a placeholder.
# One mask drives both halves, so every terminal lands in exactly one of
# them: a row with a single placeholder coordinate is neither handed to the
# point conversion nor duplicated when the halves are concatenated again.
has_placeholder_coord = (terms_df_orig['Latitude'].isin(no_lonlat_options) |
                         terms_df_orig['Longitude'].isin(no_lonlat_options))
to_convert_df = terms_df_orig[~has_placeholder_coord].copy()

# keep the non-converted ones separate; they get an empty point as geometry
# so that their columns match those of the converted terminals
not_converted_df = terms_df_orig[has_placeholder_coord].copy()
not_converted_df['geometry'] = [shapely.geometry.Point()]*not_converted_df.shape[0]
not_converted_df = not_converted_df.reset_index(drop=True)
not_converted_gdf = geopandas.GeoDataFrame(not_converted_df, geometry=not_converted_df['geometry'])

# (2) convert all remaining terminals: each row's (Longitude, Latitude) pair
# is passed to shapely.geometry.Point, i.e. x=Longitude, y=Latitude
terms_df_converted = to_convert_df.copy()
terms_df_converted['geometry'] = to_convert_df[['Longitude','Latitude']].apply(shapely.geometry.Point, axis=1)
terms_df_converted = terms_df_converted.reset_index(drop=True)

# (3) store in a GeoDataFrame, attach a projection, transform to a different one
terms_df_gdf = geopandas.GeoDataFrame(terms_df_converted, geometry=terms_df_converted['geometry'])
terms_df_gdf = terms_df_gdf.set_crs('epsg:4326')
terms_df_gdf_4087 = terms_df_gdf.to_crs('epsg:4087')

  arr = construct_1d_object_array_from_listlike(values)
  arr = construct_1d_object_array_from_listlike(values)


In [18]:
# converted and non-converted terminals back in one table, renumbered and
# then ordered by ComboID
all_terms_df = (
    pandas.concat([terms_df_gdf, not_converted_gdf])
    .reset_index(drop=True)
    .sort_values('ComboID')
)

In [19]:
# release columns: the variable names of the data-dictionary rows flagged
# for release, in release column order
terms_dict_df_sorted = (
    terms_dict_df
    .loc[terms_dict_df['IncludeWithDataRelease'] == 'Yes']
    .sort_values('DataReleaseColumnOrder')
)
output_columns = terms_dict_df_sorted['VariableName'].to_list()

In [20]:
# restrict the table to the release columns, then attach the geometry
# column of the full table again
all_terms_df_to_save = all_terms_df[output_columns]
all_terms_df_to_save_gdf = geopandas.GeoDataFrame(
    data=all_terms_df_to_save,
    geometry=all_terms_df['geometry'],
)

## save as GeoJSON file

In [21]:
# file name carries today's date (YYYY-MM-DD)
now_string = f'{datetime.datetime.now():%Y-%m-%d}'
filename = f'GEM-LNG-Terminals-{now_string}.geojson'
all_terms_df_to_save_gdf.to_file(filename, driver='GeoJSON')
print(f'saved as {filename}')

saved as GEM-LNG-Terminals-2022-05-11.geojson


## save as shapefile

In [22]:
# write the terminals as an ESRI Shapefile. The driver has to match the
# '.shp' extension: with driver='GeoJSON' the file named *.shp would contain
# GeoJSON text and not be a shapefile at all.
# NOTE(review): the Shapefile format limits attribute names to 10 characters,
# so longer column names are shortened on write -- confirm that is acceptable.
now_string = datetime.datetime.now().strftime('%Y-%m-%d')
filename = 'GEM-LNG-Terminals-dataset-'+now_string+'.shp'
all_terms_df_to_save_gdf.to_file(filename, driver='ESRI Shapefile')
print('saved as', filename)

saved as GEM-LNG-Terminals-dataset-2022-05-11.shp


In [23]:
# remove oil export terminals
terms_df_orig = terms_df_orig[terms_df_orig['Type1']!='Oil']
# remove anything without a wiki page
terms_df_orig = terms_df_orig[terms_df_orig['Wiki']!='']

In [24]:
# data-dictionary rows flagged for the data release, ordered by release
# column order and reduced to the variable name and its definition
terms_dict_df_include = (
    terms_dict_df
    .loc[lambda frame: frame['IncludeWithDataRelease'] == 'Yes']
    .sort_values('DataReleaseColumnOrder', ascending=True)
    .loc[:, ['VariableName', 'Definition']]
)

In [25]:
# copy of the terminals table restricted to the release variables, in the
# order given by terms_dict_df_include
terms_df_subset = terms_df_orig.copy().loc[
    :, terms_dict_df_include['VariableName'].to_list()
]

# pipeline data

## import

In [37]:
# which pipeline tab(s) to export: 'Gas', 'Oil' or 'Oil-and-Gas'
#fuel_type = 'Gas'
#fuel_type = 'Oil'
fuel_type = 'Oil-and-Gas'

# NOTE(review): absolute, user-specific path -- confirm before running on another machine
credentials_directory = '/Users/baird/Dropbox/_google-api/'
gc = pygsheets.authorize(client_secret=credentials_directory+'client_secret.json')

# each tab is looked up by its title and read into a DataFrame
spreadsheet = gc.open_by_key('1foPLE6K-uqFlaYgLPAUxzeXfDO5wOOqE7tibNHeqTek')
gas_pipes = spreadsheet.worksheet('title', 'Gas pipelines').get_as_df()
oil_pipes = spreadsheet.worksheet('title', 'Oil/NGL pipelines').get_as_df()

pipes_dict_df = spreadsheet.worksheet('title', 'Data dictionary').get_as_df()

# pick the pipeline table for the chosen fuel type. An unrecognised value
# fails here, instead of leaving pipes_df_orig undefined (or still holding
# the table from an earlier run of this cell).
if fuel_type == 'Gas':
    pipes_df_orig = gas_pipes.copy()
elif fuel_type == 'Oil':
    pipes_df_orig = oil_pipes.copy()
elif fuel_type == 'Oil-and-Gas':
    pipes_df_orig = pandas.concat([oil_pipes, gas_pipes], ignore_index=True)
else:
    raise ValueError(
        f"unknown fuel_type {fuel_type!r}; expected 'Gas', 'Oil' or 'Oil-and-Gas'"
    )


## clean up

In [38]:
# clean up rows that should not be distributed
pipes_df_orig = pipes_df_orig[pipes_df_orig['Status']!='N/A']
pipes_df_orig = pipes_df_orig[pipes_df_orig['PipelineName']!='']

In [39]:
# status values of interest; not applied anywhere while the filter at the
# bottom of this cell stays disabled
status_in_dev = [
    'Proposed',
    'Construction',
    'Shelved',
    'Operating',
    'Mothballed',
    'Cancelled',
    'Retired',
    'Idle',
]

# values of the Route column for which no route geometry is built
no_route_options = ['Unavailable', 'Capacity expansion only', 'Bidirectionality upgrade only',
                    'Short route (< 100 km)', 'N/A', '']

# disabled filter for the statuses in status_in_dev (modify as desired).
# Note that, as written, it lower-cases Status before comparing it with the
# capitalised entries above, so it would match nothing if re-enabled.
#gas_pipes = gas_pipes[gas_pipes['Status'].str.lower().isin(status_in_dev)]

## convert pipeline data to geodata info

In [40]:
def _parse_route_segment(segment, pipeline_name):
    '''
    Parse one unbranched run of vertices, "a,b:a,b:...", into a list of
    (float(b), float(a)) tuples, i.e. the two numbers of every vertex are
    swapped (presumably "lat,lon" in, (lon, lat) out -- confirm against the sheet).

    Parsing stops at the first vertex that cannot be read: a message naming
    the pipeline and the offending element is printed and the vertices read
    up to that point are returned.
    '''
    vertices = []
    for element in segment.split(':'):
        try:
            first, second = element.split(',')[:2]
            vertices.append((float(second), float(first)))
        except ValueError:
            # fewer than two numbers in the element, or a non-numeric value
            print(f"Exception for {pipeline_name}; element: {element}") # for db
            break
    return vertices


def convert_gfit_to_linestring(coord_str, pipeline_name):
    '''
    Takes string from GFIT column of coordinates for a single pipeline,
    converts that string into Shapely LineString or MultiLinestring.

    Format of coord_str: vertices are "a,b" pairs separated by ':'; the
    branches of a branched pipeline are separated by ';'.

    Parameters:
        coord_str: route string of one pipeline.
        pipeline_name: only used in the printed diagnostics.

    Returns:
        A MultiLineString with one LineString per usable branch when
        coord_str contains ';', otherwise a single LineString. A branch with
        fewer than two readable vertices cannot form a line: it is reported
        with a printed message and left out, so the result may be an empty
        MultiLineString or an empty LineString (this includes strings
        without any ':').
    '''
    is_branched = ';' in coord_str

    lines = []
    for segment in coord_str.split(';'):
        vertices = _parse_route_segment(segment, pipeline_name)
        if len(vertices) < 2:
            # a line needs at least two vertices; report and skip this part
            print(f"Exception for {pipeline_name}; coord_list_tuples: {vertices}") # for db
            continue
        lines.append(shapely.geometry.LineString(vertices))

    if is_branched:
        return shapely.geometry.MultiLineString(lines)
    if lines:
        return lines[0]
    return shapely.geometry.LineString()

In [41]:
def convert_all_pipelines(df):
    """
    Apply the conversion function to all pipelines in the dataframe.

    Adds (or overwrites) a 'geometry' column on ``df`` itself and returns
    that same object. Every row whose 'Route' value, read as text, contains
    a ',' or a ':' is passed together with its 'PipelineName' to
    convert_gfit_to_linestring and receives the result as geometry; every
    other row gets None.
    """
    # None is the marker for "no geometry"; it also makes the column an
    # object column, which can hold the geometry objects assigned below
    df['geometry'] = None

    # filter to keep only pipelines with routes. The character class tests
    # for either separator (the expression `',' or ':'` is just ','), and
    # astype(str) keeps non-string cells from producing NaN in the mask.
    mask_route = df['Route'].astype(str).str.contains('[,:]', regex=True)

    for row in df.index[mask_route]:
        df.at[row, 'geometry'] = convert_gfit_to_linestring(
            df.at[row, 'Route'],
            df.at[row, 'PipelineName'],
        )

    return df

## clean up more

In [42]:
# build GeoDataFrames for the pipelines: converted route geometries where a
# route is given, an empty MultiLineString otherwise

# (1) split the pipelines on whether Route holds one of the "no route"
# values. Both halves are explicit copies: convert_all_pipelines writes a
# 'geometry' column into the frame it is given, which should not happen on
# a slice of another frame.
has_no_route = pipes_df_orig['Route'].isin(no_route_options)
to_convert_df = pipes_df_orig[~has_no_route].copy()

# keep the non-converted ones separate; they get an empty geometry so that
# their columns match those of the converted pipelines
not_converted_df = pipes_df_orig[has_no_route].copy()
not_converted_df['geometry'] = [shapely.geometry.MultiLineString()]*not_converted_df.shape[0]
not_converted_df = not_converted_df.reset_index(drop=True)
not_converted_gdf = geopandas.GeoDataFrame(not_converted_df, geometry=not_converted_df['geometry'])

# (2) convert all pipelines
pipes_df_wkt = convert_all_pipelines(to_convert_df)
pipes_df_wkt = pipes_df_wkt.reset_index(drop=True)

# (3) store in a GeoDataFrame, attach a projection, transform to a different one
pipes_df_wkt_gdf = geopandas.GeoDataFrame(pipes_df_wkt, geometry=pipes_df_wkt['geometry'])
pipes_df_wkt_gdf = pipes_df_wkt_gdf.set_crs('epsg:4326')
pipes_df_wkt_gdf_4087 = pipes_df_wkt_gdf.to_crs('epsg:4087')

  result[:] = values


Exception for Eight-Three Oil Pipeline Network; coord_list_tuples: [(119.608599, 39.935148)]


## concatenate "no route options" back to have full dataset for export

In [43]:
# converted and non-converted pipelines back in one table, renumbered and
# then ordered by ProjectID
all_pipes_df = (
    pandas.concat([pipes_df_wkt_gdf, not_converted_gdf])
    .reset_index(drop=True)
    .sort_values('ProjectID')
)

In [44]:
# release columns: the variable names of the data-dictionary rows flagged
# both for release and as gas variables, in release column order
# NOTE(review): the GasVariable filter is applied whatever fuel_type is set
# to -- confirm that this is intended for the oil exports
pipes_dict_df_sorted = (
    pipes_dict_df
    .loc[(pipes_dict_df['IncludeWithDataRelease'] == 'Yes')
         & (pipes_dict_df['GasVariable'] == 'Yes')]
    .sort_values('DataReleaseColumnOrder')
)
output_columns = pipes_dict_df_sorted['VariableName'].to_list()

In [45]:
# restrict the table to the release columns, then attach the geometry
# column of the full table again
all_pipes_df_to_save = all_pipes_df[output_columns]
all_pipes_df_to_save_gdf = geopandas.GeoDataFrame(
    data=all_pipes_df_to_save,
    geometry=all_pipes_df['geometry'],
)

## save as GeoJSON file

In [46]:
# file name carries the fuel type and today's date (YYYY-MM-DD)
now_string = f'{datetime.datetime.now():%Y-%m-%d}'
filename = f'GEM-{fuel_type}-Pipelines-{now_string}.geojson'
all_pipes_df_to_save_gdf.to_file(filename, driver='GeoJSON')
print(f'saved as {filename}')

saved as GEM-Oil-and-Gas-Pipelines-2022-05-11.geojson


## save as shapefile

In [47]:
# write the pipelines as an ESRI Shapefile. The driver has to match the
# '.shp' extension: with driver='GeoJSON' the file named *.shp would contain
# GeoJSON text and not be a shapefile at all.
# NOTE(review): the Shapefile format limits attribute names to 10 characters,
# so longer column names are shortened on write -- confirm that is acceptable.
now_string = datetime.datetime.now().strftime('%Y-%m-%d')
filename = 'GEM-'+fuel_type+'-Pipelines-'+now_string+'.shp'
all_pipes_df_to_save_gdf.to_file(filename, driver='ESRI Shapefile')
print('saved as', filename)

saved as GEM-Oil-and-Gas-Pipelines-2022-05-11.shp


## save as csv

In [65]:
# file name carries the fuel type and today's date (YYYY-MM-DD);
# the row index is not written to the csv
now_string = f'{datetime.datetime.now():%Y-%m-%d}'
filename = f'GEM-{fuel_type}-Pipelines-{now_string}.csv'
all_pipes_df_to_save_gdf.to_csv(filename, index=False)
print(f'saved as {filename}')

saved as GEM-Gas-Pipelines-2022-03-10.csv
