In [None]:
#install libraries
!pip install pygsheets
import pygsheets # use 'pip install pygsheets'
import numpy
import datetime

import pandas as pd
!pip install geopandas
import geopandas as gpd
from geopandas.tools import overlay

import shapely

Collecting pygsheets
  Downloading pygsheets-2.0.5-py2.py3-none-any.whl (147 kB)
[K     |████████████████████████████████| 147 kB 8.3 MB/s eta 0:00:01
Installing collected packages: pygsheets
Successfully installed pygsheets-2.0.5
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 8.5 MB/s 
Collecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 45.5 MB/s 
[?25hCollecting fiona>=1.8
  Downloading Fiona-1.8.20-cp37-cp37m-manylinux1_x86_64.whl (15.4 MB)
[K     |████████████████████████████████| 15.4 MB 38.3 MB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, 

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): the file paths visible in this notebook point at /content/,
# not /content/drive -- confirm whether this mount is still required.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Open the current pipelines workbook once and read the tabs used below.
# (Original author's note on this step: "need to write".)
#file = open('/content/Pipelines_Current.xlsx', 'r')
spreadsheet = pd.ExcelFile('/content/Pipelines_Current (14).xlsx')

# Tabs are selected by name; the Oil/NGL tab is currently not loaded.
gas_pipes = pd.read_excel(spreadsheet, sheet_name='Gas Pipelines')
#oil_pipes = pd.read_excel(spreadsheet, sheet_name='Oil/NGL Pipelines')
owners = pd.read_excel(spreadsheet, sheet_name='Owners')

# The WKTFormat column is not carried forward.
gas_pipes = gas_pipes.drop(columns='WKTFormat')
#oil_pipes = oil_pipes.drop(columns='WKTFormat')

In [None]:
# Statuses to keep, written in lower case (modify as desired).
status_in_dev = [
    'proposed',
    'construction',
    'shelved',
    'operating',
    'mothballed',
    'cancelled',
    'retired',
    'idle',
]

# Route values that are filtered out later as "no route available".
no_route_options = ['Unavailable', 'Capacity expansion only', 'Bidirectionality upgrade only', '']

# Compare case-insensitively: lower-case the Status column, then keep only
# the rows whose status appears in status_in_dev.
status_lower = gas_pipes['Status'].str.lower()
gas_pipes = gas_pipes.loc[status_lower.isin(status_in_dev)]

In [None]:
#oil_pipes = oil_pipes.dropna(subset=['ProjectID'])

# exclude oil rows with "don't add" for ProjectID
#oil_pipes = oil_pipes[oil_pipes['ProjectID']!="don't add"]

In [None]:
def _parse_gfit_segment(segment_str, pipeline_name):
    '''
    Parse one unbranched GFIT segment ("a,b:a,b:...") into a list of
    (b, a) float tuples.

    Each colon-separated element is split on commas and emitted with its
    first two fields swapped (presumably "lat,lon" in, (x, y) = (lon, lat)
    out -- confirm against the GFIT export). Parsing is best effort: on the
    first malformed element the problem is printed and only the points
    parsed before it are returned.
    '''
    points = []
    for element in segment_str.split(':'):
        try:
            parts = element.split(',')
            points.append((float(parts[1]), float(parts[0])))
        except (IndexError, ValueError):
            # IndexError: no comma in the element; ValueError: not a number.
            print(f"Exception for {pipeline_name}; element: {element}") # for db
            break
    return points


def convert_gfit_to_linestring(coord_str, pipeline_name):
    '''
    Takes string from GFIT column of coordinates for a single pipeline,
    converts that string into Shapely LineString or MultiLinestring.

    Format: points are separated by ':', the two coordinates of a point by
    ',', and the branches of a branched pipeline by ';'.

    Parameters
    ----------
    coord_str : str
        Route string for one pipeline.
    pipeline_name : str
        Only used to label diagnostic messages.

    Returns
    -------
    shapely.geometry.LineString
        When the route has no ';' (unbranched).
    shapely.geometry.MultiLineString
        When the route has ';' (one LineString per usable branch).
    None
        When no line can be built: coord_str is not a string, contains no
        ':' (fewer than two points), or no branch yields at least two
        valid points. Previously these cases raised instead of returning.
    '''
    # A line needs at least two ':'-separated points; anything else (NaN,
    # empty string, a single "a,b" pair) has no usable route.
    if not isinstance(coord_str, str) or ':' not in coord_str:
        return None

    if ';' not in coord_str:
        # simple geometry; no branching
        points = _parse_gfit_segment(coord_str, pipeline_name)
        if len(points) < 2:
            print(f"Exception for {pipeline_name}; coord_list_tuples: {points}") # for db
            return None
        return shapely.geometry.LineString(points)

    # branched pipeline: one LineString per ';'-separated branch
    branch_lines = []
    for segment_str in coord_str.split(';'):
        points = _parse_gfit_segment(segment_str, pipeline_name)
        if len(points) < 2:
            # Skip unusable branches but keep the rest of the pipeline.
            print(f"Exception for {pipeline_name}; coord_list_tuples: {points}") # for db
            continue
        branch_lines.append(shapely.geometry.LineString(points))

    if not branch_lines:
        return None
    return shapely.geometry.MultiLineString(branch_lines)

In [None]:
def convert_all_pipelines(df):
    """
    Apply the conversion function to all pipelines in the dataframe.

    Adds (or overwrites) a 'geometry' column on ``df`` in place and returns
    the same dataframe. Rows whose 'Route' contains ',' or ':' get the value
    returned by convert_gfit_to_linestring; all other rows get None.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have 'Route' and 'PipelineName' columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the 'geometry' column filled in.
    """
    # Use None rather than '' as the "no geometry" placeholder: an empty
    # string is not a valid geometry value for a GeoDataFrame.
    df['geometry'] = None

    # filter to keep only pipelines with routes
    # (`',' or ':'` evaluates to just ',', so match either character with a
    # character class; na=False makes missing Route values count as no route
    # instead of producing a non-boolean mask.)
    mask_route = df['Route'].str.contains('[,:]', regex=True, na=False)
    pipes_with_route = df.loc[mask_route]

    for row in pipes_with_route.index:
        route_str = df.at[row, 'Route']
        pipeline_name = df.at[row, 'PipelineName']
        route_str_converted = convert_gfit_to_linestring(route_str, pipeline_name)
        df.at[row, 'geometry'] = route_str_converted

    return df



In [None]:
from shapely import geometry
# code to create a dataframe with WKT formatted geometry

# (1) copy gas_pipes and keep only rows with a usable Route value
#     (explicit .copy() so the geometry column is added to an independent frame)
to_convert = gas_pipes.copy()
to_convert = to_convert.dropna(subset=['Route'])
to_convert = to_convert[~to_convert['Route'].isin(no_route_options)].copy()

# (2) convert all pipelines
gas_pipes_wkt = convert_all_pipelines(to_convert)
gas_pipes_wkt = gas_pipes_wkt.reset_index(drop=True)

# Rows that were not converted can still hold a non-geometry placeholder
# (for example an empty string). GeoDataFrame only accepts geometry objects
# or missing values, which is the likely cause of the TypeError this cell
# used to raise, so blank out everything that is not a Shapely geometry.
is_geometry = gas_pipes_wkt['geometry'].map(
    lambda value: isinstance(value, shapely.geometry.base.BaseGeometry)
).astype(bool)
gas_pipes_wkt.loc[~is_geometry, 'geometry'] = None

# (3) store in a GeoDataFrame, attach a projection, transform to a different one
gas_pipes_wkt_gdf = gpd.GeoDataFrame(gas_pipes_wkt, geometry=gas_pipes_wkt['geometry'])
gas_pipes_wkt_gdf = gas_pipes_wkt_gdf.set_crs('epsg:4326')
gas_pipes_wkt_gdf_4087 = gas_pipes_wkt_gdf.to_crs('epsg:4087')

# (4) drop LastUpdated column
# drop() returns a new frame, so the result must be assigned back or the
# column is never removed; errors="ignore" tolerates the column being absent.
gas_pipes_wkt_gdf = gas_pipes_wkt_gdf.drop(columns=["LastUpdated"], errors="ignore")

TypeError: ignored

In [None]:
# Export the GeoDataFrame as CSV. The date in the file name is hard-coded
# and has to be changed by hand for each export.
output_csv_path = '/content/oct15_coords.csv'
gas_pipes_wkt_gdf.to_csv(output_csv_path)