# Rasterize Natural Gas Pipeline Network
Creates a raster version of the pipeline network with the same extent and cell size as the MIT cost surface.

### Workflow
* Convert feature class to geopandas dataframe
* Subset NC records 
* Group values on `STATUS`, `TYPE`, and `DIAMETER`
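 * This results in 29 unique combinations. Each combination is given a unique ID (0 through 28)
* Add a new field to the list of unique combinations: `2 ** ID`, so that each combination maps to its own bit and the codes stay distinguishable after the layers are summed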

In [1]:
import rasterio
from rasterio import features
import geopandas as gpd
import pandas as pd
import numpy as np

#### Import the REXTAG natural gas feature class
*NOTE: This is a proprietary dataset stored on the secure server*

In [2]:
# Where the REXTAG natural gas pipelines live: the layer name first,
# then the file geodatabase that contains that layer
pipelines_lyr = 'NaturalGasPipelines'
pipelines_gdb = 'C:\\Workspace\\Rextag_2July2020\\Rextag_Zone2_Natural_Gas.gdb'

In [3]:
# Load the pipeline layer from the file geodatabase into a GeoDataFrame
gdf = gpd.read_file(
    pipelines_gdb,
    layer=pipelines_lyr,
    driver='FileGDB',
)

In [4]:
# Report how many distinct values each attribute takes across the ENTIRE dataset.
# Each label carries its own tab padding so the printed counts line up.
for label, field in (
    ("STATE:\t\t", "STATE_NAME"),
    ("STATUS:\t\t", "STATUS"),
    ("TYPE:\t\t", "TYPE"),
    ("DIAMETER:\t", "DIAMETER"),
):
    print(f"{label}{gdf[field].nunique()}")

STATE:		38
STATUS:		7
TYPE:		3
DIAMETER:	37


In [5]:
# Group the full dataset by state, pipeline type, status and diameter
group_fields = ['STATE_NAME', 'TYPE', 'STATUS', 'DIAMETER']
grp = gdf.groupby(group_fields)

In [6]:
# Tabulate the per-group record counts and save them as a CSV file
group_counts = grp.count()
group_counts.to_csv("ALL_stats.csv")

## Isolate and encode the NC data

In [7]:
# Keep only the North Carolina records. reset_index() renumbers the rows and
# keeps the previous row labels in a new 'index' column.
is_nc = gdf['STATE_NAME'] == 'North Carolina'
gdf_nc = gdf[is_nc].reset_index()

In [8]:
# Group the NC records by the three attributes of interest
grp_nc = gdf_nc.groupby(['STATUS', 'TYPE', 'DIAMETER'])

# Build a table with one row per unique combination: count the 'index'
# column within each group, then discard the count itself
combo_counts = grp_nc['index'].count()
df_unique = combo_counts.reset_index().drop(columns='index')

# Use each combination's row position as its identifier
df_unique['UNIQIUE_ID'] = df_unique.index

# Assign each combination its own power of two (2 ** ID)
df_unique['OUT_CODE'] = df_unique['UNIQIUE_ID'].map(lambda uid: pow(2, uid))

# Save the lookup table
df_unique.to_csv('Data_Lookup.csv', index=False)

In [9]:
# Attach UNIQIUE_ID and OUT_CODE to every NC record by matching on the
# three grouping attributes; the left join keeps every record of gdf_nc
join_fields = ['STATUS', 'TYPE', 'DIAMETER']
gdf_nc2 = gdf_nc.merge(df_unique, how='left', on=join_fields)

In [10]:
# Display the column names of the joined table
gdf_nc2.columns

Index(['index', 'NAME', 'SUB_OPER', 'OPERATOR', 'OWNER', 'SYS_NAME',
       'SUBSYS_NM', 'TYPE', 'STATUS', 'DIAMETER', 'TRANSCAP', 'COMMODITY',
       'CMDTY_DESC', 'CMDTY_CODE', 'INTERSTATE', 'FLOW_DIR', 'RATE_ZONE',
       'PREV_OWNER', 'TRANS_DATE', 'TRANS_NOTE', 'TRANS_ID', 'LOC_ID',
       'LOC_DOCS', 'SEG_ID', 'SUBSEG_ID', 'SUBOPER_ID', 'OPER_ID', 'OPER_URL',
       'OPER_DOCS', 'OWNER_ID', 'OWNER_URL', 'OWNER_DOCS', 'CONTACTS_URL',
       'NOTES', 'CATCHALL', 'CNTY_NAME', 'STATE_NAME', 'CNTRY_NAME', 'MILES',
       'UPDATE_NO', 'QUALITY', 'Shape_Length', 'geometry', 'UNIQIUE_ID',
       'OUT_CODE'],
      dtype='object')

In [11]:
# Preview the first five joined records: the grouping attributes and the assigned code
gdf_nc2[['TYPE', 'STATUS', 'DIAMETER', 'OUT_CODE']].head(5)

Unnamed: 0,TYPE,STATUS,DIAMETER,OUT_CODE
0,Transmission,Operational,0,16384
1,Transmission,Operational,12,1048576
2,Transmission,Operational,0,16384
3,Transmission,Operational,12,1048576
4,Transmission,Operational,0,16384


### Now rasterize each unique value into an image layer, then add all layers

#### Import the 500m MIT Cost Surface raster as a template
The template supplies the extent and cell size for the pipeline raster created below.

In [12]:
# Open the cost surface raster that serves as the template. The dataset stays
# open because its shape and transform are read again when rasterizing.
template_raster_dataset = rasterio.open('../data/MIT_Surface_Subset/MIT_CostSurface_500m_NConly.img')
# Metadata of the template (updated later to describe the output file)
raster_meta = template_raster_dataset.meta
# Coordinate reference system of the template
raster_crs = template_raster_dataset.crs
# Cell values of band 1
template_raster_ = template_raster_dataset.read(1)

In [13]:
# Reproject the pipeline features into the cost surface's coordinate reference system
gdf_nc2 = gdf_nc2.to_crs(crs=raster_crs)

#### Rasterize each unique `OUT_CODE` value as a separate layer

In [14]:
# Field holding the per-combination code
fldName = 'OUT_CODE'
# Collects one rasterized layer per code, in lookup-table order
image_list = []
# Grid definition shared by every layer: same shape and transform as the template
grid_kwargs = {
    'out_shape': template_raster_dataset.shape,
    'transform': template_raster_dataset.transform,
    'all_touched': False,
}
for val in df_unique[fldName].unique():
    print(val)
    # Select the pipelines that carry this code
    has_code = gdf_nc2[fldName] == val
    gdf_subset = gdf_nc2.loc[has_code]
    # Pair every geometry with the value to burn into the raster
    out_shapes = zip(gdf_subset.geometry, gdf_subset[fldName])
    # Burn the geometries onto the template grid and keep the layer
    image = features.rasterize(shapes=out_shapes, **grid_kwargs)
    image_list.append(image)

1
2
4
8
16
32
64
128
256
512
1024
2048
4096
8192
16384
32768
65536
131072
262144
524288
1048576
2097152
4194304
8388608
16777216
33554432
67108864
134217728
268435456


In [15]:
# Pile the per-code layers into a single 3-D array, one layer per slice along axis 0
img_stack = np.stack(image_list, axis=0)

In [16]:
# Collapse the stack into one raster by summing the layers cell by cell.
# The accumulator dtype is pinned to the stack's own dtype: by default
# ndarray.sum() promotes small unsigned integers to the platform-sized
# unsigned integer (uint64 on most 64-bit setups), which would no longer
# match the dtype declared for the output file.
# NOTE(review): assumes the stack dtype can hold the largest possible sum;
# that holds for distinct power-of-two codes, since their total is below
# twice the largest code — confirm if the coding scheme changes.
img_sum = img_stack.sum(axis=0, dtype=img_stack.dtype)

In [17]:
# Find the smallest data type able to hold every assigned code, and show it
code_values = gdf_nc2[fldName]
min_dtype = rasterio.dtypes.get_minimum_dtype(code_values)
print(min_dtype)

uint32


In [18]:
# Adapt the template metadata for the output file: GeoTIFF format,
# 0 as the nodata value, and the data type found for the codes
raster_meta.update(driver='GTiff', nodata=0, dtype=min_dtype)

In [19]:
# Write the combined raster to band 1 of a new GeoTIFF
out_path = '../data/processed/NC_NGPipeline.tif'
with rasterio.open(out_path, mode='w', **raster_meta) as dst:
    dst.write(img_sum, 1)