In [8]:
"""
GENERAL STEPS
- run sql query to get all non-spatial TMC data and load into dataframe
- load shp of NHS segments into geodataframe
- left join non-spatial TMC table to NHS geotable
- where spatial data missing:
    - add "tru_shp" flag; set to 0 (1 = has true shape, 0 = needs true shape built)
    - compute "stick" geometry using start/end lat/long vals.
    - CHECK TO DO: can you programmatically check if a TMC's start/end points haven't changed, and if not just plug in the geom from the 2021 INRIX file?
 - export to feature class; DAMS will need to manually correct missing true shapes (how to prioritize?)
    
"""

import arcpy
import geopandas as gpd
import pandas as pd

from esri_file_to_dataframe import esri_to_df
from sqlqry2pandas import sqlqry_to_df

# Feature class holding the true-shape TMC segments, limited to the NHS network.
# The path below lives on the WIN10-MODEL-2 machine.
shp_tmc_nhs = r"I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb\NPMRDS_2023_NHS_SACOG"

# Load the feature class into a geodataframe.
# The CRS code is read from the feature class itself rather than hard-coded
# (2226 = EPSG code for SACOG region).
shp_crs = arcpy.Describe(shp_tmc_nhs).spatialReference.factoryCode
shp_fields = ['Tmc']  # only the TMC identifier is pulled alongside the geometry

gdf_nhs = esri_to_df(
    esri_obj_path=shp_tmc_nhs,
    include_geom=True,
    field_list=shp_fields,
    index_field=None,
    crs_val=shp_crs,
    dissolve=False,
)

print('gdf loaded')

gdf loaded


In [19]:
# The non-spatial NPMRDS data is stored in SQL Server.
npmrds_db = 'NPMRDS'
tt_tbl = 'npmrds_2023_alltmc_paxtruck_comb'
# Full TMC network table; it lacks the spatial true-shape data.
tmc_txt_tbl = 'npmrds_2023_alltmc_txt'
# WGS 84, the default system for the lat-lon points.
crs_npmrds_raw = 4326

#-------------------
# Read the query template from disk, then substitute the table names into its
# `tt_tbl` / `tmc_tbl` placeholders.
with open('PPA_NPMRDS_metrics_latest.sql', 'r') as f:
    sql_template = f.read()

params = {'tt_tbl': tt_tbl, 'tmc_tbl': tmc_txt_tbl}
formatted_sql = sql_template.format(**params)

# run query and load to spatial df
print("running query for speed data...")
# NOTE(review): the query call below is commented out, so this cell never
# defines `df_npmrds`. The later join cell reads `df_npmrds` and will raise a
# NameError on a fresh kernel until this call is re-enabled.
# df_npmrds = sqlqry_to_df(query_str=formatted_sql, dbname=npmrds_db)

running query for speed data...


In [20]:
# Smoke test of the query helper: pull a 1000-row sample from the table named
# by `tt_tbl` instead of running the full metrics query.
test_str = 'SELECT TOP 1000 * FROM {}'.format(tt_tbl)
dftest = sqlqry_to_df(dbname=npmrds_db, query_str=test_str)

Executing query. Results loading into dataframe...
Successfully executed query in 0.0 minutes. 1000 rows loaded into dataframe.


In [21]:
# Preview the first rows of the sample query result (5 is the pandas default).
dftest.head(5)

Unnamed: 0,tmc_code,measurement_tstamp,speed,historical_average_speed,reference_speed,travel_time_seconds,data_density
0,105P17070,2023-01-01 16:45:00,53.0,62.0,71.0,0.84,A
1,105-16661,2023-01-01 07:15:00,48.25,54.0,65.0,140.369995,A
2,105-16661,2023-01-01 09:30:00,53.5,,65.0,126.599998,A
3,105-16661,2023-01-01 09:45:00,42.0,,65.0,161.25,A
4,105-16661,2023-01-01 11:15:00,45.0,51.0,65.0,150.5,A


In [None]:
# left join non-spatial TMC table to NHS geotable
f_trushp = 'tru_shp'

# NOTE(review): `df_npmrds` is only assigned by the sqlqry_to_df call that is
# currently commented out in the SQL cell; that call must be run first or this
# line raises a NameError on a fresh kernel.
dfjn = df_npmrds.merge(gdf_nhs, how='left', left_on='tmc', right_on='Tmc')

# When the left table of a merge is a plain pandas DataFrame, the result is a
# plain DataFrame too, even though it carries a geometry column. Re-wrap it as
# a GeoDataFrame so the later spatial steps (stick geometry, export to feature
# class) have a geometry-aware table to work with.
# Assumes gdf_nhs is a GeoDataFrame exposing `.crs` -- TODO confirm against
# esri_to_df; getattr() keeps this from failing if it is not.
dfjn = gpd.GeoDataFrame(dfjn, geometry='geometry', crs=getattr(gdf_nhs, 'crs', None))

# 1 = TMC matched a record in the NHS feature class (has true shape),
# 0 = no match, true shape still needs to be built.
# int64 is requested explicitly so the dtype does not depend on the platform.
dfjn[f_trushp] = dfjn['geometry'].notnull().astype('int64')


# - where spatial data missing:
#     - compute "stick" geometry using start/end lat/long vals.
#     - CHECK TO DO: can you programmatically check if a TMC's start/end points haven't changed, and if not just plug in the geom from the 2021 INRIX file?
#  - export to feature class; DAMS will need to manually correct missing true shapes (how to prioritize?)

In [18]:
# TEST CONCEPT: see where geometries from the 2021 INRIX SHP can be repurposed,
# to reduce the amount of manual coding needed.
shp_fulltmc_2021 = r'I:\Projects\Darren\PPA3_GIS\PPA3.0_archive.gdb\INRIX_SHP_2020_2021_SACOG'

# NOTE: `shp_fields` and `shp_crs` are re-assigned here, replacing the values
# set when the NHS feature class was loaded.
# (2226 = EPSG code for SACOG region)
shp_crs = arcpy.Describe(shp_fulltmc_2021).spatialReference.factoryCode
shp_fields = ['Tmc']

gdf_fulltmc_2021 = esri_to_df(
    esri_obj_path=shp_fulltmc_2021,
    include_geom=True,
    field_list=shp_fields,
    index_field=None,
    crs_val=shp_crs,
    dissolve=False,
)