In [1]:
"""
GENERAL STEPS
- run sql query to get all non-spatial TMC data and load into dataframe
- load shp of NHS segments into geodataframe
- left join non-spatial TMC table to NHS geotable
- where spatial data missing:
    - add "tru_shp_yr" flag; 0 = needs true shape built, a year value (2021 or the data year) = shape is usable as-is
    - compute "stick" geometry using start/end lat/long vals.
    - TODO (check): can we programmatically confirm that a TMC's start/end points haven't changed and, if they haven't, just plug in the geom from the 2021 INRIX file?
- export to feature class; DAMS will need to manually correct missing true shapes (how to prioritize?)
    
"""
import datetime as dt
from pathlib import Path

import numpy as np
import geopandas as gpd
import pandas as pd
import arcpy
from arcgis.features import GeoAccessor, GeoSeriesAccessor
from shapely import LineString

from esri_file_to_dataframe import esri_to_df
from sqlqry2pandas import sqlqry_to_df

# Source feature class: true-shape TMC geometries, available only for NHS segments.
shp_tmc_nhs = r"I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb\NPMRDS_2023_NHS_SACOG" # on WIN10-MODEL-2

# Load the NHS true shapes into a GeoDataFrame.
shp_fields = ['Tmc']
crs_sacog_ft = 2226 # SACOG CRS with feet as units

# Code of the CRS the feature class is actually stored in, as reported by arcpy.
shp_native_crs = arcpy.Describe(shp_tmc_nhs).spatialReference.factoryCode

# Declare the data with its *native* CRS (same pattern used for the 2021 INRIX
# file) so that the reprojection check below is meaningful. Previously the
# target CRS was passed here, which made the check a no-op and left
# shp_native_crs unused. If arcpy reports no usable code (falsy value), fall
# back to the SACOG CRS, which is what was always passed before.
nhs_source_crs = shp_native_crs or crs_sacog_ft
gdf_nhs = esri_to_df(esri_obj_path=shp_tmc_nhs, include_geom=True, field_list=shp_fields, index_field=None,
                     crs_val=f"EPSG:{nhs_source_crs}", dissolve=False)

# Reproject only when the loaded data is not already in the SACOG feet-based CRS.
if gdf_nhs.crs.to_epsg() != crs_sacog_ft:
    gdf_nhs = gdf_nhs.to_crs(f"EPSG:{crs_sacog_ft}")

print('gdf loaded')

gdf loaded


In [2]:
# Non-spatial NPMRDS data is stored in SQL Server.
npmrds_db = 'NPMRDS'
tt_tbl = 'npmrds_2023_alltmc_paxtruck_comb'
tmc_txt_tbl = 'npmrds_2023_alltmc_txt' # full TMC network, but lacking spatial true-shape data
data_year = 2023
crs_npmrds_raw = 4326 # WGS 84, the default system for the lat-lon points

csv_speed_data = r"I:\Projects\Darren\PPA3_GIS\CSV\NPMRDS\npmrds_metrics_2023.csv"

#-------------------
# Read the SQL template; only the read itself needs the file handle open.
with open('PPA_NPMRDS_metrics_latest.sql', 'r') as f:
    sql_template = f.read()

# Fill the template's table-name placeholders.
params = dict(tt_tbl=tt_tbl, tmc_tbl=tmc_txt_tbl)
formatted_sql = sql_template.format(**params)

# run query and load to spatial df
# 10/17/2024 - for now are running as separate SQL query, since running this cell creates issue and query takes a *long* time (20+mins)
# print("running query for speed data...")
# df_npmrds = sqlqry_to_df(query_str=formatted_sql, dbname=npmrds_db)

# Alternative: load previously exported query results from CSV instead of running the raw query.
df_npmrds = pd.read_csv(csv_speed_data)

# Temporary query to get the TMC spec table (start/end lat-longs) for testing;
# ideally this will be part of the main SQL in later versions.
# The table name comes from tmc_txt_tbl rather than being repeated as a literal.
test_str = f'SELECT tmc, start_latitude, start_longitude, end_latitude, end_longitude FROM {tmc_txt_tbl}'
dftest = sqlqry_to_df(query_str=test_str, dbname=npmrds_db)

# Attach start/end coordinates to the metrics (default inner join on 'tmc':
# metric rows without a spec row are dropped).
df_npmrds = df_npmrds.merge(dftest, on='tmc')

# Left join the non-spatial TMC table to the NHS geotable so every TMC is kept,
# with a null geometry where no NHS true shape exists.
f_trushp = 'tru_shp_yr'
dfjn = df_npmrds.merge(gdf_nhs, how='left', left_on='tmc', right_on='Tmc')
dfjn = gpd.GeoDataFrame(dfjn, geometry='geometry')
dfjn[f_trushp] = 0 # by default, assume not true shape
dfjn.loc[dfjn['geometry'].notna(), f_trushp] = data_year # flag with the data year if TMC exists in NHS shapefile

Executing query. Results loading into dataframe...
Successfully executed query in 0.0 minutes. 7677 rows loaded into dataframe.


In [4]:
# Where possible, repurpose geometries from the 2021 INRIX shapefile to reduce
# the amount of manual coding needed.
# NOTE: this cell reassigns dfjn (merge plus new columns); rebuild dfjn from the
# earlier cell before re-running this one.

# load in old Inrix true-shapes file
shp_fulltmc_2021 = r'I:\Projects\Darren\PPA3_GIS\PPA3.0_archive.gdb\INRIX_SHP_2020_2021_SACOG'

shp_fields = ['Tmc', 'StartLat', 'StartLong', 'EndLat', 'EndLong']
shp_crs_full = arcpy.Describe(shp_fulltmc_2021).spatialReference.factoryCode # 2226 = EPSG code for SACOG region
gdf_fulltmc_2021 = esri_to_df(esri_obj_path=shp_fulltmc_2021, include_geom=True, field_list=shp_fields, index_field=None,
               crs_val=f"EPSG:{shp_crs_full}", dissolve=False)

# project the 2021 shapes to the SACOG feet-based CRS before merging
gdf_fulltmc_2021_prj = gdf_fulltmc_2021.to_crs(f"EPSG:{crs_sacog_ft}")

# 10/17/2024 intentional duplicate of above to_crs() command due to known recent bug with geopandas.
# more details - https://github.com/geopandas/geopandas/issues/3433
gdf_fulltmc_2021_prj = gdf_fulltmc_2021.to_crs(f"EPSG:{crs_sacog_ft}")

# merge to master df with speed data; overlapping column names from the 2021
# file get the '_2021' suffix (e.g. geometry_2021)
dfjn = dfjn.merge(gdf_fulltmc_2021_prj, how='left', left_on='tmc', right_on='Tmc', suffixes=('','_2021'))


# compute difference between 2021 vs. latest TMCs' "stick" distance
# each entry: new point column -> [longitude field, latitude field]
update_dict = {'start_pt_new': ['start_longitude', 'start_latitude'],
               'end_pt_new': ['end_longitude', 'end_latitude'],
               'start_pt_old': ['StartLong', 'StartLat'],
               'end_pt_old': ['EndLong', 'EndLat']}

# lat/long fields are read as WGS 84 (crs_npmrds_raw) and projected to feet
for cname, (x_field, y_field) in update_dict.items():
    dfjn[cname] = gpd.points_from_xy(dfjn[x_field], dfjn[y_field], crs=f"EPSG:{crs_npmrds_raw}").to_crs(f"EPSG:{crs_sacog_ft}")

# straight-line start-to-end length for the latest and the 2021 endpoints
stick_len_new = np.hypot(dfjn['end_pt_new'].x - dfjn['start_pt_new'].x,
                         dfjn['end_pt_new'].y - dfjn['start_pt_new'].y)
stick_len_old = np.hypot(dfjn['end_pt_old'].x - dfjn['start_pt_old'].x,
                         dfjn['end_pt_old'].y - dfjn['start_pt_old'].y)

# NOTE(review): this compares stick *lengths* only, not endpoint positions, so a
# TMC that moved or reversed without changing length would still count as unchanged.
dfjn['abs_dist_dif'] = (stick_len_new - stick_len_old).abs()

# Read the linear unit from the CRS axis metadata rather than crs.to_dict(),
# which goes through a lossy PROJ-string conversion and emits a warning.
dist_units = dfjn.crs.axis_info[0].unit_name
if dist_units != 'US survey foot':
    print(f'WARNING: distance units are in {dist_units}')

# if not already a tru shp from NHS *and* distance difference between old and new lat/longs is < 1 foot, then sub in the 2021 geometry
reuse_2021 = (dfjn[f_trushp] == 0) & (dfjn['abs_dist_dif'] < 1)
dfjn.loc[reuse_2021, 'geometry'] = dfjn['geometry_2021']
dfjn.loc[reuse_2021, f_trushp] = 2021 # set to 2021 to reflect TMCs whose shape came from the 2021 file


# for remaining TMCs that are still without a geometry, just get the "stick" geometry. These will need to be manually edited to be true shapes in GIS.
def pts_to_linestring(row):
    """Build a two-vertex "stick" line from a row's start and end points.

    Args:
        row: mapping-like record whose 'start_pt_new' and 'end_pt_new' entries
            are point objects exposing ``.x`` and ``.y``.

    Returns:
        LineString running from the start point to the end point.
    """
    origin, terminus = row['start_pt_new'], row['end_pt_new']
    return LineString([(origin.x, origin.y), (terminus.x, terminus.y)])

ft_per_mile = 5280 # geometry lengths are in feet; spec-file lengths ('miles') are in miles
max_stick_pctdiff = 0.0015 # 0.15% tolerance between spec length and stick length

# TMCs still flagged 0 have no usable true shape yet; give them the stick geometry.
# Build sticks only for those rows instead of for the whole table.
needs_stick = dfjn[f_trushp] == 0
if needs_stick.any():
    dfjn.loc[needs_stick, 'geometry'] = dfjn.loc[needs_stick].apply(pts_to_linestring, axis=1)

# *but* if the "stick" length is sufficiently close to the spec file miles, then mark as not needing manual edit
# specifically, if the real length is less than 0.15% different from stick, then consider as not needing manual edit
dfjn['pctdiff_v_trushp'] = 0.0 # float from the start so fractional values do not force a dtype change
spec_miles = dfjn.loc[needs_stick, 'miles']
stick_miles = dfjn['geometry'].loc[needs_stick].length / ft_per_mile
dfjn.loc[needs_stick, 'pctdiff_v_trushp'] = (spec_miles - stick_miles).abs() / spec_miles

# stick rows within tolerance are flagged with the data year, i.e. treated as usable as-is
dfjn.loc[needs_stick & (dfjn['pctdiff_v_trushp'] <= max_stick_pctdiff), f_trushp] = data_year
dfjn[f_trushp].value_counts()

  proj = self._crs.to_proj4(version=version)


tru_shp_yr
2021    3673
2023    3054
0        950
Name: count, dtype: int64

In [5]:
# Export the joined TMC table to an ESRI feature class with a timestamped name.
sufx = dt.datetime.now().strftime('%Y%m%d_%H%M') # strftime already returns str
outname = f"NPMRDS_{data_year}data_{sufx}"
out_gdb = r'I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb'
out_path = str(Path(out_gdb).joinpath(outname))


# Helper/intermediate columns that should not appear in the output feature class.
fields_to_delete = ['tmc_appearance_n',
       'start_latitude', 'start_longitude', 'end_latitude', 'end_longitude',
       'Tmc', 'Tmc_2021', 'StartLat', 'StartLong',
       'EndLat', 'EndLong', 'geometry_2021', 'start_pt_new', 'end_pt_new',
       'start_pt_old', 'end_pt_old']


# Convert to an ArcGIS spatially enabled dataframe, then keep only output columns.
sedf = pd.DataFrame.spatial.from_geodataframe(dfjn)
output_fields = [f for f in sedf.columns if f not in fields_to_delete]
# .copy() makes the column subset an independent frame, so the column writes
# done during export no longer raise SettingWithCopyWarning.
sedf = sedf[output_fields].copy()
sedf.spatial.to_featureclass(out_path, sanitize_columns=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._data[col] = array


'I:\\Projects\\Darren\\PPA3_GIS\\PPA3_GIS.gdb\\NPMRDS_2023data_20241017_1129'

In [7]:
# Preview the first five exported rows whose true-shape flag is 2021.
is_from_2021 = sedf[f_trushp] == 2021
sedft = sedf.loc[is_from_2021].head(5)
sedft
# optional: export just this sample instead of the full table
# sedft.spatial.to_featureclass(out_path, sanitize_columns=False)

Unnamed: 0,tmc,road,route_numb,f_system,nhs,miles,tt_p80_ampk,tt_p50_ampk,tt_p80_midday,tt_p50_midday,...,epochs_ampk,epochs_midday,epochs_pmpk,epochs_weekend,epochs_worst4hrs,epochs_slowest_hr,epochs_night,tru_shp_yr,abs_dist_dif,SHAPE
193,105+06689,CA-16,0,0,0,19.12955,1501.97002,1315.094971,1513.065967,1350.319946,...,2228,4164,2358,3071,2470,502,4224,2021,0.345912,"{""paths"": [[[6491576.142538544, 2087388.286528..."
194,105+06690,CA-16,0,0,0,0.701589,58.740002,53.279999,60.139999,54.91,...,1151,2706,1458,1832,1364,237,2640,2021,0.08566,"{""paths"": [[[6547829.771535119, 2019423.971815..."
196,105+06692,CA-16,0,0,0,1.574321,104.949997,97.720001,106.690002,98.75,...,2218,3397,1753,2348,1939,287,3717,2021,0.222579,"{""paths"": [[[6557813.3447116455, 2012548.46001..."
197,105+06693,CA-16,0,0,0,0.949343,65.720001,61.029999,67.010002,61.580002,...,2080,3224,1701,2273,1937,527,3497,2021,0.267372,"{""paths"": [[[6566008.3291473165, 2011040.82623..."
198,105+06694,CA-16,0,0,0,0.690459,52.889999,48.740002,53.959999,48.91,...,2085,3263,1707,2146,2228,523,3419,2021,0.290613,"{""paths"": [[[6571075.366860051, 2010109.865236..."
