# Build Validation Table for NPMRDS
For each of the 9 SACSIM time periods, provide:
* Average speed
* Number of epochs
* Standard error of the speed (standard deviation / mean)

In [1]:
import os
from time import perf_counter as perf

import pandas as pd
import urllib
import sqlalchemy as sqla # needed to run pandas df.to_sql() function
    
# extract SQL Server query results into a pandas dataframe   
def sqlqry_to_df(query_str, dbname, servername='SQL-SVR', trustedconn='yes'):     

    conn_str = "DRIVER={ODBC Driver 17 for SQL Server};" \
        f"SERVER={servername};" \
        f"DATABASE={dbname};" \
        f"Trusted_Connection={trustedconn}"
        
    conn_str = urllib.parse.quote_plus(conn_str)
    engine = sqla.create_engine(f"mssql+pyodbc:///?odbc_connect={conn_str}")
       
    start_time = perf()

    # create SQL table from the dataframe
    print("Executing query. Results loading into dataframe...")
    df = pd.read_sql_query(sql=query_str, con=engine)
    rowcnt = df.shape[0]
    
    et_mins = round((perf() - start_time) / 60, 2)
    print(f"Successfully executed query in {et_mins} minutes. {rowcnt} rows loaded into dataframe.")
    
    return df

def get_ffs_df(qry_file, db_name, speed_tbl, tmc_tbl_geom, tmc_tbl_dyr, data_yr, tmc_yr):
    
    with open(qry_file, 'r') as f:
        q_str = f.read()
        
    q_str_formatted = q_str.format(tmc_tbl_geom, speed_tbl, tmc_yr, data_yr, tmc_tbl_dyr)
    
    out_df = sqlqry_to_df(q_str_formatted, db_name)
    
    return out_df
    

In [7]:
query_file = 'speed_cal_freeflow.sql'

db = 'NPMRDS'
tbl_speeds = 'npmrds_2017_alltmc_paxtruck_comb'  # 'npmrds_2019_alltmc_paxtruck_comb'
tbl_tmcs_geom = 'npmrds_2020_alltmc_txt' # TMCs used for mapping and conflation
tbl_tmcs_datayear = 'npmrds_2017_all_tmcs_txt'  # 'npmrds_2019_all_tmcs_txt' # TMCs corresponding to data year of the speed data
tmc_year = 2020 # year represented by the TMC master file
data_year = 2017 # year of the speed data

join_key = 'tmc'


df_master = get_ffs_df(query_file, db, tbl_speeds, tbl_tmcs_geom, tbl_tmcs_datayear, data_year, tmc_year)

        
print("successfully created dataframe.")
        

    


Executing query. Results loading into dataframe...
Successfully executed query in 2.56 minutes. 7495 rows loaded into dataframe.
successfully created dataframe.


In [6]:
df_master.head()

Unnamed: 0,tmc,road,f_system,len_mi2017,len_mi2020,ffs_85th60th,ffs_85,avspd_10p4a,epoch_cnt10p4a
0,105P17071,CLAY STATION RD,,0.0347,0.033902,49.2,53.0,23.833333,2.0
1,105P17070,CLAY STATION RD,,0.012434,,,,,
2,105-16661,NICOLAUS RD,,1.881302,1.987958,55.0,60.0,50.754032,112.0
3,105P17076,CLAY STATION RD,,0.028968,,,,,
4,105-16660,NICOLAUS RD,,2.00178,2.000926,60.0,66.0,57.102806,110.0


In [7]:
# join to link feature class and export to GIS feature class
from arcgis.features import GeoAccessor, GeoSeriesAccessor
import datetime as dt

input_link_fc = r'Q:\SACSIM23\Network\SM23GIS\MN_link_forConflation_YZ\ConflationResults.gdb\conflation_INRIX_20211207'
links_jnky = 'Tmc'
link_fc_cols = ['OBJECTID', 'A', 'B', 'A_B', 'CAPC20', 'NAME', links_jnky,
       'RoadName', 'FwyTag', 'RoadNumber', 'Type', 'Mis_cf_check', 'SHAPE']


output_fgdb = r'Q:\SACSIM23\Network\SM23GIS\SM23Testing.gdb'
speed_data_year = 2016
TMC_yr = 2020

#-----------RUN script-----------
sufx = str(dt.datetime.now().strftime('%Y%m%d_%H%M'))

output_fc_name = f'InrixFFSpd_{speed_data_year}on{TMC_yr}TMC{sufx}'


sedf_links = pd.DataFrame.spatial.from_featureclass(input_link_fc)
sedf_links = sedf_links[link_fc_cols]
sedf_links = sedf_links.merge(df_master, how='left', left_on=links_jnky, right_on=join_key)
sedf_links.spatial.to_featureclass(os.path.join(output_fgdb, output_fc_name))

'Q:\\SACSIM23\\Network\\SM23GIS\\SM23Testing.gdb\\InrixFFSpd_2016on2020TMC20211213_1701'