# NPMRDS Exploration for Model Speed Validation
This notebook is a sandbox for visually exploring NPMRDS data.

In [2]:
# Function to load SQL Query Results into a DataFrame
from time import perf_counter as perf

import pandas as pd
import urllib

import sqlalchemy as sqla # needed to run pandas df.to_sql() function
    
# extract SQL Server query results into a pandas dataframe   
def sqlqry_to_df(query_str, dbname, servername='SQL-SVR', trustedconn='yes'):
    """Run a query against SQL Server and return the result as a DataFrame.

    Parameters
    ----------
    query_str : str
        SQL text to execute.
    dbname : str
        Name of the database to connect to.
    servername : str, default 'SQL-SVR'
        Value used for the ODBC ``SERVER`` attribute.
    trustedconn : str, default 'yes'
        Value used for the ODBC ``Trusted_Connection`` attribute.

    Returns
    -------
    pandas.DataFrame
        The rows returned by ``query_str``.

    Notes
    -----
    Any error raised while connecting or executing the query propagates to
    the caller; the engine is disposed either way so that pooled ODBC
    connections are not left open after each call.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` has been loaded.
    from urllib.parse import quote_plus

    conn_str = "DRIVER={ODBC Driver 17 for SQL Server};" \
        f"SERVER={servername};" \
        f"DATABASE={dbname};" \
        f"Trusted_Connection={trustedconn}"

    # The raw ODBC string must be URL-encoded before being embedded in the
    # SQLAlchemy connection URL.
    engine = sqla.create_engine(
        f"mssql+pyodbc:///?odbc_connect={quote_plus(conn_str)}"
    )

    start_time = perf()

    # Execute the query and load the result set into a dataframe.
    print("Executing query. Results loading into dataframe...")
    try:
        df = pd.read_sql_query(sql=query_str, con=engine)
    finally:
        # Release the connection pool created for this call.
        engine.dispose()

    rowcnt = len(df)
    et_mins = round((perf() - start_time) / 60, 2)
    print(f"Successfully executed query in {et_mins} minutes. {rowcnt} rows loaded into dataframe.")

    return df

## Chart 1: Single TMC Analysis
Plots all travel times for a single TMC within a given time period, to help identify a method for outlier exclusion.

In [None]:
import cufflinks as cf

# Name of the database to query.
# NOTE(review): the call below passes `query_str` / `dbname`, which are not
# defined in the visible cells — presumably `test_qry` / `db` were intended; confirm.
db = "NPMRDS"
# Sample query limited to the first 100 rows of the 2017 table.
test_qry = "SELECT TOP 100 * FROM npmrds_2017_alltmc_paxveh"

sqlqry_to_df(query_str, dbname