## Down Sample the Sqlite Database to .csv Files Used in Element 3
This notebook walks through the steps to downsample the SQLite database created in MnDot_All_Data_Prep.  The downsampled data are written to .csv files that contain only the information directly relevant to the visualization.  Samples are taken at 10-minute intervals.

In [1]:
#Import needed libraries
import sqlite3
import datetime as dt
from dateutil.relativedelta import relativedelta
import csv

#Set up timeframes: the sampling window begins at 00:00 on this date and
#is walked forward in 10-minute steps by the cells below
s = '2014-02-19'
start = dt.datetime.strptime(s, '%Y-%m-%d')
time_step = relativedelta(minutes=10)

#Connect to database and keep one cursor that every later cell reuses
database = "fhwa_mn.sqlite"
conn = sqlite3.connect(database)
c = conn.cursor()

In [None]:
%%time
#Add indexes to the tables used in downsampling.
#
#IF NOT EXISTS makes this cell safe to re-run: a plain CREATE INDEX raises
#"index ... already exists" the second time it is executed against the same
#database file.
#
#Index names are kept exactly as they were originally created (including the
#"weater_" spelling) so that a database indexed by an earlier run is not
#given a second, duplicate set of indexes under corrected names.
index_specs = [
    #Update Table Indexes
    ('update_vname_index', 'update', 'VehicleName'),
    ('update_tstamp_index', 'update', 'TimeStamp'),
    ('update_gps_index', 'update', 'GPS_Quality'),
    #Weather Table Indexes
    ('weater_stationID_index', 'weather', 'StationID'),
    ('weater_obsType_index', 'weather', 'ObsTypeName'),
    ('weater_tstamp_index', 'weather', 'TimeStamp'),
    #Mdtx Table Indexes
    ('mdtx_vname_index', 'mdtx', 'VehicleName'),
    ('mdtx_tstamp_index', 'mdtx', 'TimeStamp'),
    ('mdtx_gps_index', 'mdtx', 'GPS_Quality'),
    #Vaix Table Indexes
    ('vaix_vname_index', 'vaix', 'VehicleName'),
    ('vaix_tstamp_index', 'vaix', 'TimeStamp'),
    ('vaix_gps_index', 'vaix', 'GPS_Quality'),
]

for index_name, table_name, column_name in index_specs:
    #Table names are double-quoted because "update" is an SQL keyword
    c.execute('CREATE INDEX IF NOT EXISTS {} ON "{}" ({});'.format(
        index_name, table_name, column_name))

In [2]:
%%time
#The data has light vehicles, let's remove these
not_plow = ["204360", "204362", "206031", "207301", "209118", "209134",
            "209217", "210161", "210333", "210335", "211277", "211548",
            "212291", "212312", "212457", "212458", "213128", "Jakin", "Joe"]

#The excluded names are bound as query parameters (one "?" each) instead of
#being spliced into the SQL text. The spliced form wrapped each name in
#double quotes, which SQLite parses as an identifier and only falls back to
#treating as a string when no such column exists.
placeholders = ', '.join('?' * len(not_plow))
vehicle_query = ('SELECT DISTINCT VehicleName FROM "{}" '
                 'WHERE VehicleName NOT IN (' + placeholders + ')')

#Find list of vehicle names from update table
c.execute(vehicle_query.format('update'), not_plow)
r = c.fetchall()
update_vehicles = [x[0] for x in r]

#Find list of weather nodes from weather table
c.execute('SELECT DISTINCT StationID FROM weather')
r = c.fetchall()
weather_nodes = [x[0] for x in r]

#Find list of vehicle names from mdtx table
c.execute(vehicle_query.format('mdtx'), not_plow)
r = c.fetchall()
mdtx_vehicles = [x[0] for x in r]

#Find list of vehicle names from vaix table
c.execute(vehicle_query.format('vaix'), not_plow)
r = c.fetchall()
vaix_vehicles = [x[0] for x in r]

CPU times: user 2.16 s, sys: 71.3 ms, total: 2.24 s
Wall time: 2.24 s


In [3]:
%%time
#Observation types exported to the weather .csv
ObsType = ['precipIntensity', 'essSurfaceStatus']

#Station IDs are spliced into the SQL as quoted literals; double any single
#quote inside an ID so it cannot terminate the literal early.
station_list = "', '".join(str(w).replace("'", "''") for w in weather_nodes)

#Select all the weather data from the database.
#The third result column is a numeric bucket key: the digits yyyymmddHH
#followed by one digit holding minute/10 (integer division), so every row in
#the same 10-minute slot shares a key and GROUP BY keeps one row per
#station / observation type / slot.
#NOTE(review): HAVING MIN(ROWID) presumably relies on SQLite filling bare
#columns from the row that holds the minimum ROWID of each group - confirm.
command = """
SELECT StationID, ObsTypeName, strftime('%%Y%%m%%d%%H0', Timestamp) + strftime('%%M', Timestamp)/10,
  Latitude, Longitude, Observation
FROM weather
WHERE Timestamp >= ? AND Timestamp <= ? AND StationID IN ('%s') AND ObsTypeName IN ('%s')
GROUP BY StationID, ObsTypeName, strftime('%%Y%%m%%d%%H0', Timestamp) + strftime('%%M', Timestamp)/10,
  Latitude, Longitude
HAVING MIN(ROWID)
ORDER BY StationID, ObsTypeName DESC, Timestamp ASC;
""" % (station_list, "', '".join(ObsType))

#Bind the window bounds as ISO text ("YYYY-MM-DD HH:MM:SS"). This is the
#same text sqlite3's implicit datetime adapter produced, but that adapter is
#deprecated since Python 3.12, so the conversion is done explicitly here.
window = (start.isoformat(' '), (start + time_step*578).isoformat(' '))

#Write the selected data to a .csv file
headers = ['StationID', 'ObsTypeName', 'Timestamp', 'Latitude', 'Longitude', 'Observation']
#newline='' is required by the csv module; without it the writer's own
#"\r\n" line endings get translated again and produce blank rows on Windows.
with open('data/weather_data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    c.execute(command, window)
    #Iterate the cursor directly instead of materialising every row first
    for row in c:
        # Fix the timestamp format: the key's last digit parses as %M (0-5)
        timestamp = dt.datetime.strptime(str(row[2]), '%Y%m%d%H%M')
        # We rounded to 10 minute intervals, so scale that digit back up
        timestamp = timestamp.replace(minute=timestamp.minute*10)
        output_row = (row[0], row[1], timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"), row[3], row[4], row[5])
        writer.writerow(output_row)

CPU times: user 2.97 s, sys: 60.7 ms, total: 3.03 s
Wall time: 3.05 s


In [4]:
%%time
#Vehicle names are spliced into the SQL as quoted literals; double any single
#quote inside a name so it cannot terminate the literal early.
vehicle_list = "', '".join(name.replace("'", "''") for name in update_vehicles)

#Select all the data from the update table.
#The Timestamp result column is a numeric bucket key: the digits yyyymmddHH
#followed by one digit holding minute/10 (integer division), so GROUP BY
#keeps one row per vehicle per 10-minute slot.
#The table name is a double-quoted identifier ("update" is an SQL keyword),
#matching how the other cells refer to it, rather than a string literal.
#NOTE(review): HAVING MIN(ROWID) presumably relies on SQLite filling bare
#columns from the row that holds the minimum ROWID of each group - confirm.
command = """
SELECT VehicleName, strftime('%%Y%%m%%d%%H0', Timestamp) + strftime('%%M', Timestamp)/10 AS Timestamp, Latitude,
  Longitude
FROM "update"
WHERE Timestamp >= ? AND Timestamp <= ? AND GPS_Quality != 0 AND VehicleName IN ('%s')
GROUP BY VehicleName, strftime('%%Y%%m%%d%%H0', Timestamp) + strftime('%%M', Timestamp)/10
HAVING MIN(ROWID)
ORDER BY VehicleName, Timestamp ASC;
""" % vehicle_list

#Bind the window bounds as ISO text ("YYYY-MM-DD HH:MM:SS"). This is the
#same text sqlite3's implicit datetime adapter produced, but that adapter is
#deprecated since Python 3.12, so the conversion is done explicitly here.
window = (start.isoformat(' '), (start + time_step*578).isoformat(' '))

#Write the selected data to a .csv file
headers = ['VehicleName', 'Timestamp', 'Latitude', 'Longitude']
#newline='' is required by the csv module; without it the writer's own
#"\r\n" line endings get translated again and produce blank rows on Windows.
with open('data/vehicle_data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    c.execute(command, window)
    #Iterate the cursor directly instead of materialising every row first
    for row in c:
        # Fix the timestamp format: the key's last digit parses as %M (0-5)
        timestamp = dt.datetime.strptime(str(row[1]), '%Y%m%d%H%M')
        # We rounded to 10 minute intervals, so scale that digit back up
        timestamp = timestamp.replace(minute=timestamp.minute*10)
        output_row = (row[0], timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"), row[2], row[3])
        writer.writerow(output_row)

CPU times: user 26.4 s, sys: 6.24 s, total: 32.6 s
Wall time: 32.6 s


In [5]:
%%time
#Select the earliest mdtx row with non-zero GPS_Quality for one vehicle
#inside one time window (the table name is written as an identifier rather
#than a string literal).
#NOTE(review): both bounds are inclusive, so a reading stamped exactly on a
#window boundary could be returned for two consecutive windows - confirm
#whether that can happen with the stored Timestamp format.
command = """
SELECT VehicleName, Timestamp, Latitude, Longitude, RoadCondition
FROM "mdtx"
WHERE VehicleName = ? AND Timestamp >= ? AND Timestamp <= ? AND GPS_Quality != 0
ORDER BY Timestamp ASC
LIMIT 1
"""
all_mdtx_lists = []
#For every vehicle
for v in mdtx_vehicles:
    my_mdtx_list = []
    #For every 10min interval
    for i in range(577):
        s = start + time_step*i
        e = start + time_step*(i+1)
        #Bind the bounds as ISO text ("YYYY-MM-DD HH:MM:SS"): the same text
        #sqlite3's implicit datetime adapter produced, made explicit because
        #that adapter is deprecated since Python 3.12.
        c.execute(command, (v, s.isoformat(' '), e.isoformat(' ')))
        r = c.fetchone()
        #Windows with no matching reading are simply skipped
        if r is not None:
            my_mdtx_list.append(r)
    all_mdtx_lists.append(my_mdtx_list)

#Write the selected data to a .csv file
headers = ['VehicleName', 'Timestamp', 'Latitude', 'Longitude', 'RoadCondition']
#newline='' is required by the csv module; without it the writer's own
#"\r\n" line endings get translated again and produce blank rows on Windows.
with open('data/mdtx_data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    for vehicle_rows in all_mdtx_lists:
        writer.writerows(vehicle_rows)

CPU times: user 16.4 s, sys: 1.49 s, total: 17.9 s
Wall time: 17.9 s


In [6]:
%%time
#Select the earliest vaix row with non-zero GPS_Quality for one vehicle
#inside one time window (the table name is written as an identifier rather
#than a string literal).
#NOTE(review): both bounds are inclusive, so a reading stamped exactly on a
#window boundary could be returned for two consecutive windows - confirm
#whether that can happen with the stored Timestamp format.
command = """
SELECT VehicleName, Timestamp, Latitude, Longitude, RoadTemp
FROM "vaix"
WHERE VehicleName = ? AND Timestamp >= ? AND Timestamp <= ? AND GPS_Quality != 0
ORDER BY Timestamp ASC
LIMIT 1
"""
all_vaix_lists = []
#For every vehicle
for v in vaix_vehicles:
    my_vaix_list = []
    #For every 10min interval
    for i in range(577):
        s = start + time_step*i
        e = start + time_step*(i+1)
        #Bind the bounds as ISO text ("YYYY-MM-DD HH:MM:SS"): the same text
        #sqlite3's implicit datetime adapter produced, made explicit because
        #that adapter is deprecated since Python 3.12.
        c.execute(command, (v, s.isoformat(' '), e.isoformat(' ')))
        r = c.fetchone()
        #Windows with no matching reading are simply skipped
        if r is not None:
            my_vaix_list.append(r)
    all_vaix_lists.append(my_vaix_list)

#Write the selected data to a .csv file
headers = ['VehicleName', 'Timestamp', 'Latitude', 'Longitude', 'RoadTemp']
#newline='' is required by the csv module; without it the writer's own
#"\r\n" line endings get translated again and produce blank rows on Windows.
with open('data/vaix_data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    for vehicle_rows in all_vaix_lists:
        writer.writerows(vehicle_rows)

CPU times: user 1min 1s, sys: 27.2 s, total: 1min 28s
Wall time: 1min 28s
