# Flagging days/periods with Snow

The following code takes December 2017 data from the King Street Pilot, compares it to baseline values, and adds a flag indicating whether or not it snowed in the respective time period.

In [78]:
# import relevant modules
import configparser
from psycopg2 import connect
import psycopg2.sql as pg
import pandas.io.sql as pandasql
import datetime

In [79]:
# connect to database
# Location of the INI file that holds the connection settings.
DB_CONFIG_PATH = r'C:\Users\alouis2\Documents\Python Scripts\db.cfg'

CONFIG = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the settings were never loaded.
if not CONFIG.read(DB_CONFIG_PATH):
    raise FileNotFoundError('Could not read database config file: ' + DB_CONFIG_PATH)
# The [DBSETTINGS] section is passed to psycopg2's connect() as keyword arguments.
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

The following SQL query pulls the date, street, direction, time period, and the corresponding King Street Pilot and baseline travel times for the selected dates. We are only concerned with Weekday values.

In [80]:
# run query
# Pairs each pilot travel time (pilot.tt) with its baseline (baseline.tt) by joining
# the daily and baseline tables on street, direction, day_type and period.
# Rows are limited to Weekday / category 'Pilot' records on the eight listed
# December 2017 dates, which are the same dates used as keys of snow_periods.
# NOTE(review): dt and category are not table-qualified; presumably only one of the
# two tables has those columns -- confirm against the schema.
sql = pg.SQL('''select pilot.dt, pilot.street, pilot.direction, pilot.period, 
                pilot.tt as pilot, baseline.tt as baseline 
                from king_pilot.dash_daily_dev as pilot,  king_pilot.dash_baseline_dev as baseline 
                where (dt = '2017-12-11' or dt = '2017-12-12' or dt = '2017-12-13' 
                or dt = '2017-12-15' or dt = '2017-12-18' or dt = '2017-12-21' or dt = '2017-12-22' or dt = '2017-12-29')
                and category = 'Pilot' and  pilot.day_type = 'Weekday' and pilot.street = baseline.street 
                and pilot.direction = baseline.direction and pilot.day_type = baseline.day_type 
                and pilot.period = baseline.period order by dt, street, direction, period''')

# Execute the query on the open connection and load the result set into a dataframe.
snow = pandasql.read_sql(sql, con)

The following adds a 'change' column to the dataframe. This column represents the difference in travel time between pilot and baseline, calculated as pilot minus baseline.

Moreover, a flag column is created that will contain binary values corresponding to whether or not it snowed in this time period. 1 will represent snow, and 0 will represent no snow. For now the default values of the flag column are set at 0. 

In [81]:
# Difference in travel time for each row: pilot minus baseline.
# Done as a single column-wise subtraction instead of a per-row Python loop.
snow['change'] = snow['pilot'] - snow['baseline']
# Every row starts unflagged (0 = no snow); the scalar is broadcast to all rows.
snow['snow_flag'] = 0

'snow_periods' is a dictionary where the keys are days where snow was present, and the paired values are the time periods corresponding to when it snowed on that day. 

In [82]:
# Snow observations for December 2017 as (day of month, periods in which it snowed).
_SNOW_BY_DAY = (
    (11, ['AM Peak', 'PM Peak', 'Evening']),
    (12, ['AM Peak', 'Midday']),
    (13, ['AM Peak', 'Evening']),
    (15, ['Midday', 'PM Peak']),
    (18, ['AM Peak']),
    (21, ['Evening']),
    (22, ['AM Peak', 'Midday', 'PM Peak']),
    (29, ['AM Peak', 'Midday']),
)

# Key the lookup by full calendar date; entries keep the order listed above.
snow_periods = {
    datetime.date(2017, 12, day_of_month): periods
    for day_of_month, periods in _SNOW_BY_DAY
}

Each row whose date and time period appear in 'snow_periods' is then flagged, i.e. its snow_flag value is changed from 0 to 1.

In [83]:
# Every (date, period) combination in which snow was recorded.
snowy_slots = {
    (day, period)
    for day, periods in snow_periods.items()
    for period in periods
}

# Assign the whole column in one pass rather than incrementing it in place:
# re-running this cell gives the same 0/1 result instead of counting upwards,
# and nothing is written through the column's `.values` array.
# NOTE(review): assumes snow['dt'] holds datetime.date objects, matching the keys
# of snow_periods -- confirm against the dtype returned by the query.
snow['snow_flag'] = [
    int((day, period) in snowy_slots)
    for day, period in zip(snow['dt'], snow['period'])
]

The final dataframe with snow flags:

In [84]:
# Show the flagged dataframe as this cell's output.
snow

Unnamed: 0,dt,street,direction,period,pilot,baseline,change,snow_flag
0,2017-12-11,Adelaide,Eastbound,AM Peak,7.184190,7.864729,-0.680538,1
1,2017-12-11,Adelaide,Eastbound,Evening,6.146098,8.071130,-1.925032,1
2,2017-12-11,Adelaide,Eastbound,Midday,8.980245,9.410475,-0.430230,0
3,2017-12-11,Adelaide,Eastbound,PM Peak,25.102782,14.699972,10.402809,1
4,2017-12-11,Bathurst,Northbound,AM Peak,5.440064,5.298643,0.141421,1
5,2017-12-11,Bathurst,Northbound,Evening,4.239965,5.412162,-1.172197,1
6,2017-12-11,Bathurst,Northbound,Midday,5.070752,5.607554,-0.536801,0
7,2017-12-11,Bathurst,Northbound,PM Peak,6.383813,6.108102,0.275711,1
8,2017-12-11,Bathurst,Southbound,AM Peak,4.559895,5.109653,-0.549758,1
9,2017-12-11,Bathurst,Southbound,Evening,3.887059,5.248978,-1.361919,1


NameError: name 'datetime' is not defined