In [None]:
%matplotlob inline
import psycopg2 as pg
import configparser
import datetime
import matplotlib.pyplot.scatter as plt
from pg import DB
import pandas as pd
import pandas.io.sql as psql

In [None]:
# Setting up the PostgreSQL connection.
# The password is deliberately NOT stored in the notebook: it is taken from the
# PGPASSWORD environment variable, or prompted for interactively when unset.
# The remaining settings fall back to the values this notebook has always used
# and can be overridden through the standard PG* environment variables.
import os
from getpass import getpass

_pg_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")
db = DB(
    dbname=os.environ.get("PGDATABASE", "bigdata"),
    host=os.environ.get("PGHOST", "10.160.12.47"),
    port=int(os.environ.get("PGPORT", "5432")),
    user=os.environ.get("PGUSER", "ryu4"),
    passwd=_pg_password,
)

In [21]:
def get_directional_data(start_point, end_point):
    """Return hourly observation counts for the segment between two points.

    Only rows of ``ryu4.aggr_5min_bt`` whose ``startpoint_name`` AND
    ``endpoint_name`` are both one of ``start_point`` / ``end_point`` are
    considered. They are grouped by ``date_trunc('hour', datetime_bin)``, which
    is exposed under the (historical) alias ``yyyymm``, and ordered by it.

    Result columns, one row per hour:
      * ``yyyymm``          - the hour bucket.
      * ``"Number EB Obs"`` - rows whose ``startpoint_name`` differs from
        ``start_point`` (``nullif`` turns equal names into NULL, which
        ``COUNT`` skips).
      * ``"NUMBER WB Obs"`` - rows whose ``endpoint_name`` differs from
        ``start_point``.
      * ``"EB - WB"``       - rows not starting at ``start_point`` minus rows
        not starting at ``end_point``.

    The point names are sent as query parameters (``$1``, ``$2``) instead of
    being spliced into the SQL text, so names containing quotes cannot break
    or alter the statement.

    Args:
        start_point: name of one end of the segment.
        end_point: name of the other end of the segment.

    Returns:
        Whatever ``db.query`` returns for the statement (the module-level
        ``db`` connection is used).
    """
    sql = '''SELECT date_trunc('hour', datetime_bin) AS yyyymm,
                    COUNT(nullif(startpoint_name, $1)) AS "Number EB Obs",
                    COUNT(nullif(endpoint_name, $1)) AS "NUMBER WB Obs",
                    COUNT(nullif(startpoint_name, $1))
                        - COUNT(nullif(startpoint_name, $2)) AS "EB - WB"
             FROM ryu4.aggr_5min_bt
             WHERE startpoint_name IN ($1, $2)
               AND endpoint_name IN ($1, $2)
             GROUP BY yyyymm ORDER BY yyyymm;'''
    # $1 -> start_point, $2 -> end_point
    return db.query(sql, start_point, end_point)

In [None]:
def remove_duplicates(start_end_points):
    """Keep only one direction of each route.

    Walks ``start_end_points`` in order and keeps a route unless the route
    with its start and end names swapped has already been kept. The first
    direction encountered therefore wins. Routes repeated in the *same*
    direction are not filtered out.

    Args:
        start_end_points: iterable of dicts with ``'startpoint_name'`` and
            ``'endpoint_name'`` keys.

    Returns:
        A new list containing the kept route dicts, in their original order.
    """
    kept_routes = []
    for route in start_end_points:
        reversed_route = dict(
            startpoint_name=route['endpoint_name'],
            endpoint_name=route['startpoint_name'],
        )
        if reversed_route in kept_routes:
            continue  # the opposite direction is already represented
        kept_routes.append(route)
    return kept_routes

In [None]:
def get_points():
    """Fetch the distinct (startpoint_name, endpoint_name) pairs of routes.

    Queries ``bluetooth.observations`` through the module-level ``db``
    connection, restricted to rows whose ``startpoint_name`` has its first
    underscore at position 3.

    Returns:
        The ``dictresult()`` of the query: one dict per distinct pair, keyed
        by ``startpoint_name`` and ``endpoint_name``.
    """
    sql = '''SELECT DISTINCT \
                        startpoint_name, endpoint_name \
                        FROM bluetooth.observations \
                        WHERE strpos(startpoint_name, '_') = 3;'''
    return db.query(sql).dictresult()

In [None]:
# Hard-coded list of route endpoints, kept inline because the get_points() query takes too long to run.
# NOTE(review): presumably a snapshot of an earlier get_points() result -- confirm it is still current.
start_end_points = [{'startpoint_name': 'FR_BA', 'endpoint_name': 'FB_BA'}, {'startpoint_name': 'DU_RO', 'endpoint_name': 'QU_RO'}, {'startpoint_name': 'FR_UN', 'endpoint_name': 'KN_UN'}, {'startpoint_name': 'CO_BA', 'endpoint_name': 'CO_UN'}, {'startpoint_name': 'FR_PA', 'endpoint_name': 'FR_JA'}, {'startpoint_name': 'QU_PA', 'endpoint_name': 'QU_JA'}, {'startpoint_name': 'QU_BV', 'endpoint_name': 'KN_PA'}, {'startpoint_name': 'QU_DF', 'endpoint_name': 'QU_RO'}, {'startpoint_name': 'AD_UN', 'endpoint_name': 'AD_YO'}, {'startpoint_name': 'QU_BA', 'endpoint_name': 'QU_ST'}, {'startpoint_name': 'RM_YO', 'endpoint_name': 'RM_UN'}, {'startpoint_name': 'FB_YK', 'endpoint_name': 'FR_UN'}, {'startpoint_name': 'QU_JA', 'endpoint_name': 'QU_PA'}, {'startpoint_name': 'KN_PA', 'endpoint_name': 'QU_BV'}, {'startpoint_name': 'FB_BA', 'endpoint_name': 'FR_BA'}, {'startpoint_name': 'KN_ST', 'endpoint_name': 'QU_ST'}, {'startpoint_name': 'FR_UN', 'endpoint_name': 'FB_YK'}, {'startpoint_name': 'QU_ST', 'endpoint_name': 'QU_BA'}, {'startpoint_name': 'FR_BA', 'endpoint_name': 'FR_SP'}, {'startpoint_name': 'DU_YO', 'endpoint_name': 'QU_YO'}, {'startpoint_name': 'KN_UN', 'endpoint_name': 'FR_UN'}, {'startpoint_name': 'FR_JA', 'endpoint_name': 'FR_PA'}, {'startpoint_name': 'QU_JA', 'endpoint_name': 'QU_YO'}, {'startpoint_name': 'DU_UN', 'endpoint_name': 'QU_UN'}, {'startpoint_name': 'QU_UN', 'endpoint_name': 'QU_SP'}, {'startpoint_name': 'WE_UN', 'endpoint_name': 'WE_BJ'}, {'startpoint_name': 'KN_YO', 'endpoint_name': 'QU_YO'}, {'startpoint_name': 'KN_BA', 'endpoint_name': 'FR_BA'}, {'startpoint_name': 'AD_JA', 'endpoint_name': 'AD_PA'}, {'startpoint_name': 'QU_SP', 'endpoint_name': 'KN_SP'}, {'startpoint_name': 'FR_SP', 'endpoint_name': 'KN_SP'}, {'startpoint_name': 'DU_BA', 'endpoint_name': 'DU_SP'}, {'startpoint_name': 'KN_BA', 'endpoint_name': 'QU_BA'}, {'startpoint_name': 'QU_PA', 'endpoint_name': 'DU_PA'}, {'startpoint_name': 'RM_JA', 'endpoint_name': 'RM_YO'}, {'startpoint_name': 
'FR_PA', 'endpoint_name': 'KN_PA'}, {'startpoint_name': 'QU_JA', 'endpoint_name': 'KN_JA'}, {'startpoint_name': 'EA_BV', 'endpoint_name': 'RM_PA'}, {'startpoint_name': 'QU_BA', 'endpoint_name': 'DU_BA'}, {'startpoint_name': 'KN_YO', 'endpoint_name': 'KN_UN'}, {'startpoint_name': 'KN_DF', 'endpoint_name': 'KN_ST'}, {'startpoint_name': 'KN_UN', 'endpoint_name': 'QU_UN'}, {'startpoint_name': 'FR_YO', 'endpoint_name': 'FR_JA'}, {'startpoint_name': 'KN_DF', 'endpoint_name': 'QU_RO'}, {'startpoint_name': 'DU_JA', 'endpoint_name': 'DU_PA'}, {'startpoint_name': 'QU_ST', 'endpoint_name': 'QU_DF'}, {'startpoint_name': 'KN_SP', 'endpoint_name': 'KN_BA'}, {'startpoint_name': 'KN_BA', 'endpoint_name': 'KN_SP'}, {'startpoint_name': 'QU_BV', 'endpoint_name': 'QU_PA'}, {'startpoint_name': 'WE_YO', 'endpoint_name': 'WE_UN'}, {'startpoint_name': 'FR_YO', 'endpoint_name': 'KN_YO'}, {'startpoint_name': 'KN_YO', 'endpoint_name': 'KN_JA'}, {'startpoint_name': 'CO_UN', 'endpoint_name': 'CO_BA'}, {'startpoint_name': 'DU_SP', 'endpoint_name': 'DU_BA'}, {'startpoint_name': 'QU_PA', 'endpoint_name': 'KN_PA'}, {'startpoint_name': 'FR_SP', 'endpoint_name': 'FR_BA'}, {'startpoint_name': 'QU_SP', 'endpoint_name': 'QU_BA'}, {'startpoint_name': 'QU_RO', 'endpoint_name': 'QU_DF'}, {'startpoint_name': 'DU_UN', 'endpoint_name': 'DU_YO'}, {'startpoint_name': 'DU_UN', 'endpoint_name': 'CO_UN'}, {'startpoint_name': 'FB_YK', 'endpoint_name': 'FB_SP'}, {'startpoint_name': 'KN_SP', 'endpoint_name': 'QU_SP'}, {'startpoint_name': 'CO_PA', 'endpoint_name': 'CO_UN'}, {'startpoint_name': 'QU_YO', 'endpoint_name': 'KN_YO'}, {'startpoint_name': 'DU_DF', 'endpoint_name': 'DU_BA'}, {'startpoint_name': 'QU_DF', 'endpoint_name': 'KN_DF'}, {'startpoint_name': 'FR_UN', 'endpoint_name': 'FR_SP'}, {'startpoint_name': 'DU_UN', 'endpoint_name': 'DU_SP'}, {'startpoint_name': 'QU_YO', 'endpoint_name': 'QU_JA'}, {'startpoint_name': 'DU_PA', 'endpoint_name': 'QU_PA'}, {'startpoint_name': 'QU_UN', 'endpoint_name': 'DU_UN'}, 
{'startpoint_name': 'DU_DF', 'endpoint_name': 'QU_DF'}, {'startpoint_name': 'RM_PA', 'endpoint_name': 'EA_BV'}, {'startpoint_name': 'DU_JA', 'endpoint_name': 'QU_JA'}, {'startpoint_name': 'FR_JA', 'endpoint_name': 'KN_JA'}, {'startpoint_name': 'FB_SP', 'endpoint_name': 'FR_SP'}, {'startpoint_name': 'FR_JA', 'endpoint_name': 'FR_YO'}, {'startpoint_name': 'DU_PA', 'endpoint_name': 'CO_PA'}, {'startpoint_name': 'QU_BA', 'endpoint_name': 'KN_BA'}, {'startpoint_name': 'KN_UN', 'endpoint_name': 'KN_YO'}, {'startpoint_name': 'KN_DF', 'endpoint_name': 'QU_DF'}, {'startpoint_name': 'KN_SP', 'endpoint_name': 'KN_UN'}, {'startpoint_name': 'QU_DF', 'endpoint_name': 'QU_ST'}, {'startpoint_name': 'FR_SP', 'endpoint_name': 'FB_SP'}, {'startpoint_name': 'KN_ST', 'endpoint_name': 'KN_BA'}, {'startpoint_name': 'QU_SP', 'endpoint_name': 'QU_UN'}, {'startpoint_name': 'FR_BA', 'endpoint_name': 'KN_BA'}, {'startpoint_name': 'DU_RO', 'endpoint_name': 'DU_DF'}, {'startpoint_name': 'EA_BV', 'endpoint_name': 'QU_BV'}, {'startpoint_name': 'QU_ST', 'endpoint_name': 'KN_ST'}, {'startpoint_name': 'QU_DF', 'endpoint_name': 'DU_DF'}, {'startpoint_name': 'DU_BA', 'endpoint_name': 'DU_DF'}, {'startpoint_name': 'DU_SP', 'endpoint_name': 'QU_SP'}, {'startpoint_name': 'FB_SP', 'endpoint_name': 'FB_BA'}, {'startpoint_name': 'KN_JA', 'endpoint_name': 'QU_JA'}, {'startpoint_name': 'QU_YO', 'endpoint_name': 'QU_UN'}, {'startpoint_name': 'KN_JA', 'endpoint_name': 'KN_YO'}, {'startpoint_name': 'KN_YO', 'endpoint_name': 'FR_YO'}, {'startpoint_name': 'DU_YO', 'endpoint_name': 'DU_JA'}, {'startpoint_name': 'QU_JA', 'endpoint_name': 'DU_JA'}, {'startpoint_name': 'QU_PA', 'endpoint_name': 'QU_BV'}, {'startpoint_name': 'KN_PA', 'endpoint_name': 'FR_PA'}, {'startpoint_name': 'DU_BA', 'endpoint_name': 'QU_BA'}, {'startpoint_name': 'QU_BA', 'endpoint_name': 'QU_SP'}, {'startpoint_name': 'RM_PA', 'endpoint_name': 'RM_JA'}, {'startpoint_name': 'QU_UN', 'endpoint_name': 'QU_YO'}, {'startpoint_name': 'KN_JA', 
'endpoint_name': 'FR_JA'}, {'startpoint_name': 'KN_PA', 'endpoint_name': 'QU_PA'}, {'startpoint_name': 'KN_BA', 'endpoint_name': 'KN_ST'}, {'startpoint_name': 'QU_UN', 'endpoint_name': 'KN_UN'}, {'startpoint_name': 'AD_SP', 'endpoint_name': 'AD_UN'}, {'startpoint_name': 'FB_BA', 'endpoint_name': 'FB_SP'}, {'startpoint_name': 'DU_PA', 'endpoint_name': 'DU_JA'}, {'startpoint_name': 'QU_BV', 'endpoint_name': 'EA_BV'}, {'startpoint_name': 'CO_BA', 'endpoint_name': 'DU_BA'}, {'startpoint_name': 'CO_PA', 'endpoint_name': 'DU_PA'}, {'startpoint_name': 'QU_SP', 'endpoint_name': 'DU_SP'}, {'startpoint_name': 'QU_RO', 'endpoint_name': 'DU_RO'}, {'startpoint_name': 'RM_UN', 'endpoint_name': 'RM_SP'}, {'startpoint_name': 'AD_YO', 'endpoint_name': 'AD_JA'}, {'startpoint_name': 'KN_UN', 'endpoint_name': 'KN_SP'}, {'startpoint_name': 'CO_UN', 'endpoint_name': 'CO_PA'}, {'startpoint_name': 'KN_SP', 'endpoint_name': 'FR_SP'}, {'startpoint_name': 'CO_UN', 'endpoint_name': 'DU_UN'}, {'startpoint_name': 'DU_YO', 'endpoint_name': 'DU_UN'}, {'startpoint_name': 'KN_PA', 'endpoint_name': 'KN_JA'}, {'startpoint_name': 'FR_UN', 'endpoint_name': 'FR_YO'}, {'startpoint_name': 'QU_YO', 'endpoint_name': 'DU_YO'}, {'startpoint_name': 'RM_SP', 'endpoint_name': 'RM_BA'}, {'startpoint_name': 'FR_YO', 'endpoint_name': 'FR_UN'}, {'startpoint_name': 'KN_ST', 'endpoint_name': 'KN_DF'}, {'startpoint_name': 'FB_SP', 'endpoint_name': 'FB_YK'}, {'startpoint_name': 'KN_JA', 'endpoint_name': 'KN_PA'}, {'startpoint_name': 'DU_JA', 'endpoint_name': 'DU_YO'}, {'startpoint_name': 'QU_RO', 'endpoint_name': 'KN_DF'}, {'startpoint_name': 'DU_BA', 'endpoint_name': 'CO_BA'}, {'startpoint_name': 'DU_SP', 'endpoint_name': 'DU_UN'}, {'startpoint_name': 'DU_DF', 'endpoint_name': 'DU_RO'}, {'startpoint_name': 'FR_SP', 'endpoint_name': 'FR_UN'}]
# Alternative (slow): rebuild the list from the database instead of using the literal above.
#start_end_points = get_points()
# Drop each route whose reverse direction has already been kept, so every
# segment appears once; remove_duplicates keeps the first direction it sees.
start_end_points = remove_duplicates(start_end_points)

In [None]:
#print(start_end_points)
for point_set in start_end_points:
#    print(point_set['startpoint_name'] + " to " + point_set['endpoint_name'])
    result = get_directional_data(str(point_set['startpoint_name']), str(point_set['endpoint_name']))
    east_bound = []
    west_bound = []
    time = []