# Traffic Volume Breakdown by Curb
The TomTom Area Analysis data can tell us how many unique vehicles passed over a curb segment in a given hour. To do this, we manually determine which segmentIds correspond to the different terminal curbsides (listed in the variable `terminalSegments`), and then parse the GeoJSON file, collecting the `sampleSize` attribute for each segment. Often a curb will be made up of multiple segments. This breakdown doesn't do any aggregating or averaging; it simply collects the data for each segment separately. The results are saved in a CSV file with one row for each segmentId.

## Set your filenames here

In [2]:
from os import path
import sys
import datetime

import os
import sys

def filename_path(filename):
    """Return `filename` joined onto the directory named by ATHENA_DATA_PATH.

    Raises KeyError if the ATHENA_DATA_PATH environment variable is not set.
    """
    data_root = os.environ['ATHENA_DATA_PATH']
    return os.path.join(data_root, filename)

def create_directory():
    """Ensure the CurbsideBreakdown output directory exists under ATHENA_DATA_PATH.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Raises KeyError if the ATHENA_DATA_PATH environment variable is not set.
    """
    directory = os.path.join(os.environ['ATHENA_DATA_PATH'], "CurbsideBreakdown")
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # exists()/mkdir() and also works when the data directory itself is missing.
    os.makedirs(directory, exist_ok=True)
        
# Make sure the output directory exists before the result path is checked.
create_directory()

# Input data, in geojson format.
dataFile = filename_path("AreaAnalysis/AreaAnalysis_Combined.geojson")
# This should be the first date present in dataFile.
timeStamp = datetime.datetime(year=2018, month=9, day=23)
# Output file, in csv format.
resultsFile = filename_path("CurbsideBreakdown/TrafficCountBreakdownByTerminal.csv")

# Warn (on stderr) about a missing input or an output that is about to be
# replaced; neither condition stops the notebook here.
if not path.exists(dataFile):
    print("WARNING: dataFile <{}> does not exist".format(dataFile), file=sys.stderr)
if path.exists(resultsFile):
    print(
        "WARNING: resultsFile <{}> already exists and will be overwritten".format(resultsFile),
        file=sys.stderr,
    )

## Run this cell to actually extract the curbside volume numbers and save to resultsFile

In [3]:
import json
import pandas as pd

# Manually determined mapping of terminal -> curb name -> list of segmentIds
# that make up that curb. A curb is often made up of several segments.
# NOTE(review): some ids are negative; presumably the sign encodes the
# direction of travel along the segment -- confirm against the data source.
terminalSegments = {
  "A": {
    "top1": [488400001916888, 488400001819546, 488400001051330, 488400001819545, 488400001944098, 488400001499749, -488400001499750, -488400001440507, -488400001068906, -488400001068907, -488400001440508, -488400001440506],
    "top2": [-488400001231410, -488400001231409, -488400001231408, -488400001231407],
    "bottom1": [488400004592626, 488400001816234, 488400001636761, 488400004593434, -488400001816233],
    "bottom2": [488400001892282]
  },
  "B": {
    "top1": [-488400001541937, 488400001812026, -488400000138950, -488400000138951],
    "top2": [488400000209688, 488400001253862],
    "top3": [488400001233111, 488400001171846, 488400001367832]
  },
  "C": {
    "top1": [488400001363987, 488400000262174, 488400000262173],
    "top2": [488400001084985, 488400001717041, 488400001051703, 488400001051704],
    "top3": [488400001440464, 488400001282416, 488400000210518, 488400001349998],
    "bottom1": [488400001236821, 488400001196888],
    "bottom2": [488400001411982],
    "bottom3": [488400001480332]
  },
  "D": {
    "top1": [488400001231232, 488400001155549, 488400001142328, 488400000993705, 488400001746378, 488400001736451, 488400000267344, 488400000267345, 488400001720769],
    "bottom1": [488400001746372, 488400001065468, 488400000321387, 488400001102357]
  },
  "E": {
    "top1": [-488400001537806, -488400001537807, -488400001537805],
    "top2": [488400001098621, -488400001599026, -488400001194605],
    "bottom1": [488400001429739],
    "bottom2": [488400000290828, -488400001224646, -488400000313157, 488400000321386, -488400001149285]
  }
}

# Parse the geojson file; the handle is released as soon as parsing is done.
with open(dataFile, 'r', encoding='utf-8') as fp:
    rawData = json.load(fp)

# Index the features by segmentId once, instead of rescanning the whole
# feature list for every segment. setdefault keeps the FIRST feature seen for
# a duplicated id, which matches a first-match linear scan.
featuresBySegmentId = {}
for feature in rawData["features"]:
    properties = feature.get("properties") or {}
    if "segmentId" in properties:
        featuresBySegmentId.setdefault(properties["segmentId"], feature)

# One row per segmentId: [terminal, curb name, segmentId, sampleSize per time slot...].
# Rows are gathered in a plain list and turned into a DataFrame once, because
# DataFrame.append was removed in pandas 2.0 and is quadratic when used in a loop.
rows = []
for terminal, curbs in terminalSegments.items():
    print("Processing curbs in terminal " + terminal)
    for curbName, segmentIds in curbs.items():
        for segmentId in segmentIds:
            if segmentId not in featuresBySegmentId:
                # Fail with a message that names the missing segment rather
                # than an anonymous StopIteration.
                raise KeyError(
                    "segmentId " + str(segmentId) + " (terminal " + terminal
                    + ", curb " + curbName + ") not found in " + dataFile
                )
            feature = featuresBySegmentId[segmentId]
            sampleSizes = [x["sampleSize"] for x in feature["properties"]["segmentTimeResults"]]
            rows.append([terminal, curbName, segmentId] + sampleSizes)

# Size the time columns from the rows actually collected (three leading label
# columns), so the header always matches the data width.
totalHours = max((len(row) - 3 for row in rows), default=0)
# Column headers are consecutive hourly timestamps starting at timeStamp.
timeStamps = [timeStamp + datetime.timedelta(hours=x) for x in range(totalHours)]
columnNames = ["Terminal", "CurbName", "SegmentId"] + timeStamps
df = pd.DataFrame(rows, columns=columnNames)
print("Saving results to " + resultsFile)
# The leading index column is kept so the CSV layout is unchanged; it now
# holds the row number rather than a constant 0.
df.to_csv(resultsFile)

Processing curbs in terminal A
Processing curbs in terminal B
Processing curbs in terminal C
Processing curbs in terminal D
Processing curbs in terminal E
Saving results to /Users/mlunacek/nrel/athena/ATHENA-twin-internal/src/athena/.data/CurbsideBreakdown/TrafficCountBreakdownByTerminal.csv
