In [1]:
import json

import boto3
import numpy as np

import config as cfg

In [2]:
with open('kcm_routes_exploded.geojson', 'r') as f:
    kcm_routes = json.load(f)

In [3]:
# Sort route data by its route label to make processing it in the next step faster
data = kcm_routes['features']
label_sorted = {}
for datapoint in data:
    k = datapoint['properties']['ROUTE_ID']

    if k not in label_sorted.keys():
        label_sorted[k] = [datapoint]
    else:
        label_sorted[k].append(datapoint)

for key in label_sorted.keys():
    # Open file specific to each label
    with open(f"sorted_labels/label_{key}.json", "w+") as outfile:
        json.dump(label_sorted[key], outfile)

# Save a list of keys
with open("sorted_labels/labels.json", "w+") as outfile:
    json.dump(list(label_sorted.keys()), outfile)

In [4]:
# Matching route segments to get segments that are connected and larger than 100 meters
feature_list = []
isolated_list = []
keys = []
with open("sorted_labels/labels.json", "r") as keyfile:
    keys = json.load(keyfile)
for k in keys:
    with open(f"sorted_labels/label_{k}.json", "r") as f:
        data = json.load(f)
        i = 0
        isolated_segments = []
        while i < len(data):
            if data[i]['properties']['SEG_LENGTH'] > 100:
                i += 1
                continue
            segment = data.pop(i)
            start = segment['geometry']['coordinates'][0]
            end = segment['geometry']['coordinates'][-1]
            flag = False
            for j in range(0, len(data)):
                seg2 = data[j]
                if end == seg2['geometry']['coordinates'][0]:
                    seg2['geometry']['coordinates'].append(segment['geometry']['coordinates'][0])
                    seg2['properties']['SEG_LENGTH'] += segment['properties']['SEG_LENGTH']
                    flag = True
                    break
                elif start == seg2['geometry']['coordinates'][-1]:
                    seg2['geometry']['coordinates'].append(segment['geometry']['coordinates'][-1])
                    seg2['properties']['SEG_LENGTH'] += segment['properties']['SEG_LENGTH']
                    flag = True
                    break
            if not flag:
                isolated_segments.append(segment)
        feature_list.extend(data)
        isolated_list.extend(isolated_segments)

In [5]:
# Add the features back to the kcm dataset, to replace the exploded route segments and write to file
kcm_routes['features'] = feature_list
with open('kcm_routes_exploded_modified.geojson', 'w+') as f:
    json.dump(kcm_routes, f)

In [7]:
with open('kcm_routes_exploded_modified.geojson', 'r') as f:
    kcm_routes = json.load(f)

# Function that iterates through an obj containing nested dicts and lists to replace all floats with strings
# This is necessary because Dynamodb does not support float values
def replace_floats(obj):
    if isinstance(obj, list):
        for i in range(0,len(obj)):
            obj[i] = replace_floats(obj[i])
        return obj
    elif isinstance(obj, dict):
        for k in obj.keys():
            obj[k] = replace_floats(obj[k])
        return obj
    elif isinstance(obj, float):
        if obj % 1 == 0:
            return int(obj)
        else:
            return str(obj)
    else:
        return obj

# Turn all float values (coordinates mostly) into strings in the routes geojson
kcm_routes_stringified = replace_floats(kcm_routes)

# Check number of features in the dataset
print(f"{len(kcm_routes_stringified['features'])} features in data")

# Add the local express to the route id to create unique key for each segment (dynamodb only can have a 2-composite key)
for feature in kcm_routes['features']:
    feature['route_id'] = feature['properties']['ROUTE_ID']
    feature['segment_id'] = feature['properties']['SEG_ID']
    if feature['properties']['LOCAL_EXPR'] == 'L':
        feature['route_id'] = int(str(feature['route_id']) + str(0))
    else:
        feature['route_id'] = int(str(feature['route_id']) + str(1))
        
with open('kcm_routes_exploded_modified.geojson', 'w+') as f:
    json.dump(kcm_routes, f)

77223 features in data


In [8]:
kcm_routes

{'type': 'FeatureCollection',
 'name': 'kcm_routes_exploded',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'OBJECTID': 1,
    'CHANGE_NUM': 143,
    'MINOR_CHAN': 10,
    'CURRENT_NE': 'IN SERVICE',
    'IN_SERVICE': 'Y',
    'ROUTE_ID': 100001,
    'LOCAL_EXPR': 'L',
    'ROUTE_NUM': 1,
    'SHAPE_Leng': '41571.63326159131',
    'SEG_ID': 2,
    'SEG_LENGTH': '194.083'},
   'geometry': {'type': 'LineString',
    'coordinates': [['-122.31809871857253', '47.59919544437041'],
     ['-122.31963253437074', '47.59918857685953'],
     ['-122.31774974990182', '47.599198078595286'],
     ['-122.31774974990182', '47.599198078595286'],
     ['-122.31774974990182', '47.599198078595286']]},
   'route_id': 1000010,
   'segment_id': 2},
  {'type': 'Feature',
   'properties': {'OBJECTID': 1,
    'CHANGE_NUM': 143,
    'MINOR_CHAN': 10,
    'CURRENT_NE': 'IN SERVICE',
    'IN_SERVICE': 'Y',
    'ROUTE_ID': 1000

In [7]:
# Set up the connection to the Dynamodb database
dynamodb = boto3.resource('dynamodb',
                         region_name=cfg.REGION,
                         aws_access_key_id = cfg.ACCESS_ID,
                         aws_secret_access_key = cfg.ACCESS_KEY
                         )

In [8]:
# Create table with one key on the route_id for each route
# The rest of the table structure can be open, only required values on insert are keys
table = dynamodb.create_table(TableName='KCM_Bus_Routes_Modified',
                            KeySchema=[
                                {
                                    'AttributeName': 'route_id',
                                    'KeyType': 'HASH'
                                },
                                {
                                    'AttributeName': 'segment_id',
                                    'KeyType': 'RANGE'
                                }
                            ],
                            AttributeDefinitions=[
                                {
                                    'AttributeName': 'route_id',
                                    'AttributeType': 'N'
                                },
                                {
                                    'AttributeName': 'segment_id',
                                    'AttributeType': 'N'
                                }
                            ],
                            ProvisionedThroughput={
                                'ReadCapacityUnits': 20,
                                'WriteCapacityUnits': 20
                            }
                     )

# Wait until the table exists.
table.meta.client.get_waiter('table_exists').wait(TableName='KCM_Bus_Routes_Modified')

# Check that 0 items are in the table
print(table.item_count)

0


In [10]:
# Takes about 1hr to do 70,000 features; capacity constrained to stay in AWS free-tier
with table.batch_writer() as batch:
    for route in kcm_routes['features']:
        batch.put_item(Item={
            'route_id': route['route_id'],
            'segment_id': route['segment_id'],
            'local_express_code': route['properties']['LOCAL_EXPR'],
            'historic_speeds': [],
            'avg_speed_m_s': 0
        })