# WFRC Example Data

Convert the full Salt Lake City model data found on [WFRC's GitHub](https://github.com/WFRCAnalytics/utah_bike_demand_model) into a smaller dataset suitable for testing and development work.

In [1]:
import os
import shutil
import yaml

import pandas as pd
import numpy as np
import pyproj
import shapely

from shapely.geometry import Point
from shapely.ops import transform

from functools import partial

# Record the geospatial library versions this notebook was run with.
print(f'pyproj {pyproj.__version__}')
print(f'shapely {shapely.__version__}')

pyproj 3.0.0.post1
shapely 1.7.1


In [2]:
class Config():
    """Load the full model inputs and cut a small circular example out of them.

    The link, node and zone file names (and the relevant column names) are read
    from ``network.yaml`` and ``zone.yaml`` in the ``configs`` directory; the
    CSV files themselves are read from the ``inputs`` directory.
    """

    # CRS in which callers supply the circle center: plain (longitude, latitude).
    WGS84_LONLAT = "+proj=longlat +datum=WGS84 +no_defs"

    def __init__(self, inputs, configs, crs):
        """
        inputs: directory containing the link, node and zone CSV files
        configs: directory containing network.yaml and zone.yaml
        crs: anything accepted by pyproj's CRS.from_user_input (e.g. 'epsg:26912')

        Raises AssertionError if either directory does not exist.
        """

        assert os.path.isdir(inputs), f"inputs directory not found: {inputs}"
        assert os.path.isdir(configs), f"configs directory not found: {configs}"

        self.inputs = inputs
        self.configs = configs
        self.crs = pyproj.crs.CRS.from_user_input(crs)

        with open(os.path.join(self.configs, 'network.yaml')) as f:
            settings = yaml.safe_load(f)

        self.link_file = settings.get('link_file')
        self.from_node = settings.get('from_name')
        self.to_node = settings.get('to_name')

        self.node_file = settings.get('node_file')
        self.node_name = settings.get('node_name')
        self.node_x_name = settings.get('node_x_name')
        self.node_y_name = settings.get('node_y_name')

        with open(os.path.join(self.configs, 'zone.yaml')) as f:
            settings = yaml.safe_load(f)

        self.zone_file = settings.get('zone_file_name')

        self.link_df = self.read(self.link_file)
        # nodes are indexed by their id so links/zones can be matched with isin()
        self.node_df = self.read(self.node_file).set_index(self.node_name)
        self.zone_df = self.read(self.zone_file)

        # nodes as shapely Points, indexed by node id so the Series lines up
        # with node_df (a default RangeIndex would not match the node ids)
        self.node_points = pd.Series(
            [Point(x, y) for x, y in
                 zip(self.node_df[self.node_x_name], self.node_df[self.node_y_name])],
            index=self.node_df.index,
        )

    def read(self, input_file):
        """Read a CSV file, given relative to the inputs directory, as a DataFrame."""
        return pd.read_csv(os.path.join(self.inputs, input_file))

    def bounding_circle(self, center_lon_lat, radius):
        """
        center_lon_lat: (longitude,  latitude) tuple
        radius: radius in whatever units match input data CRS

        Returns the center, reprojected into the input data CRS and buffered by
        radius (a shapely geometry).

        Raises AssertionError if the reprojected center lies outside the
        bounding box of the node coordinates.
        """

        # Transformer is the supported replacement for the deprecated
        # module-level pyproj.transform; always_xy pins (lon, lat) axis order.
        to_local = pyproj.Transformer.from_crs(
            self.WGS84_LONLAT,
            self.crs,
            always_xy=True,
        )

        center_transformed = transform(to_local.transform, Point(center_lon_lat))

        node_x = self.node_df[self.node_x_name]
        node_y = self.node_df[self.node_y_name]

        assert node_x.min() <= center_transformed.x <= node_x.max() and \
            node_y.min() <= center_transformed.y <= node_y.max(), \
            "center is not within region"

        return center_transformed.buffer(radius)

    @staticmethod
    def _report(label, kept, total):
        """Print how many rows were kept, with their share of the total."""
        share = kept * 100 / total if total else 0.0
        print(f"trimmed {label}: {kept} ({share:.2f} %)")

    def trim_data(self, center, radius):
        """
        Keep only the data that falls inside a circle.

        center: (longitude,  latitude) tuple
        radius: radius in whatever units match input data CRS

        Returns (nodes, links, zones) DataFrames: the nodes intersecting the
        circle (node id restored as a column), the links whose two end nodes
        were both kept, and the zones whose node was kept.
        """

        circle_poly = self.bounding_circle(center, radius)

        # node_points was built row-by-row from node_df, so apply the mask by
        # position; this cannot be thrown off by index labels.
        inside = self.node_points.apply(
            lambda p: p.intersects(circle_poly)).astype(bool).to_numpy()
        circle_nodes = self.node_df[inside]

        self._report('nodes', len(circle_nodes), len(self.node_df))

        # use this instance's links, not whatever global happens to be in scope
        circle_links = self.link_df[
            self.link_df[self.from_node].isin(circle_nodes.index) &
            self.link_df[self.to_node].isin(circle_nodes.index)
        ]

        self._report('links', len(circle_links), len(self.link_df))

        circle_zones = self.zone_df[self.zone_df[self.node_name].isin(circle_nodes.index)]

        self._report('zones', len(circle_zones), len(self.zone_df))

        return circle_nodes.reset_index(), circle_links, circle_zones

    def write_example(self, path, nodes, links, zones, copy_configs=False):
        """
        Write the trimmed tables to a new example folder.

        path: folder to create; must not exist yet
        nodes, links, zones: DataFrames, e.g. as returned by trim_data
        copy_configs: also copy the whole configs directory into path

        The CSV files keep their original names and are placed in a subfolder
        named after the inputs directory. Raises FileExistsError if path
        already exists.
        """

        os.mkdir(path)  # safety: don't overwrite existing folder

        # normpath drops a trailing separator, which would otherwise make
        # basename return '' and the mkdir below fail
        example_inputs = os.path.join(
            path, os.path.basename(os.path.normpath(self.inputs)))
        os.mkdir(example_inputs)

        print(f'writing files to {example_inputs}')

        # index=False: the row index is a pandas artifact, not a column of the
        # original files, so it must not be written out as an extra column
        nodes.to_csv(os.path.join(example_inputs, self.node_file), index=False)
        links.to_csv(os.path.join(example_inputs, self.link_file), index=False)
        zones.to_csv(os.path.join(example_inputs, self.zone_file), index=False)

        if copy_configs:

            example_configs = os.path.join(
                path, os.path.basename(os.path.normpath(self.configs)))

            print(f'copying configs to {example_configs}')
            print('consider shortening max dist in example configuration '
                  'to accommodate smaller network and further improve runtime')

            shutil.copytree(self.configs, example_configs)
        

In [3]:
# Load the full model: CSV inputs and YAML configs are read from a checkout of
# utah_bike_demand_model located two directories above this notebook.
# `crs` is the CRS of the input data; trim_data reprojects the circle center into it.
config = Config(
    inputs='../../utah_bike_demand_model/Model_Inputs',
    configs='../../utah_bike_demand_model/Model_Configs',
    crs='epsg:26912',
)

In [4]:
# (longitude, latitude) of downtown Salt Lake City
center = (-111.8967904, 40.7633270)
# input crs is in meters
radius = 5_000

nodes, links, zones = config.trim_data(center=center, radius=radius)

trimmed nodes: 4874 (4.49 %)
trimmed links: 6591 (4.80 %)
trimmed zones: 463 (3.08 %)


In [5]:
# Write the trimmed tables (and a copy of the configs) to ./test_example.
# write_example creates `path` with os.mkdir, so this cell raises if the folder
# already exists; delete or rename it before re-running.
config.write_example(
    path='test_example',
    nodes=nodes,
    links=links,
    zones=zones,
    copy_configs=True)

writing files to test_example/Model_Inputs
copying configs to test_example/Model_Configs
consider shortening max dist in example configuration to accommodate smaller network and further improve runtime
