In [1]:
import json
import os

import geopandas as gpd
from keplergl import KeplerGl
from sodapy import Socrata

In [2]:
#insert your own token to avoid throttling
#found at https://data.cityofchicago.org/profile
#the token is read from the SOCRATA_APP_TOKEN environment variable so the
#secret is never stored in the notebook; when the variable is unset the
#value is None, which the request helpers accept (token=None)
token = os.environ.get('SOCRATA_APP_TOKEN')

In [3]:
# date range (start, end) used by every query in this notebook
start, end = '2019-03-01', '2019-03-31'

In [4]:
def get_rides(start, end, token=None):
    '''
    Fetches ride counts per pickup location through the SODA API.

    Queries dataset 'm6dm-c72p' on data.cityofchicago.org, grouping trips
    by pickup_centroid_location and counting trip_id for every trip whose
    trip_start_timestamp lies between start and end.

    start: lower bound of the timestamp filter, e.g. '2019-03-01'
    end: upper bound of the timestamp filter, e.g. '2019-03-31'
    token: optional Socrata app token (None sends no token)

    returns: GeoDataFrame with a geometry column and an int32 rides
    column; rows containing missing values are dropped
    '''
    #instantiate the Socrata client
    client = Socrata(
        domain='data.cityofchicago.org',
        app_token=token, timeout=100000
    )

    #set up the SoSQL call
    select = 'pickup_centroid_location AS geometry, COUNT(trip_id) AS rides'
    group = 'geometry'
    #NOTE(review): an end value without a time part presumably compares as
    #midnight, so trips later on the end date may be left out - confirm
    where = f'trip_start_timestamp between "{start}" and "{end}"'

    #make the call for the data, specifying the correct dataset;
    #the finally clause closes the client even when the request fails
    try:
        results = client.get(
            dataset_identifier='m6dm-c72p',
            select=select, group=group, where=where,
            content_type='geojson', limit=100_000
        )
    finally:
        client.close()

    #add data to geopandas
    gdf = gpd.GeoDataFrame.from_features(results).dropna()

    #an empty response yields a frame without a rides column; skip the
    #cast in that case instead of failing on the missing column
    if 'rides' in gdf.columns:
        gdf['rides'] = gdf['rides'].astype('int32')

    return gdf

In [5]:
# date range (start, end) used by every query in this notebook
# NOTE(review): repeats the start/end assignment of an earlier cell with
# the same values
start, end = '2019-03-01', '2019-03-31'

In [6]:
%%time
#calling our data gathering function for the configured date range;
#gdf_point holds one row per pickup location with its ride count
gdf_point = get_rides(start, end, token)

CPU times: user 72 ms, sys: 11.2 ms, total: 83.2 ms
Wall time: 41 s


In [7]:
#defining helper functions to load/save map config files
#(to maintain similar visuals)

def open_config(file_name):
    '''
    helper function to load preconfigured Kepler configs

    file_name: path of the JSON config file to read

    returns: the parsed JSON content, or an empty dict when the file
    does not exist
    '''
    try:
        #the with-block closes the handle even when parsing fails
        with open(file_name, 'r') as f:
            current_file = json.load(f)
    except FileNotFoundError:
        print('no config file found. plz initialize map, and use map.config')
        return {}

    print('config loaded')
    return current_file

    
def save_config(config, file_name='config.json'):
    '''
    helper function to save the current config for Kepler

    Writes config to file_name as JSON, but only when the file is missing
    or its parsed content differs from config.

    config: JSON-serializable config to store (e.g. map.config)
    file_name: destination path; defaults to 'config.json'
    '''
    def save():
        with open(file_name, 'w') as f:
            json.dump(config, f)

    try:
        #the with-block closes the read handle before any rewrite happens
        with open(file_name, 'r') as f:
            current_file = json.load(f)
    except FileNotFoundError:
        print('no existing file in directory. saving current config')
        save()
        return

    if config != current_file:
        print('changes detected. saving current config')
        save()
    else:
        print('no changes detected')

In [8]:
#show the per-location ride counts on a Kepler map (dataset name 'rides')
map_1 = KeplerGl(height=900)
map_1.add_data(gdf_point, 'rides')
map_1

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(data={'rides': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 2…

In [9]:
def save_config(config, file_name='config.json'):
    '''
    helper function to save the current config for Kepler

    Writes config to file_name as JSON, but only when the file is missing
    or its parsed content differs from config.

    NOTE(review): this re-definition shadows the save_config defined in an
    earlier cell; the only signature difference is the file_name default.

    config: JSON-serializable config to store (e.g. map.config)
    file_name: destination path; defaults to 'config.json'
    '''
    def save():
        with open(file_name, 'w') as f:
            json.dump(config, f)

    try:
        #the with-block closes the read handle before any rewrite happens
        with open(file_name, 'r') as f:
            current_file = json.load(f)
    except FileNotFoundError:
        print('no existing file in directory. saving current config')
        save()
        return

    if config != current_file:
        print('changes detected. saving current config')
        save()
    else:
        print('no changes detected')

#store map_1's current config in community.json; save_config only writes
#when the file is missing or its content differs
save_config(map_1.config, file_name='community.json')

no changes detected


In [10]:
#close the map_1 widget before building the next map
map_1.close()

In [11]:
def get_boundaries(token=None):
    '''
    Fetches boundary polygons through the SODA API.

    Queries dataset '74p9-q2aq' on data.cityofchicago.org, exposing
    geoid10 as `area` and a simplified version of the_geom.

    token: optional Socrata app token (None sends no token)

    returns: GeoDataFrame with an area column and the boundary geometry;
    rows containing missing values are dropped
    '''
    #instantiate the Socrata client with the domain
    #optional to input your own app_token to bypass throttling
    client = Socrata(
        domain='data.cityofchicago.org', app_token=token
    )

    #only return what is needed and simplify the geometry
    #(to minimize download size)
    select = (
        'geoid10 as area, '
        'simplify_preserve_topology(the_geom, 0.0001) AS the_geom'
    )

    #make the request; the finally clause closes the client whether or
    #not the request succeeds
    try:
        results = client.get(
            dataset_identifier='74p9-q2aq', content_type='geojson',
            select=select
        )
    finally:
        client.close()

    #turn results into a GeoDataFrame
    gdf = gpd.GeoDataFrame.from_features(results)

    return gdf.dropna()

In [12]:
%%time
#download the boundary polygons used for the spatial aggregation below
bounds = get_boundaries(token)

CPU times: user 76.4 ms, sys: 5.33 ms, total: 81.8 ms
Wall time: 1.11 s


In [13]:
#show the boundary polygons on their own map (dataset name 'boundaries')
map_2 = KeplerGl(height=800)
map_2.add_data(bounds, 'boundaries')
map_2

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(data={'boundaries': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, …

In [14]:
#close the map_2 widget before building the next map
map_2.close()

In [15]:
#overlay the ride pickup points on the boundary polygons in a single map
map_3 = KeplerGl(height=800)
map_3.add_data(bounds, 'boundaries')
map_3.add_data(gdf_point, 'rides')
map_3

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(data={'boundaries': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, …

In [16]:
#close the map_3 widget before building the next map
map_3.close()

In [17]:
#sanity check: does the first boundary contain the first pickup point?
#both frames went through dropna() without an index reset, so label 0 is
#not guaranteed to exist; .iloc[0] always picks the first remaining row
bounds.geometry.iloc[0].contains(gdf_point.geometry.iloc[0])

False

In [18]:
def get_num_rides(bound, points):
    '''
    Adds up the rides of every point that lies inside one boundary.

    bound: object with a contains(geometry) method (a boundary polygon)
    points: frame whose rows carry geometry and rides values

    returns: sum of rides over the rows whose geometry bound contains
    (0 when no row matches)
    '''
    total = 0

    for row in points.itertuples():
        #only rows located inside the boundary contribute to the total
        if bound.contains(row.geometry):
            total += row.rides

    return total

In [19]:
#NOTE(review): get_rides already casts rides to int32 before returning,
#so this cast looks redundant - confirm before removing
gdf_point.rides = gdf_point.rides.astype('int32')

In [20]:
#total the rides picked up inside each boundary polygon;
#this adds a rides column to bounds in place
bounds['rides'] = bounds.geometry.map(lambda area: get_num_rides(area, gdf_point))

In [21]:
#show the boundaries together with their aggregated rides column
map_4 = KeplerGl(height=700)
map_4.add_data(bounds, 'bounds')
map_4

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(data={'bounds': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, …

In [22]:
#close the map_4 widget before building the next map
map_4.close()

In [23]:
def get_daily_rides(start, end, token=None):
    '''
    Fetches ride counts per day and pickup location through the SODA API.

    Queries dataset 'm6dm-c72p' on data.cityofchicago.org, truncating
    trip_start_timestamp to a date and counting trip_id for every
    (date, pickup_centroid_location) pair whose date lies between start
    and end.

    start: lower bound of the date filter, e.g. '2019-03-01'
    end: upper bound of the date filter, e.g. '2019-03-31'
    token: optional Socrata app token (None sends no token)

    returns: GeoDataFrame with date, geometry and int32 rides columns;
    rows containing missing values are dropped
    '''
    #instantiate the Socrata client
    client = Socrata(
        domain='data.cityofchicago.org',
        app_token=token, timeout=100000
    )

    #set up the SoSQL call
    select = (
        'date_trunc_ymd(trip_start_timestamp) as date, '
        'pickup_centroid_location AS geometry, COUNT(trip_id) AS rides'
    )
    group = 'date, geometry'
    where = f'date between "{start}" and "{end}"'

    #make the call for the data, specifying the correct dataset;
    #the finally clause closes the client even when the request fails
    try:
        results = client.get(
            dataset_identifier='m6dm-c72p',
            select=select, group=group, where=where,
            content_type='geojson', limit=100_000
        )
    finally:
        client.close()

    #add data to geopandas
    gdf = gpd.GeoDataFrame.from_features(results).dropna()

    #an empty response yields a frame without a rides column; skip the
    #cast in that case instead of failing on the missing column
    if 'rides' in gdf.columns:
        gdf['rides'] = gdf['rides'].astype('int32')

    return gdf

In [24]:
%%time
#download the per-day, per-location ride counts for the same date range
gdf_daily = get_daily_rides(start, end, token)

CPU times: user 508 ms, sys: 31.4 ms, total: 540 ms
Wall time: 3min 15s


In [25]:
def get_num_rides_daily(points, bounds):
    '''
    Totals the rides inside every boundary for every date.

    points: frame with date, geometry and rides columns
    bounds: frame whose geometry column holds the boundary shapes

    returns: GeoDataFrame with one row per (date, boundary) pair and the
    columns date, geometry and rides
    '''
    records = []

    for day in points.date.unique():
        #restrict the points to the rides of this single day
        day_points = points[points['date'] == day].copy()

        for area in bounds.geometry:
            inside = [
                row.rides
                for row in day_points.itertuples()
                if area.contains(row.geometry)
            ]
            records.append([day, area, sum(inside)])

    return gpd.GeoDataFrame(records, columns=['date','geometry','rides'])

In [26]:
#aggregate the daily pickup points into rides per boundary per date
daily_bounds = get_num_rides_daily(gdf_daily, bounds)

In [28]:
#show the per-date boundary totals on a map (dataset name 'daily_trips')
map_5 = KeplerGl(height=800)
map_5.add_data(daily_bounds, 'daily_trips')
map_5

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(data={'daily_trips': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,…

In [29]:
#close the map_5 widget now that the last map has been shown
map_5.close()

In [30]:
# map_5.save_to_html(data=map_5.data, file_name='chicago.html')