In [1]:
import json
import os

import geopandas as gpd
from keplergl import KeplerGl
from sodapy import Socrata

In [2]:
#app token is read from the environment so it is never stored in the notebook
#create one at https://data.cityofchicago.org/profile and export it
#as SODA_APP_TOKEN; when unset, token is None and requests may be throttled
token = os.environ.get('SODA_APP_TOKEN')

In [3]:
def get_boundaries(dataset, token=None):
    '''
    Download simplified boundary polygons from the Chicago Data Portal
    through the SODA API.

    dataset inputs:
        'census' - census tracts (dataset 74p9-q2aq, keyed by geoid10)
        'community' - community areas (dataset igwz-8jzy, keyed by
                      area_numbe); any value other than 'census' also
                      selects this dataset
    token: optional app token, passed to the client to bypass throttling

    returns: GeoDataFrame built from the returned GeoJSON features; the
             identifier column of either dataset is aliased to 'area'
    '''
    #correct dataset id and column name for the dataset called
    if dataset == 'census':
        dataset_id, area = '74p9-q2aq', 'geoid10'
    else:
        dataset_id, area = 'igwz-8jzy', 'area_numbe'

    #only return what is needed and simplify the geometry
    #(to minimize download size)
    select = f'''{area} as area,\
    simplify_preserve_topology(the_geom, 0.0001) AS the_geom'''

    #instantiate the Socrata client with the domain
    client = Socrata(
        domain = 'data.cityofchicago.org',
        app_token=token
    )
    try:
        #make the request
        results = client.get(
            dataset_identifier = dataset_id,
            content_type = 'geojson',
            select = select
        )
    finally:
        #always close the client, even if the request fails
        client.close()

    #turn results into a GeoDataFrame
    return gpd.GeoDataFrame.from_features(results)

#census tract boundaries, used later to attach geometry to the ride counts
bounds = get_boundaries(dataset='census', token=token)

In [23]:
def get_rides(dataset, start, end, token=None, daily = False):
    '''
    Count trips per pickup area from the Chicago Data Portal dataset
    m6dm-c72p through the SODA API.

    dataset inputs:
        'census' - group by pickup_census_tract
        'community' - group by pickup_community_area (also used for
                      any value other than 'census')
    start, end: bounds inserted into the query's `between` filter,
                e.g. '2019-03-01' and '2019-03-31'
    token: optional app token passed to the client
    daily: when True, also group by date_trunc_ymd(trip_start_timestamp)
           and filter on that truncated date; otherwise the filter is
           applied to trip_start_timestamp directly

    returns: GeoDataFrame built from the returned JSON records (no
             geometry is attached here). The query selects 'area' and
             'rides', plus 'date' when daily is True, and requests at
             most 100,000 rows.
    '''
    if dataset == 'census':
        area = 'pickup_census_tract'
    else:
        area = 'pickup_community_area'

    #set up query to return grouping by days
    if daily:
        select = f'date_trunc_ymd(trip_start_timestamp) as date,\
        {area} AS area, COUNT(trip_id) as rides'
        group = 'date, area'
        where = f'date between "{start}" and "{end}"'
    else:
        select = f'{area} AS area, COUNT(trip_id) as rides'
        group = 'area'
        where = f'trip_start_timestamp between "{start}" and "{end}"'

    #large timeout because the aggregation can take minutes to return
    client = Socrata(
        domain = 'data.cityofchicago.org',
        app_token=token, timeout=100000
    )
    try:
        results = client.get(
            dataset_identifier='m6dm-c72p',
            select=select, group=group, where=where,
            content_type = 'json', limit=100_000
        )
    finally:
        #always close the client, even if the request fails
        client.close()

    return gpd.GeoDataFrame(results)

In [25]:
%%time
#show run time

#make the call for 
gdf = get_rides('census', '2019-03-01', '2019-03-31', token, daily=True)

#merge the two dataframes to add geometry
gdf = gdf.merge(bounds, on='area')

CPU times: user 113 ms, sys: 17.6 ms, total: 131 ms
Wall time: 2min 35s


In [6]:
def open_config(file_name='config.json'):
    '''
    helper function to load preconfigured Kepler configs

    file_name: path of the JSON config file to read
    returns: the parsed config, or an empty dict when the file
             does not exist
    '''
    try:
        #context manager closes the file handle once it has been parsed
        with open(file_name, 'r') as f:
            current_file = json.load(f)
    except FileNotFoundError:
        print('no current config file. plz initialize map, and use map.config')
        return {}

    print('config loaded')
    return current_file

In [7]:
#load a saved Kepler config from the default config.json
#(open_config returns an empty dict when that file is missing)
config = open_config()

no current config file. plz initialize map, and use map.config


In [17]:
#create the Kepler map (height=800) and add the merged rides
#GeoDataFrame to it under the dataset name 'trips'
map_1 = KeplerGl(height=800)
map_1.add_data(gdf, 'trips')
#last expression of the cell, so the map widget is displayed below
map_1

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(data={'trips': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 2…

In [None]:
def save_config(config, file_name='config.json'):
    '''
    helper function to save the current config for Kepler

    config: JSON-serializable config to persist (e.g. map.config)
    file_name: path of the JSON file to compare against and write to

    The file is only rewritten when it does not exist yet or when its
    parsed content differs from config. Returns None.
    '''
    def save():
        with open(file_name, 'w') as f:
            json.dump(config, f)

    try:
        #context manager closes the read handle before any rewrite
        with open(file_name, 'r') as f:
            current_file = json.load(f)
    except FileNotFoundError:
        print('no existing file in directory. saving current config')
        save()
        return

    if config != current_file:
        print('changes detected. saving current config')
        save()
    else:
        print('no changes detected')

#persist the map's current config so it can be reloaded later
#NOTE(review): open_config() reads config.json by default, not
#community.json -- confirm which file name is intended
save_config(config=map_1.config, file_name='community.json')

In [None]:
#export the map with its current data to chicago.html
map_1.save_to_html(file_name='chicago.html', data=map_1.data)