## Objective: Extract the summary polyline for every run that started in Brooklyn or Chicago before a cutoff date (2020-08-01). Each run should also have a unique ID.

In [2]:
import pandas as pd
import json
from datetime import datetime
import numpy as np

def getDate(value):
    """Return the calendar date of a timestamp string as a ``datetime``.

    Everything from the first "T" onward (the time-of-day part) is dropped;
    the remaining ``YYYY-MM-DD`` text is parsed, so the result is at midnight.
    """
    date_text, _, _ = value.partition("T")
    return datetime.strptime(date_text, '%Y-%m-%d')

def getStartLat(value):
    """Return the latitude (first element) of a ``[lat, lng]`` pair.

    Returns the sentinel -1 when no start point is available. Besides the
    empty list, this also covers values that cannot be indexed at all
    (e.g. None or a NaN float for a missing field), which would otherwise
    raise TypeError and abort the whole ``.apply`` call.
    """
    try:
        return value[0]
    except (IndexError, TypeError):
        # IndexError: empty sequence; TypeError: None / NaN / other scalar.
        return -1

def getStartLng(value):
    """Return the longitude (second element) of a ``[lat, lng]`` pair.

    Returns the sentinel -1 when no start point is available. Besides the
    empty list, this also covers sequences with fewer than two elements and
    values that cannot be indexed at all (e.g. None or a NaN float for a
    missing field), which would otherwise raise and abort the ``.apply`` call.
    """
    try:
        return value[1]
    except (IndexError, TypeError):
        # IndexError: too few elements; TypeError: None / NaN / other scalar.
        return -1

In [27]:
# data comes from Strava's API. Other options for collecting Strava data here: https://support.strava.com/hc/en-us/articles/216918437-Exporting-your-Data-and-Bulk-Export
df = pd.read_json('../strava-dashboard/data.json')

# data filtering: removing recent activities and non-runs here; activities
# outside the areas of interest are removed further down, per area.

# Activities on or after this date are excluded.
cutoff_date = datetime(2020, 8, 1, 0, 0)
df['date'] = df['start_date_local'].apply(getDate)

# Keep only runs before the cutoff. .copy() makes `data` an independent frame,
# so adding columns below does not assign into a slice of `df` (which raises
# SettingWithCopyWarning).
data = df[(df['date'] < cutoff_date) & (df['type'] == 'Run')].copy()

# Split the start coordinates into separate columns for the bounding-box
# filters. The helpers return -1 when there is no start point, which lies
# outside both bounding boxes, so such activities are dropped by the filters.
data['start_lat'] = data['start_latlng'].apply(getStartLat)
data['start_lon'] = data['start_latlng'].apply(getStartLng)

# Brooklyn: keep runs whose start point lies strictly inside this bounding box.
in_brooklyn = (
    (data['start_lat'] > 40.647789) & (data['start_lat'] < 40.730217)
    & (data['start_lon'] > -74.022393) & (data['start_lon'] < -73.912763)
)
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of `data`.
nyc = data[in_brooklyn].copy()

# extract summary polyline, add index, and export data
# Only the 'map' column is read for the polyline, so apply to that column
# alone instead of running applymap over every cell of the frame.
nyc['summary_polyline'] = nyc['map'].apply(
    lambda m: m.get('summary_polyline', np.nan) if isinstance(m, dict) else np.nan)

nyc = nyc[['summary_polyline']].copy()

# Each run's ID is its row label inherited from `data`. Assign the whole
# column at once: 'id' does not exist yet at this point, so a per-row
# `nyc['id'][i] = i` raises KeyError.
nyc['id'] = nyc.index

nyc.to_csv("data/nyc.csv", index=False)

# Chicago: keep runs whose start point lies strictly inside this bounding box.
in_chicago = (
    (data['start_lat'] > 41.87322) & (data['start_lat'] < 41.978726)
    & (data['start_lon'] > -87.74023299999999) & (data['start_lon'] < -87.598619)
)
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of `data`.
chicago = data[in_chicago].copy()

# extract summary polyline, add index, and export data
# Only the 'map' column is read for the polyline, so apply to that column
# alone instead of running applymap over every cell of the frame.
chicago['summary_polyline'] = chicago['map'].apply(
    lambda m: m.get('summary_polyline', np.nan) if isinstance(m, dict) else np.nan)

chicago = chicago[['summary_polyline']].copy()

# Each run's ID is its row label inherited from `data`. A single column
# assignment replaces the per-row chained assignment `chicago['id'][i] = i`,
# which is not guaranteed to write through to the frame.
chicago['id'] = chicago.index

chicago.to_csv("data/chicago.csv", index=False)