### Import packages

In [1]:
# Enable the autoreload extension; mode 2 reloads all imported modules before
# each cell runs, so edits to local modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [53]:
import os
import time
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from util import config
from util import mapping
from util import get_data
from util import clean_data

# Get Data from RideWithGPS

The [Ride With GPS API](https://ridewithgps.com/api?lang=en) can be queried in several ways. Here, we use two of them.

**Route Search**

The search accepts a variety of parameters and returns summary information for each matching ride, including its Ride ID. Here, we centre the search on a location ('Shokan, NY') and limit it to a radius around that point (25 miles).

**Trip Details**

Given the Ride IDs, we can request more detailed information for each ride — specifically, the Latitude/Longitude/Elevation/Time breadcrumbs recorded along it.


In [11]:
# Get data and save to disk
# NOTE(review): no explicit write to disk happens in this cell; presumably the
# get_data helpers persist their results themselves -- confirm in util/get_data.py.

# The search is centred on 'Shokan, NY'; 25 is the search radius
# (presumably miles -- confirm against get_data.search_ridewithgps).
print('Requesting all ride data in search radius')
DATA = get_data.search_ridewithgps('Shokan, NY', 25)
# Unpack the raw search results into `routes` and `trips`
# (presumably one collection per result type -- TODO confirm).
routes, trips = get_data.parse_ridewithgps_search_results(DATA)

Requesting all ride data in search radius
300 of 42216 results collected
600 of 42216 results collected
900 of 42216 results collected
1200 of 42216 results collected
1500 of 42216 results collected
1800 of 42216 results collected
2100 of 42216 results collected
2400 of 42216 results collected
2700 of 42216 results collected
3000 of 42216 results collected
3300 of 42216 results collected
3600 of 42216 results collected
3900 of 42216 results collected
4200 of 42216 results collected
4500 of 42216 results collected
4800 of 42216 results collected
5100 of 42216 results collected
5400 of 42216 results collected
5700 of 42216 results collected
6000 of 42216 results collected
6300 of 42216 results collected
6600 of 42216 results collected
6900 of 42216 results collected
7200 of 42216 results collected
7500 of 42216 results collected
7800 of 42216 results collected
8100 of 42216 results collected
8400 of 42216 results collected
8700 of 42216 results collected
9000 of 42216 results collected
9

AttributeError: module 'util.get_data' has no attribute 'ridewithgps_api_ride'

### Do some preliminary culling and cleaning of the data

* Convert units from SI to miles and feet
* Filter the data to remove trips that are
    * uninterestingly short
    * suspiciously long (unlikely to be humanly possible)
    * likely to be mountain bike routes (e.g. very steep or lots of laps)


In [76]:
# Load the trip summaries from the raw-data directory.
trips = pd.read_feather(
    os.path.join(config.RAW_DATA_PATH, 'ridewgps_trips.feather')
)

# The return value is not used, so this helper is relied on to convert the
# columns of `trips` in place.
clean_data.convert_ride_data_from_SI(trips)

# Remove some of the trips, e.g. if they don't seem plausible for a human to achieve!
trips = clean_data.filter_trips(trips)

# Extract interesting columns
useful_cols = ['id', 'distance', 'elevation_gain', 'elevation_loss',
               'avg_slope', 'avg_speed', 'max_speed',
               'duration', 'moving_time']
trips = (
    trips[useful_cols]
    # Distinguish from other ID keys. Done without inplace=True so that the
    # column-subset frame is never mutated.
    .rename(columns={'id': 'rte_id'})
    # Feather can only serialise a default RangeIndex; filtering rows can
    # leave gaps in the index, so normalise it before saving.
    .reset_index(drop=True)
)

# Save cleaned data
pathlib.Path(config.CLEAN_DATA_PATH).mkdir(parents=True, exist_ok=True)
trips.to_feather(
    os.path.join(config.CLEAN_DATA_PATH, 'ridewgps_trips.feather')
)

print('Double checking for nulls')
print(trips.isna().sum())

# NOTE(review): this reports the row count; uniqueness of `rte_id` is not
# actually verified here.
print('\n\n{} unique trips'.format(trips.shape[0]))
trips.head()

Double checking for nulls
rte_id            0
distance          0
elevation_gain    0
elevation_loss    0
avg_slope         0
avg_speed         0
max_speed         0
duration          0
moving_time       0
dtype: int64


19571 unique trips


Unnamed: 0,rte_id,distance,elevation_gain,elevation_loss,avg_slope,avg_speed,max_speed,duration,moving_time
0,23209396,2.659848,123.437454,106.163973,1.634872,5.002846,7.048852,0.619444,0.531667
1,54372107,2.762263,142.176885,172.413597,2.156983,5.171163,5.960829,0.540556,0.534167
2,48496079,2.777424,480.997881,393.434553,5.962798,5.518063,11.760722,0.535278,0.503333
3,40433541,2.779512,57.147474,41.855717,0.674601,5.19265,5.432662,0.58,0.535278
4,56203693,2.801186,97.503446,88.391781,1.256876,5.349746,6.874868,0.568611,0.523611


For any trips that make it through the filter, request more detailed information from the API.

In [None]:
print('Requesting data for each trip')
# Re-read the cleaned trips from disk so this cell does not depend on kernel
# state left behind by the cleaning cell.
trips = pd.read_feather(
    os.path.join(config.CLEAN_DATA_PATH, 'ridewgps_trips.feather')
)

PROGRESS_EVERY = 100          # print a progress line every N trips
REQUEST_DELAY_SECONDS = 0.05  # pause between consecutive API requests

n_trips = trips.shape[0]

# Iterate over the ID column directly instead of using iterrows(): iterrows()
# builds a Series per row and upcasts the integer IDs to float alongside the
# other float columns. The frame read from feather has a default 0..n-1 index,
# so enumerate() yields the same counter the row index did.
#
# NOTE(review): a saved output earlier in this notebook shows
# "AttributeError: module 'util.get_data' has no attribute 'ridewithgps_api_ride'"
# -- confirm the function exists under this name in util/get_data.py.
for i, rte_id in enumerate(trips['rte_id']):
    if i % PROGRESS_EVERY == 0:
        print('{} of {} trips'.format(i, n_trips))
    # The return value is not needed here; it is deliberately not bound to `_`,
    # which IPython uses for the last cell output.
    get_data.ridewithgps_api_ride(int(rte_id), 'trips')
    time.sleep(REQUEST_DELAY_SECONDS)

Requesting data for each trip
0 of 19571 trips
