# Library import

In [1]:
import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
import os
import random
import time
from datetime import datetime as dt

import matplotlib.pyplot as plt
import pandas as pd
from pandas.io.json import json_normalize
from tqdm import tqdm

# Activity import via Strava API

In [None]:
#Gets Strava ride activity data from our API request
# Step 1: exchange the refresh token for an access token.
# Step 2: page through the athlete's activities until an empty page is returned.
auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

# Credentials are read from the environment so real secrets never need to be
# saved in the notebook; the literals are placeholders used only when the
# corresponding variable is unset.
payload = {
    'client_id': os.environ.get('STRAVA_CLIENT_ID', "xxxxx"),
    'client_secret': os.environ.get('STRAVA_CLIENT_SECRET', 'xxxxxx'),
    'refresh_token': os.environ.get('STRAVA_REFRESH_TOKEN', 'xxxxxxx'),
    'grant_type': "refresh_token",
    'f': 'json'
}

print("Requesting Token...\n")
# TLS verification is left at requests' default (on): this call carries the client secret.
res = requests.post(auth_url, data=payload, timeout=30)
res.raise_for_status()
access_token = res.json()['access_token']
# The token itself is deliberately not printed so it does not end up in saved output.
print("Access token received\n")

print("Requesting pages (200 activities per full page)...")
header = {'Authorization': 'Bearer ' + access_token}
all_activities = []  # collected records; the dataframe is built once at the end
page = 1
page_non_empty = True
while page_non_empty:
    param = {'per_page': 200, 'page': page}
    response = requests.get(activites_url, headers=header, params=param, timeout=30)
    # Fail loudly on an HTTP error: an error body is a non-empty JSON object,
    # which would otherwise keep this loop running forever.
    response.raise_for_status()
    my_activities = response.json()
    all_activities.extend(my_activities)
    page_non_empty = bool(my_activities)
    print(page)
    page += 1

activities_df = pd.DataFrame(all_activities)

print("\n", len(activities_df), "activities downloaded")

In [None]:
# For every activity of type 'Ride', download its detail record and store the
# names of its segment efforts as a list in the 'trails' column.
print("Requesting Token...\n")
# TLS verification is left at requests' default (on): this call carries the client secret.
res = requests.post(auth_url, data=payload, timeout=30) # auth_url & payload referenced in code already above
res.raise_for_status()
access_token = res.json()['access_token']
header = {'Authorization': 'Bearer ' + access_token}
# The token itself is deliberately not printed so it does not end up in saved output.
print("Access token received\n")

print("Calculating time to get....\n")
# Each ride costs one request followed by a 7-9 s pause, i.e. roughly 8 s per ride.
count = int((activities_df['type'] == 'Ride').sum())
print(count * 8 / 60, " minutes to obtain data")

print("Getting trails for each activity")
activities_df['trails'] = None
for index, row in tqdm(activities_df.iterrows(), total=activities_df.shape[0]):
    if row['type'] != 'Ride':
        continue
    get_activity_url = "https://www.strava.com/api/v3/activities/{}".format(row['id'])
    try:
        response = requests.get(get_activity_url, headers=header, timeout=30)
        response.raise_for_status()
        my_activity = response.json()
    except requests.exceptions.RequestException as e:
        # Stop the whole run on any network/HTTP failure rather than continuing with partial data.
        raise SystemExit(e)
    # An activity without segment efforts keeps trails = None.
    trail_list = [effort['name'] for effort in my_activity.get('segment_efforts') or []]
    if trail_list:
        activities_df.at[index, 'trails'] = trail_list
    # Random pause between requests.
    time.sleep(random.randint(7, 9))

In [None]:
# Preview the first five rows of the downloaded activities
activities_df.head(5)

# Data Cleaning

## Checking data

In [None]:
# List every column present in the activities dataframe
activities_df.columns

In [None]:
# Spot-check the speed columns (first five rows)
activities_df[['average_speed', 'max_speed']].head(5)

In [None]:
# Spot-check the cadence column (first five rows)
activities_df['average_cadence'].head(5)

In [None]:
# Spot-check the watts / kilojoules columns (first five rows)
activities_df[['average_watts', 'weighted_average_watts', 'kilojoules', 'device_watts', 'max_watts']].head(5)

In [None]:
# Spot-check the elevation columns (first five rows)
activities_df[['total_elevation_gain', 'elev_high', 'elev_low']].head(5)

## Adding columns based on manipulated data

In [None]:
# Derive imperial-unit columns from the metric source columns.
# Conversion factors are named once here instead of being repeated inline.
METERS_PER_MILE = 1609.344
FEET_PER_METER = 3.28084
SECONDS_PER_HOUR = 3600
MPH_PER_METER_PER_SECOND = 2.236936

#Converting distance from meters to miles
activities_df['distance_miles'] = activities_df['distance'] / METERS_PER_MILE
#Converting elevation from meters to feet
activities_df['elevation_gain_ft'] = activities_df['total_elevation_gain'] * FEET_PER_METER
activities_df['max_elev_ft'] = activities_df['elev_high'] * FEET_PER_METER
activities_df['min_elev_ft'] = activities_df['elev_low'] * FEET_PER_METER
#Converting time from seconds to hours
# moving_time / elapsed_time are overwritten in place, so the raw seconds are
# stashed once in *_sec columns; re-running this cell then recomputes the hours
# from the raw values instead of dividing already-converted values again.
for time_col in ('moving_time', 'elapsed_time'):
    raw_col = time_col + '_sec'
    if raw_col not in activities_df.columns:
        activities_df[raw_col] = activities_df[time_col]
    activities_df[time_col] = activities_df[raw_col] / SECONDS_PER_HOUR
#Converting speed from meters/second to mph
activities_df['average_speed_mph'] = activities_df['average_speed'] * MPH_PER_METER_PER_SECOND
activities_df['max_speed_mph'] = activities_df['max_speed'] * MPH_PER_METER_PER_SECOND

activities_df.head(5)

## Date Manipulation

In [None]:
# Compare the start-date and time-zone columns side by side (first five rows)
activities_df[['start_date', 'start_date_local', 'timezone', 'utc_offset']].head(5)

In [None]:
# Date-only column: the first 10 characters of the local start timestamp
local_start = activities_df['start_date_local']
activities_df['start_date_only'] = local_start.str.slice(0, 10)
activities_df.head()

## Creating new dataframe for "Rides" and "Virtual Rides" only

In [None]:
# Preview the dataframe before filtering by activity type
activities_df.head()

In [None]:
# Show the distinct activity types present in the data
activities_df['type'].unique()

In [73]:
# Activity types to exclude from the rides dataframe
drop_values = [
    'Workout',
    'Run',
    'Walk',
    'RockClimbing',
]

In [None]:
#Create new dataframe holding every activity whose type is NOT in drop_values
# .copy() makes it an independent frame, so adding columns to it later does not
# trigger pandas' SettingWithCopyWarning.
activities_df_rides = activities_df[~activities_df['type'].isin(drop_values)].copy()
activities_df_rides['type'].unique()

## Converting temperature from Celsius to Fahrenheit

In [None]:
# Add the average temperature in Fahrenheit (F = C * 1.8 + 32).
# The source column is read from the rides frame itself, and assign() returns a
# new frame, so no value is written into a filtered slice of activities_df.
activities_df_rides = activities_df_rides.assign(
    average_temp_F=activities_df_rides['average_temp'] * 1.8 + 32
)
activities_df_rides.head()

In [None]:
# List the columns of the rides dataframe before dropping any
activities_df_rides.columns

In [None]:
# Remove columns that are not kept for the rides export.
# errors='ignore' keeps this cell safe to re-run (the columns are already gone
# the second time) and tolerant of fields missing from the API response.
unused_columns = [
    'resource_state', 'athlete', 'start_date', 'start_date_local',
    'timezone', 'utc_offset', 'location_city', 'location_state',
    'location_country', 'start_latitude', 'start_longitude', 'photo_count',
    'trainer', 'commute', 'manual', 'private', 'visibility', 'flagged',
    'gear_id', 'from_accepted_tag', 'workout_type', 'has_kudoed',
    'total_photo_count', 'device_watts', 'has_heartrate',
    'heartrate_opt_out', 'display_hide_heartrate_option',
]
activities_df_rides = activities_df_rides.drop(columns=unused_columns, errors='ignore')

In [None]:
# Remove the identifier columns from the rides dataframe.
# errors='ignore' keeps this cell safe to re-run and tolerant of absent fields.
id_columns = ['id', 'external_id', 'upload_id', 'upload_id_str']
activities_df_rides = activities_df_rides.drop(columns=id_columns, errors='ignore')

In [None]:
# List the columns that remain in the rides dataframe
activities_df_rides.columns

# Export to CSV

In [109]:
# Write the rides dataframe to a new CSV file; index=False leaves the index column out.
ride_csv_path = 'ride_data.csv'
activities_df_rides.to_csv(ride_csv_path, index=False)

# Segments

In [None]:
# Print the 'trails' entry of the first two activities, one per line
first_two_trails = activities_df['trails'].iloc[:2]
for trail in first_two_trails:
    print(trail)