In [2]:
# Project idea -> add a workout counter in Google Looker Studio
# Packages required for this project:
import requests
import pandas as pd
import gspread
from df2gspread import df2gspread as d2g
import gspread_dataframe as gd
import pygsheets
from datetime import date
import json

# Package to save datasets to be used in another script for next analysis
from sklearn import datasets

# Enabling display of all columns for dataframes
pd.set_option('display.max_columns', None)

# Importing credentials for Strava's API
from Credentials import StravaCredentials

In [2]:
# Creating date variable, formatted as <month name>/<day>/<year>.
# It is interpolated into the "Total workouts as of ..." label further down.
# (The previous format '%B/%m/%Y' printed the month twice and never the day.)
today = date.today().strftime('%B/%d/%Y')

# Input to store temporary code from -> #https://www.strava.com/oauth/authorize?client_id=99205&response_type=code&redirect_uri=http://localhost/&approval_prompt=force&scope=profile:read_all,activity:read_all
# `data` is the payload sent to Strava's token endpoint in the next cell.
data = StravaCredentials.data

In [None]:
# Exchanging the credentials payload for OAuth tokens
token_response = requests.post(url='https://www.strava.com/api/v3/oauth/token', data=data, timeout=30)
# Fail here with a clear HTTP error instead of a KeyError on the lookups below
token_response.raise_for_status()
token = token_response.json()

# Accessing the token json to get refresh token and access token
refresh_token = token['refresh_token']
access_token = token['access_token']

# The access token is a secret and cell outputs are saved with the notebook,
# so only a confirmation is shown instead of the token itself.
print('Access token received; expires_at:', token.get('expires_at'))

In [None]:
# Getting athlete info
athlete_url = 'https://www.strava.com/api/v3/athlete'
# The token travels in the Authorization header so it does not end up in URLs or logs
athlete_response = requests.get(
    athlete_url,
    headers={'Authorization': 'Bearer ' + access_token},
    timeout=30,
)
# Without this check an error payload would silently be turned into a dataframe
athlete_response.raise_for_status()
athlete = athlete_response.json()

# Converting json into dataframe and doing some data cleaning:
# one row per athlete field, with the value in the 'info' column
df_athlete = (
    pd.json_normalize(athlete)
    .T
    .reset_index()
    .rename(columns={0: 'info'})
)
df_athlete

In [None]:
# Setting up url and page
# This API request gives us the list of activities.
# The table lacks certain details that we will get from another API request
page = 1
url = "https://www.strava.com/api/v3/activities"
access_token = token['access_token']

# Collect one dataframe per page and concatenate once after the loop:
# DataFrame.append was removed in pandas 2.0 and re-copies the frame on every call.
activity_pages = []
while True:
    # get page of activities from Strava
    print('Getting page number:', page)
    r = requests.get(
        url,
        headers={'Authorization': 'Bearer ' + access_token},
        params={'per_page': 200, 'page': page},
        timeout=30,
    )
    # An error payload is a non-empty dict, so it would never satisfy the
    # "no results" check below and the loop would run forever; stop on HTTP errors.
    r.raise_for_status()
    r = r.json()
    print(f'Extraction of page {page} Complete')
    # if no results then exit loop
    if (not r):
        print('Extration Done')
        break
    activity_pages.append(pd.json_normalize(r))

    page += 1

# Create the dataframe that stores the activity data (empty if nothing was returned)
activities = pd.concat(activity_pages, ignore_index=True) if activity_pages else pd.DataFrame()

# Columns kept for the analysis
activity_columns = [
    'id',
    'name',
    'distance',
    'elapsed_time',
    'total_elevation_gain',
    'sport_type',
    'start_date',
    'achievement_count',
    'athlete_count',
    'start_latlng',
    'end_latlng',
    'average_speed',
    'max_speed',
    'average_temp',
    'average_heartrate',
    'max_heartrate',
    'average_cadence',
    'elev_high',
    'elev_low',
]

# reindex keeps the selection working when a column is absent from the response
# (or when no activities were returned): the missing column is filled with NaN
# instead of raising KeyError.
clean_activities = activities.reindex(columns=activity_columns)

In [None]:
# Checkpoint so I don't have to run API get request
#activities_copy = pd.read_csv(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\StravaAPI\Outputs\clean_activities.csv')

# Work on an independent copy of the selected activity columns, with two
# columns renamed to the names used by the rest of the notebook
general_activities_copy = clean_activities.copy().rename(
    columns={
        'elapsed_time': 'workout_time_sec',
        'sport_type': 'workout_type',
    }
)
general_activities_copy.head()

In [None]:
# Cleaning dataframe and converting units
# NOTE: 'distance' is converted in place, so re-running this cell without first
# re-running the cell that creates general_activities_copy divides it by 1000 again.

# Aprox calories burned during workout calculation
# Formula -> CB = T * (0.6309*H + 0.1988*W + 0.2017*A - 55.0969) / 4.184
# CB -> is the number of calories burned
# T ->  is the duration of exercise in minutes
# H ->  is your average heart rate in beats per minute
# W ->  is your weight in kilograms
# A ->  is your age in years
WEIGHT_KG = 80
AGE_YEARS = 26
general_activities_copy['aprox_calories_burned'] = round(
    (general_activities_copy['workout_time_sec'] / 60)
    * ((0.6309 * general_activities_copy['average_heartrate']) + (0.1988 * WEIGHT_KG) + (0.2017 * AGE_YEARS - 55.0969))
    / 4.184,
    0,
)

# from meters to kilometers
# (single-column assignment: a list key such as [['distance']] with a Series value
#  raises "Columns must be same length as key" on recent pandas versions)
general_activities_copy['distance'] = round(general_activities_copy['distance'] / 1000, 2)

# from seconds to minutes
general_activities_copy['workout_time_min'] = round(general_activities_copy['workout_time_sec'] / 60, 2)

# Fix start_date column into the correct format (keep the calendar date only)
general_activities_copy['start_date'] = pd.to_datetime(general_activities_copy['start_date']).dt.date

# Changing name of workout type => Workout
general_activities_copy['workout_type'] = general_activities_copy['workout_type'].replace({'Workout':'Functional-Cardio Workout'})

# Preview
print('Number of recorded workouts:',len(general_activities_copy))
general_activities_copy.head()

In [None]:
# One dataframe per workout family, selected by the value of 'workout_type'

# Running: road and trail runs
running_activities = general_activities_copy[
    general_activities_copy['workout_type'].isin(['Run', 'TrailRun'])
]

# Biking: road and mountain bike rides
biking_activities = general_activities_copy[
    general_activities_copy['workout_type'].isin(['Ride', 'MountainBikeRide'])
]

# Functional: workouts relabelled as 'Functional-Cardio Workout'
functional_activities = general_activities_copy[
    general_activities_copy['workout_type'].isin(['Functional-Cardio Workout'])
]

In [None]:
# Variables for activities breakdown dataframe
today_msg = f'Total workouts as of {today}'
total_workouts = len(general_activities_copy)

# Breakdown of workout types: one row per type with its count ...
new = [today_msg, total_workouts]
activities_breakdown = (
    general_activities_copy['workout_type']
    .value_counts()
    .rename_axis('Sport')
    .reset_index(name='Count')
)
# ... plus a final row holding the overall total.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
total_row = pd.DataFrame([new], columns=['Sport', 'Count'])
activities_breakdown = pd.concat([activities_breakdown, total_row], ignore_index=True)
activities_breakdown

In [None]:

# Creating list of ids of these activities to use in the detailed workout api request 
running_id_list = list(running_activities['id'])
biking_id_list = list(biking_activities['id'])
functional_id_list = list(functional_activities['id'])

# General statistics
first_recorded_workout = min(general_activities_copy['start_date'])
most_recent_workout=max(general_activities_copy['start_date'])
average_workout_duration=round(general_activities_copy['workout_time_min'].mean(),2)
average_calories_burned_per_workout=round(general_activities_copy['aprox_calories_burned'].mean(),0)
average_distance_ran=round(running_activities['distance'].mean(),0)
average_biking_distance=round(biking_activities['distance'].mean(),0)

# Storing average calories for later analysis comparing calories from garmin, strava and my calculated value
%store average_calories_burned_per_workout

# Creating dataframe from general statistics variables
# Create the DataFrame
general_stats_df = pd.DataFrame({
    'First Workout':first_recorded_workout,
    'Most Recent Workout': most_recent_workout,
    'Average Workout Duration in Minutes':average_workout_duration,
    'Average Calories Burned Per Workout':average_calories_burned_per_workout,
    'Average Distance Ran in Kilometers':average_distance_ran,
    'Average Biking Distance in Kilometers':average_biking_distance
},index=['Info'])

# Transposing dataframe, setting new index and column
general_stats_df = general_stats_df.T
general_stats_df = general_stats_df.reset_index()
general_stats_df = general_stats_df.rename(columns={'index':'Info','Info':'Data'})
general_stats_df

In [None]:
# General statistics - pending: incorporate some of these numbers into the above table
# Pending Analysis
# Uses general_activities_copy: `activities_copy` only exists in a commented-out
# checkpoint line, so referencing it raised NameError on a fresh kernel.
general_activities_copy[[
    'distance',
    'workout_time_min',
    'average_speed',
    'max_speed',
    'average_heartrate',
    'average_temp',
    'max_heartrate',
    'average_cadence',
    'aprox_calories_burned',
]].describe()

In [None]:
# Probe the detailed-activity endpoint with one fixed activity id and show
# whether the request succeeded (True when the HTTP status is 200)
req = requests.get(
    url='https://www.strava.com/api/v3/activities/8633145882?access_token=' + access_token
)
req.status_code == 200

In [None]:
# Get detailed view of workouts function:
def GetWorkoutData(workout_list, max_requests=100):
    """
    Download the detailed Strava record of every workout id in workout_list.

    The notebook's working assumption is that the API allows 100 requests per
    15 minutes, so at most max_requests ids are requested per call; a longer
    list is cut down to its first max_requests entries. The original message
    calls these the "most recent" workouts, which assumes the list is ordered
    newest first (TODO confirm against how the id lists are built).

    Parameters
    ----------
    workout_list : list
        Activity ids to request.
    max_requests : int, default 100
        Maximum number of ids requested in one call.

    Returns
    -------
    list
        One parsed JSON payload per successfully downloaded workout, in request
        order. Extraction stops at the first non-200 response, and whatever was
        collected up to that point is returned.

    Reads the module-level access_token for authentication.
    """
    if len(workout_list) > max_requests:
        print(f'This workout list is too large, reducing to the {max_requests} most recent workouts.')
        workout_list = workout_list[:max_requests]

    workout_info = []
    for workout_num, workout_id in enumerate(workout_list, start=1):
        print('Extracting workout:', workout_num)
        req = requests.get(
            url=f'https://www.strava.com/api/v3/activities/{workout_id}',
            headers={'Authorization': 'Bearer ' + access_token},
        )
        if req.status_code == 429:
            # 429 is the HTTP status for "too many requests"
            print('API Rate limit exceeded, stopping extraction')
            break
        if req.status_code != 200:
            # Any other failure (expired token, missing activity, ...) is reported
            # with its real status instead of being mislabelled as a rate limit
            print(f'Request for workout {workout_id} failed with HTTP status {req.status_code}, stopping extraction')
            break
        workout_info.append(req.json())

    return workout_info

In [None]:
# Fetch the detailed record of one fixed activity id and display it as a flattened dataframe
pd.json_normalize(
    requests.get(
        url='https://www.strava.com/api/v3/activities/8611884034?access_token=' + access_token
    ).json()
)

In [None]:
# Scratch arithmetic: 386 divided by 60 (presumably a seconds -> minutes conversion; TODO confirm)
386/60

In [None]:
# Download the detailed json records for the functional, running and biking id lists.
# To stay under the API rate limit, run these calls one at a time, 15 minutes apart.
functional_workouts = GetWorkoutData(workout_list=functional_id_list)
running_workouts = GetWorkoutData(workout_list=running_id_list)
biking_workouts = GetWorkoutData(workout_list=biking_id_list)

# saving a copy for further analysis
#functional_workouts_json = functional_workouts

In [None]:
# Flatten the 'laps' records of the functional workouts into one dataframe
# and give the nested activity id a flat column name
functional_workouts_laps = pd.json_normalize(
    functional_workouts,
    record_path='laps',
).rename(columns={'activity.id': 'activity_id'})
functional_workouts_laps

In [None]:
# Flatten the detailed functional workout json into a dataframe and display it
functional_workouts_df = pd.json_normalize(data=functional_workouts)
functional_workouts_df

In [None]:
# Drop the laps that belong to activity 8611884034, then count the remaining laps per activity
functional_workouts_laps = functional_workouts_laps[
    functional_workouts_laps['activity_id'].ne(8611884034)
]
functional_workouts_laps['activity_id'].value_counts()

In [None]:
# Rebuild the flattened functional workouts dataframe and preview its first rows
functional_workouts_df = pd.json_normalize(data=functional_workouts)
functional_workouts_df.head()

# Checkpoint -> connect to google drive api -> then connect to google looker

#### article in stackoverflow to set up function to upload data to googlesheet:
#### https://stackoverflow.com/questions/62917910/how-can-i-export-pandas-dataframe-to-google-sheets-using-python

In [None]:
# This function uses the pygsheets module to upload data to google sheets
def WriteToGsheet(service_file_path, spreadsheet_id, sheet_name, data_df):
    """
    Write data_df to the worksheet sheet_name of the spreadsheet spreadsheet_id,
    authorizing with the service-account file at service_file_path.

    The worksheet is created when it does not exist yet. Its existing content is
    cleared, the dataframe is written starting at cell (1, 1), and the first row
    is frozen.

    Parameters
    ----------
    service_file_path : str
        Path to the service-account json file passed to pygsheets.authorize.
    spreadsheet_id : str
        Key of the target spreadsheet.
    sheet_name : str
        Title of the worksheet to (create and) overwrite.
    data_df : pandas.DataFrame
        Data to upload.
    """
    gc = pygsheets.authorize(service_file=service_file_path)
    sh = gc.open_by_key(spreadsheet_id)
    # Look the worksheet up first and create it only when it is missing.
    # The previous bare `except: pass` around add_worksheet hid every failure,
    # not just the expected "worksheet already exists" one.
    try:
        wks_write = sh.worksheet_by_title(sheet_name)
    except pygsheets.exceptions.WorksheetNotFound:
        wks_write = sh.add_worksheet(sheet_name)
    wks_write.clear('A1', None, '*')
    wks_write.set_dataframe(data_df, (1, 1), encoding='utf-8', fit=True)
    wks_write.frozen_rows = 1

# Setting up parameters for the WriteToGsheet function
service_file_path = r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\StravaAPI\Credentials\pacific-castle-303123-909a5ddcda92.json'
spreadsheet_id = '1pomkAzlndHBl_czERrwKkoZFUkJRGFjyhRTeoWA6CS4'
   

In [None]:
# Display the cleaned activities dataframe as the cell output
general_activities_copy

In [None]:
# Push the detailed functional workouts dataframe to the 'specific_data' worksheet
functional_workouts_sheet = 'specific_data'
WriteToGsheet(
    service_file_path=service_file_path,
    spreadsheet_id=spreadsheet_id,
    sheet_name=functional_workouts_sheet,
    data_df=functional_workouts_df,
)

In [None]:
# Push the general statistics dataframe to the 'General_Statistics' worksheet
general_stats_sheet = 'General_Statistics'
WriteToGsheet(
    service_file_path=service_file_path,
    spreadsheet_id=spreadsheet_id,
    sheet_name=general_stats_sheet,
    data_df=general_stats_df,
)

In [None]:
# Push the workout-type breakdown dataframe to the 'Activities_Breakdown' worksheet
activities_breakdown_sheet = 'Activities_Breakdown'
WriteToGsheet(
    service_file_path=service_file_path,
    spreadsheet_id=spreadsheet_id,
    sheet_name=activities_breakdown_sheet,
    data_df=activities_breakdown,
)

In [None]:
# Creating checkpoint!
# Save the detailed functional workouts (the list returned by GetWorkoutData) as a
# JSON file so they can be reloaded later without calling the API again.
# `functional_workouts` is dumped directly: the `functional_workouts_json` name was
# only assigned in a commented-out line, so using it raised NameError on a fresh kernel.
# Opening in "w" mode creates the file, or overwrites it if it already exists;
# the with statement closes the file once writing is done.
with open('functional_workouts_json.json','w') as f:
    json.dump(functional_workouts,f)
# To reload the checkpoint:
# f = open(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\StravaAPI\Outputs\functional_workouts_json.json')
# functional_workouts_json = json.load(f)