In [2]:
# Project idea -> add a workout counter to a Google Looker Studio dashboard
# Packages required for this project:
import requests
import pandas as pd
import gspread
from df2gspread import df2gspread as d2g
import gspread_dataframe as gd
import pygsheets
from datetime import date
import json
import webbrowser


# Package to save datasets to be used in another script for next analysis
from sklearn import datasets

# Show every column when a DataFrame is displayed (no column truncation)
pd.set_option('display.max_columns', None)

# Importing credentials for Strava's API
from Credentials import StravaCredentials

# Today's date as text (full month name/day/year); used later in the
# "Total workouts as of ..." summary row
today = date.today().strftime('%B/%d/%Y')

In [3]:
# NOTE: the author flagged this step as pending / not working ("not extracting
# StravaCredentials variable") - confirm whether that is still the case.
# To get an authorization code by hand, open this link in your browser and copy the code from the redirect -> https://www.strava.com/oauth/authorize?client_id=99205&response_type=code&redirect_uri=http://localhost/&approval_prompt=force&scope=profile:read_all,activity:read_all
# `data` is the payload that is later posted to Strava's token endpoint.
data = StravaCredentials.data

# In case you need to get the code again uncomment these:
# webbrowser.open(f"https://www.strava.com/oauth/authorize?client_id={data['client_id']}&response_type=code&redirect_uri=http://localhost/&approval_prompt=force&scope=profile:read_all,activity:read_all")
# data['code'] = input("From the web broswer enter the code:")

In [9]:
# Creating function that returns the access token that is used in the other api calls
def GetToken(data):
    """Post the OAuth payload to Strava and return the access token.

    Parameters
    ----------
    data : dict
        Form fields sent to Strava's token endpoint.

    Returns
    -------
    str
        The value stored under ``access_token`` in the JSON response.

    Raises
    ------
    requests.HTTPError
        If Strava answers with a 4xx/5xx status code.
    KeyError
        If the response body does not contain ``access_token``.
    """
    response = requests.post(url='https://www.strava.com/api/v3/oauth/token', data=data)
    # Fail loudly on an HTTP error instead of hitting an unexplained KeyError below.
    response.raise_for_status()
    token = response.json()
    if 'access_token' not in token:
        # Report only the key names: the values may contain secrets.
        raise KeyError(
            f"'access_token' missing from Strava token response; keys received: {sorted(token)}"
        )
    return token['access_token']

In [4]:
# Getting refresh and access token
token_response = requests.post(url='https://www.strava.com/api/v3/oauth/token', data=data)
# Stop here on an HTTP error; otherwise the lookups below fail with a KeyError
# that hides the real cause (bad credentials, expired code, ...).
token_response.raise_for_status()
token = token_response.json()

# Accessing the token json to get refresh token and access token
refresh_token = token['refresh_token']
access_token = token['access_token']

In [6]:
# Getting athlete info
athlete_url = 'https://www.strava.com/api/v3/athlete'
# Send the token in the Authorization header rather than in the URL, so it does
# not end up in logs or browser/proxy history.
athlete_response = requests.get(athlete_url, headers={'Authorization': f'Bearer {access_token}'})
athlete_response.raise_for_status()
athlete = athlete_response.json()

# Converting json into a two-column (index, info) dataframe: one row per athlete field
df_athlete = (
    pd.json_normalize(athlete)
    .T
    .reset_index()
    .rename(columns={0:'info'})
)
# NOTE: this preview contains personal data - clear the output before sharing the notebook.
df_athlete

Unnamed: 0,index,info
0,id,58832723
1,username,blindhydra_athlete
2,resource_state,3
3,firstname,Manuel
4,lastname,Elizaldi
5,bio,
6,city,Austin
7,state,Texas
8,country,United States
9,sex,M


In [7]:
# This API request gives us the general list of activities.
# The table lacks certain details that we will get from another API request
page = 1
url = "https://www.strava.com/api/v3/activities"
access_token = token['access_token']
# Token goes in the Authorization header instead of the query string.
auth_headers = {'Authorization': f'Bearer {access_token}'}

# Collect one DataFrame per page and concatenate once at the end
# (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
activity_pages = []
while True:
    # get page of activities from Strava
    print('Getting page number:',page)
    response = requests.get(url, headers=auth_headers, params={'per_page': 200, 'page': page})
    # An error payload is a non-empty dict, so without this check the loop
    # would never see an "empty" page and would run forever.
    response.raise_for_status()
    page_records = response.json()
    print(f'Extraction of page {page} Complete')
    # if no results then exit loop
    if not page_records:
        print('Extration Done')
        break
    activity_pages.append(pd.json_normalize(page_records))
    page += 1

# Raw table of every activity; an empty frame when the account has no activities.
activities = pd.concat(activity_pages, ignore_index=True) if activity_pages else pd.DataFrame()

# Keep only the columns used by the analysis. reindex (instead of [[...]]) fills
# a column with NaN when no activity reported it, rather than raising KeyError.
clean_activities = activities.reindex(columns=[
    'id',
    'name',
    'distance',
    'elapsed_time',
    'total_elevation_gain',
    'sport_type',
    'start_date',
    'achievement_count',
    'athlete_count',
    'start_latlng',
    'end_latlng',
    'average_speed',
    'max_speed',
    'average_temp',
    'average_heartrate',
    'max_heartrate',
    'average_cadence',
    'elev_high',
    'elev_low',
])

In [None]:
# Checkpoint so I don't have to run API get request
#table = pd.read_csv(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\StravaAPI\Outputs\clean_activities.csv')

# Working copy of the activities dataframe with some columns renamed.
# `clean_activities` is produced by the activities-download cell; the previous
# call to GetWorkouts() referenced a function that is not defined in this
# notebook and raised NameError on a fresh kernel.
general_table = clean_activities.rename(columns={'elapsed_time':'workout_time_sec','sport_type':'workout_type'})
general_table.head()

In [None]:
# Cleaning dataframe and converting units
# NOTE: this cell rewrites `distance` and `start_date` in place, so running it
# twice without re-creating general_table converts the distance twice.

# Approximate calories burned during the workout
# Formula -> CB = T * (0.6309*H + 0.1988*W + 0.2017*A - 55.0969) / 4.184
# CB -> is the number of calories burned
# T  -> is the duration of exercise in minutes
# H  -> is your average heart rate in beats per minute
# W  -> is your weight in kilograms
# A  -> is your age in years
WEIGHT_KG = 80
AGE_YEARS = 26

workout_minutes = general_table['workout_time_sec'] / 60
# Workouts without a recorded heart rate get NaN calories.
general_table['aprox_calories_burned'] = (
    workout_minutes
    * ((0.6309*general_table['average_heartrate']) + (0.1988*WEIGHT_KG) + (0.2017*AGE_YEARS - 55.0969))
    / 4.184
).round(0)

# from meters to kilometers
# (plain column assignment: `df[['col']] = series` is not reliable across pandas versions)
general_table['distance'] = (general_table['distance'] / 1000).round(2)

# from seconds to minutes
general_table['workout_time_min'] = workout_minutes.round(2)

# Fix start_date column into the correct format (date only, no time component)
general_table['start_date'] = pd.to_datetime(general_table['start_date']).dt.date

# Changing name of workout type => Workout
general_table['workout_type'] = general_table['workout_type'].replace({'Workout':'Functional-Cardio Workout'})

# Preview
print('Number of recorded workouts:',len(general_table))
general_table.head()

In [None]:
# One dataframe per family of workouts, selected by workout_type.
# Running: road and trail runs
running_activities = general_table[general_table['workout_type'].isin(['Run','TrailRun'])]

# Biking: road and mountain-bike rides
biking_activities = general_table[general_table['workout_type'].isin(['Ride','MountainBikeRide'])]

# Functional: the workouts relabelled as 'Functional-Cardio Workout'
functional_activities = general_table[general_table['workout_type'].isin(['Functional-Cardio Workout'])]

In [None]:
# Variables for activities breakdown dataframe
today_msg = f'Total workouts as of {today}'
total_workouts = len(general_table)

# Breakdown of workout types: one row per workout type with its count ...
new = [today_msg,total_workouts]
activities_breakdown = general_table['workout_type'].value_counts().rename_axis('Sport').reset_index(name='Count')
# ... plus a final row holding the overall total.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
totals_row = pd.DataFrame([new], columns=['Sport','Count'])
activities_breakdown = pd.concat([activities_breakdown, totals_row], ignore_index=True)
activities_breakdown

In [None]:
# General statistics over every recorded workout
workout_dates = general_table['start_date']
first_recorded_workout = min(workout_dates)
most_recent_workout = max(workout_dates)
average_workout_duration = round(general_table['workout_time_min'].mean(), 2)
aprox_average_calories_burned_per_workout = round(general_table['aprox_calories_burned'].mean(), 0)
average_distance_ran = round(running_activities['distance'].mean(), 0)
average_biking_distance = round(biking_activities['distance'].mean(), 0)

# Storing average calories for later analysis comparing calories from garmin, strava and my calculated value
# %store average_calories_burned_per_workout

# Build a one-row frame from the statistics, then flip it so that every
# statistic becomes a row with an 'Info' label and a 'Data' value.
general_stats_df = (
    pd.DataFrame(
        {
            'First Workout': first_recorded_workout,
            'Most Recent Workout': most_recent_workout,
            'Average Workout Duration in Minutes': average_workout_duration,
            'Approximate Average Calories Burned Per Workout': aprox_average_calories_burned_per_workout,
            'Average Distance Ran in Kilometers': average_distance_ran,
            'Average Biking Distance in Kilometers': average_biking_distance,
        },
        index=['Info'],
    )
    .T
    .reset_index()
    .rename(columns={'index':'Info','Info':'Data'})
)
general_stats_df

In [None]:
# Activity ids per workout family; these feed the detailed-workout API requests
running_id_list = running_activities['id'].tolist()
biking_id_list = biking_activities['id'].tolist()
functional_id_list = functional_activities['id'].tolist()
# First 100 ids of the full table, by position
most_recent_id_lsit = general_table['id'].iloc[:100].tolist()

In [None]:
# Get detailed view of workouts function:
# This function will get the data for each workout, if it reaches the API request limit it will stop the process
# The API rate limit allows us to do 100 requests for each 15 minutes. To prevent passing this limit we only grab -
# - the first 100 workouts from each list.
def GetWorkoutData(workout_list, api_token=None, max_workouts=100):
    """Download the detailed JSON of each workout id in ``workout_list``.

    Parameters
    ----------
    workout_list : list
        Activity ids to request, one API call per id.
    api_token : str, optional
        Strava access token. When omitted, the notebook-level ``access_token``
        variable is used, as in the original implementation.
    max_workouts : int, optional
        Upper bound on the number of ids requested in one call (default 100).
        Longer lists are cut to their first ``max_workouts`` entries, which
        the notebook treats as the most recent workouts.

    Returns
    -------
    list
        The decoded JSON of every workout fetched before the first non-200
        response (possibly empty).
    """
    if api_token is None:
        api_token = access_token  # notebook-level token
    if len(workout_list) > max_workouts:
        print(f'This workout list is too large, reducing to the {max_workouts} most recent workouts.')
        workout_list = workout_list[:max_workouts]

    # Token travels in the Authorization header instead of the URL.
    headers = {'Authorization': f'Bearer {api_token}'}
    workout_info = []
    for workout_num, activity_id in enumerate(workout_list, start=1):
        print('Extracting workout:', workout_num)
        req = requests.get(url=f'https://www.strava.com/api/v3/activities/{activity_id}', headers=headers)
        if req.status_code != 200:
            # Keep what was downloaded so far and stop; the status code tells
            # an authorization problem apart from an exhausted rate limit.
            print('Error in authorization or API limit exceeded, stopping extraction')
            print('HTTP status code:', req.status_code)
            break
        workout_info.append(req.json())
    return workout_info

### Using the function to get the detailed JSON data for functional, running and biking workouts
#### Run these cells at most once every 15 minutes (API rate limit)

In [None]:
# Detailed JSON of the running workouts (one API request per id, capped at 100 ids)
running_workouts = GetWorkoutData(running_id_list)

In [None]:
# Detailed JSON of the biking workouts (one API request per id, capped at 100 ids)
biking_workouts = GetWorkoutData(biking_id_list)

In [None]:
# Detailed JSON of the functional workouts (one API request per id, capped at 100 ids)
functional_workouts = GetWorkoutData(functional_id_list)

In [None]:
# This function will parse the workout json, grab the relevant columns, clean the units and create a lap counter for the final dataframe
def CleanWorkoutJson(workout_json):
    """Turn detailed workout JSON into one cleaned row per workout with lap statistics.

    Parameters
    ----------
    workout_json : list of dict (or a single dict)
        Detailed workout records as returned by ``GetWorkoutData``. Each record
        is expected to carry ``id``, ``distance``, ``elapsed_time`` and
        ``start_date``; laps are read from the optional ``laps`` list, whose
        entries reference their workout through ``activity.id``.

    Returns
    -------
    pandas.DataFrame
        One row per workout that has at least one lap, with ``distance``
        divided by 1000, ``workout_time_min`` (``elapsed_time`` / 60),
        ``start_date`` as a date, plus ``avg_time_per_lap`` (mean lap
        ``elapsed_time`` / 60) and ``lap_count``. Columns that no workout
        reported are present and filled with NaN. An empty frame with the
        same columns is returned when there are no workouts or no laps.
    """
    workout_columns = ['activity_id',
                       'name',
                       'start_date',
                       'sport_type',
                       'distance',
                       'workout_time_min',
                       'calories',
                       'total_elevation_gain',
                       'start_latlng',
                       'end_latlng',
                       'average_speed',
                       'max_speed',
                       'average_temp',
                       'average_heartrate',
                       'max_heartrate']
    output_columns = workout_columns + ['avg_time_per_lap', 'lap_count']

    if isinstance(workout_json, dict):
        workout_json = [workout_json]
    if not workout_json:
        return pd.DataFrame(columns=output_columns)

    df = pd.json_normalize(workout_json).rename(columns={'id':'activity_id'})
    df['distance'] = (df['distance'] / 1000).round(2)
    df['workout_time_min'] = (df['elapsed_time'] / 60).round(2)
    df['start_date'] = pd.to_datetime(df['start_date']).dt.date
    # reindex keeps the column layout stable even when optional fields
    # are missing from every record.
    df = df.reindex(columns=workout_columns)

    # Flatten every lap of every workout; a missing or empty 'laps' entry adds nothing.
    lap_records = [lap for workout in workout_json for lap in (workout.get('laps') or [])]
    if not lap_records:
        # The inner merge below would drop every workout anyway.
        return pd.DataFrame(columns=output_columns)

    workout_laps = (
        pd.json_normalize(lap_records)
        .reindex(columns=['activity.id', 'elapsed_time'])
        .rename(columns={'activity.id':'activity_id', 'elapsed_time':'lap_elapsed_time_min'})
    )
    workout_laps['lap_elapsed_time_min'] = (workout_laps['lap_elapsed_time_min'] / 60).round(2)

    # Aggregate only the lap duration: a frame-wide mean() fails on the text
    # columns of the lap table with pandas >= 2.
    laps_by_activity = workout_laps.groupby('activity_id')['lap_elapsed_time_min']
    lap_stats = pd.DataFrame({
        'avg_time_per_lap': laps_by_activity.mean(),
        'lap_count': laps_by_activity.size(),
    }).reset_index()

    # Inner merge: workouts without any lap are left out, as before.
    return df.merge(lap_stats, on='activity_id')

In [None]:
# Function that calculates stats from workout dataframe
def DescribeWorkoutdf(workout_df):
    """Summarise a cleaned workout dataframe as a two-column (Info, Data) table.

    Parameters
    ----------
    workout_df : pandas.DataFrame
        Must contain ``start_date``, ``workout_time_min``, ``calories``,
        ``distance`` and ``lap_count`` (the layout produced by
        ``CleanWorkoutJson``).

    Returns
    -------
    pandas.DataFrame
        One row per statistic: first and most recent workout date, average
        duration (2 decimals), average calories, average distance and average
        lap count (both rounded to 0 decimals), and the number of workouts.
        For an empty input the statistics are NaN and the count is 0.
    """
    start_dates = workout_df['start_date']
    summary = {
        # Series.min()/max() return NaN on an empty frame, where the builtin
        # min()/max() would raise ValueError.
        'First Recorded Workout:': start_dates.min(),
        'Most Recent Workout': start_dates.max(),
        'Average Workout Duration in Minutes': round(workout_df['workout_time_min'].mean(), 2),
        'Average Calories Burned Per Workout': workout_df['calories'].mean(),
        'Average Distance in Kilometers': round(workout_df['distance'].mean(), 0),
        'Number of Workouts:': len(workout_df),
        'Average Number of Laps': round(workout_df['lap_count'].mean(), 0),
    }

    # One-row frame -> transpose so every statistic becomes a row, then label the columns.
    grl_stats_df = pd.DataFrame(summary, index=['Info']).T.reset_index()
    return grl_stats_df.rename(columns={'index':'Info','Info':'Data'})

In [None]:
# Functional workouts: one cleaned row per workout, with lap statistics attached
functional_workouts_df = CleanWorkoutJson(functional_workouts)
functional_workouts_df.head(5)

In [None]:
# Functional workouts: summary statistics as an (Info, Data) table
functional_workouts_desc = DescribeWorkoutdf(functional_workouts_df)
functional_workouts_desc

In [None]:
# Running workouts: one cleaned row per workout, with lap statistics attached
running_workouts_df = CleanWorkoutJson(running_workouts)
running_workouts_df.head(5)

In [None]:
# Running workouts: summary statistics as an (Info, Data) table
running_workouts_desc = DescribeWorkoutdf(running_workouts_df)
running_workouts_desc

In [None]:
# Biking workouts: one cleaned row per workout, with lap statistics attached
biking_workouts_df = CleanWorkoutJson(biking_workouts)
biking_workouts_df.head(5)

In [None]:
# Biking workouts: summary statistics as an (Info, Data) table
biking_workouts_desc = DescribeWorkoutdf(biking_workouts_df)
biking_workouts_desc

### Uploading to Google Sheets

In [None]:
# This function uses the pygsheets module to upload data to google sheets
def WriteToGsheet(service_file_path, spreadsheet_id, sheet_name, data_df):
    """Replace the contents of one worksheet with ``data_df``.

    Parameters
    ----------
    service_file_path : str
        Path of the service-account key file passed to ``pygsheets.authorize``.
    spreadsheet_id : str
        Key of the spreadsheet to open.
    sheet_name : str
        Title of the worksheet to write; it is created when it does not exist.
    data_df : pandas.DataFrame
        Data written starting at cell A1.

    Notes
    -----
    Only a missing worksheet is handled. Any other failure now propagates
    to the caller instead of being silently ignored.
    """
    gc = pygsheets.authorize(service_file=service_file_path)
    sh = gc.open_by_key(spreadsheet_id)
    try:
        wks_write = sh.worksheet_by_title(sheet_name)
    except pygsheets.exceptions.WorksheetNotFound:
        # First upload for this tab: create it.
        wks_write = sh.add_worksheet(sheet_name)
    # Wipe the previous contents before writing the new table.
    wks_write.clear('A1',None,'*')
    wks_write.set_dataframe(data_df, (1,1), encoding='utf-8', fit=True)
    # Keep the header row visible while scrolling.
    wks_write.frozen_rows = 1
    
# Setting up parameters for the WriteToGsheet function
# NOTE(review): the key-file path is an absolute path on the author's machine -
# the upload cells only run elsewhere after pointing it at a local key file.
service_file_path = r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\Workout-Analysis-API\Credentials\pacific-castle-303123-909a5ddcda92.json'
spreadsheet_id = '1pomkAzlndHBl_czERrwKkoZFUkJRGFjyhRTeoWA6CS4'

In [None]:
# Upload every table to its own worksheet of the spreadsheet.
# One mapping + one loop replaces nine copy-pasted upload cells; the worksheet
# names and the dataframes written to them are unchanged.
sheets_to_upload = {
    'General_Table': general_table,
    'Activities_Breakdown': activities_breakdown,
    'General_Statistics': general_stats_df,
    'Functional_Workouts': functional_workouts_df,
    'Functional_Workouts_Desc': functional_workouts_desc,
    'Running_Workouts': running_workouts_df,
    'Running_Workouts_Desc': running_workouts_desc,
    'Biking_Workouts': biking_workouts_df,
    'Biking_Workouts_Desc': biking_workouts_desc,
}

for sheet_name, sheet_df in sheets_to_upload.items():
    WriteToGsheet(service_file_path, spreadsheet_id, sheet_name, sheet_df)
    print('Uploaded worksheet:', sheet_name)