In [3]:
# Project idea -> add a workout counter in Google Looker Studio
# Packages required for this project:
import requests
import pandas as pd
import numpy as np
import pygsheets
import datetime
from datetime import date
import json
import webbrowser
from time import sleep
import time
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from Functions import *

In [16]:
# Importing credentials for Strava's API
from Credentials import StravaCredentials

# To obtain an authorization code, open the link below in a browser, approve the app and copy the
# `code` query parameter from the URL you are redirected to:
# https://www.strava.com/oauth/authorize?client_id=99205&response_type=code&redirect_uri=http://localhost/&approval_prompt=force&scope=profile:read_all,activity:read_all
# `data` is the mapping kept in the local Credentials module; it is the same object as
# StravaCredentials.data (no copy), so adding the code below mutates it in place.
data = StravaCredentials.data

# In case you need to get the code again uncomment these:
# webbrowser.open(f"https://www.strava.com/oauth/authorize?client_id={data['client_id']}&response_type=code&redirect_uri=http://localhost/&approval_prompt=force&scope=profile:read_all,activity:read_all")
# Trim whitespace/newlines that are easily picked up when copying from the address bar;
# a padded code would be sent to Strava verbatim.
data['code'] = input("From the web browser enter the code:").strip()

In [17]:
# Creating function that returns the access token that is used in the other api calls
def GetToken(data):
    """Exchange the OAuth payload for a Strava access token.

    Parameters
    ----------
    data : dict
        Form fields posted to Strava's token endpoint (presumably client_id,
        client_secret, code and grant_type -- confirm against the Credentials module).

    Returns
    -------
    The value stored under ``access_token`` in the JSON response.

    Raises
    ------
    KeyError
        If the response contains no ``access_token`` (for example a rejected or
        expired authorization code). The message carries the HTTP status and the
        body Strava returned so the cause is visible.
    """
    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = requests.post(url='https://www.strava.com/api/v3/oauth/token', data=data, timeout=30)
    token = response.json()

    # Same exception type as the plain lookup would raise, but with a useful message.
    if 'access_token' not in token:
        raise KeyError(
            f"Strava returned no access_token (HTTP {response.status_code}): {token}"
        )

    return token['access_token']

# Exchange the authorization code stored in `data` for the access token used by the later API calls.
access_token = GetToken(data)

In [6]:
def retrieve_activities(access_token):
    """Download the authenticated athlete's activities from Strava.

    Requests ``/api/v3/activities`` page by page (200 per page) until Strava
    returns an empty page or a non-200 status. On a non-200 status the pages
    fetched so far are kept and returned (best effort).

    Parameters
    ----------
    access_token : str
        Strava access token, sent as a Bearer token.

    Returns
    -------
    pandas.DataFrame
        One row per activity, restricted to the columns listed in ``columns``
        below, with a fresh 0..n-1 index. Columns that Strava did not return
        are present and filled with NaN. When nothing could be retrieved the
        frame is empty but still has those columns.
    """
    url = "https://www.strava.com/api/v3/activities"
    columns = ['id',
        'name',
        'distance',
        'elapsed_time',
        'total_elevation_gain',
        'sport_type',
        'start_date',
        'achievement_count',
        'athlete_count',
        'start_latlng',
        'end_latlng',
        'average_speed',
        'max_speed',
        'average_temp',
        'average_heartrate',
        'max_heartrate',
        'average_cadence',
        'elev_high',
        'elev_low']
    # Send the token in a header instead of the query string so it does not end up in URLs.
    headers = {'Authorization': 'Bearer ' + access_token}
    pages = []
    page = 1
    while True:
        # get page of activities from Strava
        print('Getting page number:', page)
        r = requests.get(url,
                         headers=headers,
                         params={'per_page': 200, 'page': page},
                         timeout=30)

        # any non-200 status (rate limit, expired token, ...) stops the extraction
        if r.status_code != 200:
            print('Error:',r.status_code, 'stopping extraction')
            break

        payload = r.json()
        print(f'Extraction of page {page} complete')
        # if no results then exit loop
        if (not payload):
            print('Extraction done')
            break
        # collect the page; everything is concatenated once after the loop
        pages.append(pd.json_normalize(payload))
        page += 1

    # pd.concat rejects an empty list, so build the empty result explicitly
    if not pages:
        return pd.DataFrame(columns=columns)

    activities = pd.concat(pages, ignore_index=True)
    # reindex (rather than activities[columns]) tolerates fields missing from the
    # response, e.g. sensor readings no activity recorded, instead of raising KeyError
    return activities.reindex(columns=columns)

# call the function to retrieve activities
activities = retrieve_activities(access_token)
# `clean_activities` is a second name for the same object (no copy is made);
# the later cells read the activity ids from it.
clean_activities = activities

Getting page number: 1
Extraction of page 1 complete
Getting page number: 2
Extraction of page 2 complete
Getting page number: 3
Extraction of page 3 complete
Getting page number: 4
Extraction of page 4 complete
Getting page number: 5
Extraction of page 5 complete
Getting page number: 6
Extraction of page 6 complete
Extraction done


In [None]:
# Save the activity ids to a CSV file (to_csv is called with its defaults).
# NOTE(review): absolute, machine-specific path -- consider a configurable output directory.
id_list = clean_activities['id']
id_list.to_csv(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\PyStrava\Outputs\idlist.csv')

In [None]:
# Reload the saved ids from disk and keep only the 'id' column as a plain list.
# Note that `id_list` is first a DataFrame and then rebound to a list.
# NOTE(review): absolute, machine-specific path.
id_list = pd.read_csv(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\PyStrava\Outputs\idlist.csv')
id_list = list(id_list['id'])

In [None]:
# Load a previously saved workouts table from disk.
# NOTE(review): a later cell rebuilds `all_workouts_df` from the Google Sheet, which replaces
# this value when both cells are run; the path is also absolute and machine-specific.
all_workouts_df = pd.read_csv(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\PyStrava\Outputs\all_workouts_df.csv')

In [25]:
# Setting up parameters for gspread - updating google sheet

# Path to the service-account JSON key used to authenticate against Google.
# NOTE(review): absolute, machine-specific path to a credential file.
service_file_path = r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\PyStrava\Credentials\pacific-castle-303123-909a5ddcda92.json'
# NOTE(review): not referenced in the visible cells -- the sheet is opened by name below.
spreadsheet_id = '1pomkAzlndHBl_czERrwKkoZFUkJRGFjyhRTeoWA6CS4'
# Scopes requested for the service account.
myscope = ['https://spreadsheets.google.com/feeds', 
            'https://www.googleapis.com/auth/drive']

# Build credentials from the key file and authorize a gspread client with them.
mycred = ServiceAccountCredentials.from_json_keyfile_name(service_file_path,myscope) # type: ignore
client = gspread.authorize(mycred)
# Read the first worksheet of the 'workout-data' spreadsheet into a DataFrame.
mysheet = client.open('workout-data').sheet1
list_of_row = mysheet.get_all_records()
all_workouts_df = pd.DataFrame(list_of_row)

In [8]:
# From the general table extract ids to be updated:
# every activity id that was just downloaded from Strava.
updated_ids = list(clean_activities['id'])
# Last expression of the cell: shows how many ids were downloaded.
len(updated_ids)

802

In [9]:
# Getting the current ids, not updated:
# the activity ids already present in the table loaded into `all_workouts_df`.
not_updated_workouts = list(all_workouts_df['activity_id'])
# Last expression of the cell: shows how many ids are already stored.
len(not_updated_workouts)

10

In [10]:
# NOTE(review): this is the difference between the two list lengths, not the size of the
# set of missing ids (computed in a later cell); the two differ whenever the stored table
# holds ids that are absent from the Strava download, or either list has duplicates.
print('Adding',len(updated_ids) - len(not_updated_workouts),'new workouts.')

Adding 792 new workouts.


In [11]:
# Comparing both id lists and getting a subset of the ones that are missing:
# ids present in the Strava download but not in the stored table.
# The result goes through a set, so duplicates are dropped and the order is arbitrary.
missing_workouts = list(set(updated_ids).difference(not_updated_workouts))

In [18]:
# Download the missing workouts and turn them into a scored DataFrame.
# GetAllWorkouts, CleanWorkoutJson and CreateScoreColumns are not defined in this notebook;
# presumably they come from `from Functions import *` -- verify there for their exact behavior.
missing_workouts_json = GetAllWorkouts(missing_workouts,access_token)
missing_workouts_df = CleanWorkoutJson(missing_workouts_json)
# `missing_workouts_df` is rebound to the result of the scoring step.
missing_workouts_df = CreateScoreColumns(missing_workouts_df)

Extracting all workouts, due to the API rate limit, this will take 01:58 hours or 118.8 minutes.
Extracting workout: 1
Rate limit exceeded. Waiting for 15 minutes...
Extracting workout: 1
Rate limit exceeded. Waiting for 15 minutes...
Extracting workout: 1
Rate limit exceeded. Waiting for 15 minutes...
Extracting workout: 1
Extracting workout: 2
Extracting workout: 3
Extracting workout: 4
Extracting workout: 5
Extracting workout: 6
Extracting workout: 7
Extracting workout: 8
Extracting workout: 9
Extracting workout: 10
Extracting workout: 11
Extracting workout: 12
Extracting workout: 13
Extracting workout: 14
Extracting workout: 15
Extracting workout: 16
Extracting workout: 17
Extracting workout: 18
Extracting workout: 19
Extracting workout: 20
Extracting workout: 21
Extracting workout: 22
Extracting workout: 23
Extracting workout: 24
Extracting workout: 25
Extracting workout: 26
Extracting workout: 27
Extracting workout: 28
Extracting workout: 29
Extracting workout: 30
Extracting work

In [27]:
# Append the newly downloaded workouts to the stored table.
# NOTE(review): this cell is not idempotent -- `all_workouts_df` is rebound to its own
# concatenation, so running the cell again appends `missing_workouts_df` a second time.
all_workouts_df = pd.concat([all_workouts_df, missing_workouts_df])

# NOTE(review): this expression is not the last statement of the cell, so its value is not displayed.
all_workouts_df.head(5)
print(len(all_workouts_df))

1588


In [20]:
# Save the newly downloaded workouts to a CSV file.
# NOTE(review): absolute, machine-specific path -- consider a configurable output directory.
missing_workouts_df.to_csv(r'C:\Users\Manuel Elizaldi\Desktop\Learning-Testing\PyStrava\Outputs\missing_workouts_df.csv')

In [None]:
def UpdateGoogleSheet(service_file_path, all_workouts_df, clean_activities, scope=None, token=None):
    """Combine the workouts stored in the 'workout-data' sheet with the ones it is missing.

    Reads the first worksheet of the 'workout-data' spreadsheet, works out which
    activity ids in ``clean_activities`` are not in it yet, downloads and scores
    those workouts, and returns the stored rows followed by the new ones.
    Nothing is written back to the spreadsheet by this function.

    Parameters
    ----------
    service_file_path : str
        Path to the service-account JSON key file.
    all_workouts_df : pandas.DataFrame
        Ignored; kept only so existing calls keep working. The stored table is
        always re-read from the spreadsheet.
    clean_activities : pandas.DataFrame
        Table with an ``id`` column holding the Strava activity ids.
    scope : list of str, optional
        Scopes for the service account. Defaults to the notebook-level ``myscope``.
    token : str, optional
        Strava access token handed to ``GetAllWorkouts``. Defaults to the
        notebook-level ``access_token``.

    Returns
    -------
    pandas.DataFrame
        The sheet contents, with the newly downloaded workouts appended when
        there are any.
    """
    # Fall back to the notebook globals the function relied on before these
    # parameters existed.
    if scope is None:
        scope = myscope
    if token is None:
        token = access_token

    # Setting up gspread authentications
    mycred = ServiceAccountCredentials.from_json_keyfile_name(service_file_path, scope) # type: ignore
    client = gspread.authorize(mycred)

    # Extracting data and making a dataframe from it
    mysheet = client.open('workout-data').sheet1
    sheet_df = pd.DataFrame(mysheet.get_all_records())

    # An empty sheet gives a frame without columns; treat it as "nothing stored yet"
    if 'activity_id' in sheet_df.columns:
        seen_ids = set(sheet_df['activity_id'])
    else:
        seen_ids = set()

    # Ids from Strava that the sheet does not have, in Strava order and without duplicates
    missing_workouts = []
    for activity_id in clean_activities['id']:
        if activity_id not in seen_ids:
            seen_ids.add(activity_id)
            missing_workouts.append(activity_id)
    # Report the number of workouts actually being added
    print('Adding', len(missing_workouts), 'new workouts.')

    # Nothing to download: skip the API calls entirely
    if not missing_workouts:
        return sheet_df

    # Extracting missing workouts and formatting json
    missing_workouts_json = GetAllWorkouts(missing_workouts, token)
    missing_workouts_df = CleanWorkoutJson(missing_workouts_json)

    # Applying model values
    missing_workouts_df = CreateScoreColumns(missing_workouts_df)

    # concat results
    return pd.concat([sheet_df, missing_workouts_df])