# Import Libraries

In [1]:
import requests
import json
from math import ceil
import pandas as pd
import numpy as np
import os
from math import floor, ceil
import plotly.graph_objects as go

# Get Data

In [2]:
NAME = os.environ.get('PELOTON_DISPLAY_NAME')
USER = os.environ.get('PELOTON_USER')
EMAIL = os.environ.get('PELOTON_EMAIL')
PWD = os.environ.get('PELOTON_PWD')
PAYLOAD = {
    'username_or_email': EMAIL, 
    'password': PWD
}
BASE_URL = 'https://api.onepeloton.com'
BASE_API_URL = f'{BASE_URL}/api'
LOGIN_URL = f'{BASE_URL}/auth/login'
USER_URL = f'{BASE_API_URL}/me'

In [3]:
s = requests.Session()
s.post(LOGIN_URL, json=PAYLOAD)

<Response [200]>

In [4]:
me = s.get(USER_URL)

In [5]:
total_workouts = me.json().get('total_workouts')
pages = ceil(total_workouts/20)
userid = me.json().get('id')

In [6]:
WORKOUTS_URL = f'{BASE_API_URL}/user/{userid}/workouts'

In [7]:
workouts_df = pd.DataFrame()

In [8]:
for i in range(pages):
    url = WORKOUTS_URL + '?page={}'.format(i)
    dat = s.get(url)
    dats = dat.json().get('data')
    workouts_df = pd.concat([workouts_df, pd.DataFrame(dats)], axis=0)

In [9]:
workouts_df['created_at'] = pd.to_datetime(workouts_df['created_at'], unit='s')
workouts_df['start_time'] = pd.to_datetime(workouts_df['start_time'], unit='s')
workouts_df['created'] = pd.to_datetime(workouts_df['created'], unit='s')
workouts_df['device_time_created_at'] = pd.to_datetime(workouts_df['device_time_created_at'], unit='s')

In [10]:
workout_ids = list(pd.unique(workouts_df['id']))

In [11]:
existing_data = pd.read_excel('data/{}_workouts.xlsx'.format(NAME))

In [12]:
existing_ids = existing_data['id'].values.tolist()

In [13]:
new_cols_added = 1
if new_cols_added:
    new_ids = [wid for wid in workout_ids]
else:
    new_ids = [wid for wid in workout_ids if wid not in existing_ids]

In [14]:
WORKOUT_DETAILS_URL = f'{BASE_API_URL}/workout'
INSTRUCTOR_URL = f'{BASE_API_URL}/instructor'
METRICS = f'{WORKOUT_DETAILS_URL}/performance_graph'

In [15]:
workout_url = WORKOUT_DETAILS_URL + '/{}'.format(workout_ids[5])
metrics_url = workout_url + '/performance_graph'
achievements_url = workout_url + '/achievements'
summary_url = workout_url + '/summary'
workout_resp = s.get(workout_url)
workout = workout_resp.json()

In [16]:
s.get(workout_url).json()

{'created_at': 1618254175,
 'device_type': 'home_bike_v1',
 'end_time': 1618256034,
 'fitbit_id': None,
 'fitness_discipline': 'cycling',
 'has_pedaling_metrics': True,
 'has_leaderboard_metrics': True,
 'id': '255c2dd79cc348d2b5e659c4956e2a08',
 'is_total_work_personal_record': True,
 'metrics_type': 'cycling',
 'name': 'Cycling Workout',
 'peloton_id': '1dd240be7bf944a29a78dacb69776ec9',
 'platform': 'home_bike',
 'start_time': 1618254237,
 'strava_id': '5115747699',
 'status': 'COMPLETE',
 'timezone': 'Etc/GMT+4',
 'title': None,
 'total_work': 557100.71,
 'user_id': 'edb26e1666aa47078e66c854dabeed4f',
 'workout_type': 'class',
 'total_video_watch_time_seconds': 1831,
 'total_video_buffering_seconds': 0,
 'v2_total_video_watch_time_seconds': 1982,
 'v2_total_video_buffering_seconds': 0,
 'total_music_audio_play_seconds': None,
 'total_music_audio_buffer_seconds': None,
 'created': 1618254175,
 'device_time_created_at': 1618239775,
 'is_skip_intro_available': True,
 'ride': {'availab

In [17]:
workout_info = pd.DataFrame()
if len(new_ids) > 0:
    for wid in new_ids:
        workout_url = WORKOUT_DETAILS_URL + '/{}'.format(wid)
        metrics_url = workout_url + '/performance_graph'
        summary_url = workout_url + '/summary'
        workout = s.get(workout_url).json()
        summary = s.get(summary_url).json()
#         metrics = s.get(metrics_url).json()
        class_title = workout.get('ride').get('title')
        length = workout.get('ride').get('pedaling_duration')/60
        difficulty = workout.get('ride').get('difficulty_rating_avg')
        instructor_id = workout.get('ride').get('instructor_id')
        d = {
            'workout_id': wid, 
            'instructor_id': instructor_id, 
            'class_title': workout.get('ride').get('title'), 
            'length': floor(round(workout.get('ride').get('pedaling_duration')/60,0)/5)*5, 
            'difficulty': workout.get('ride').get('difficulty_rating_avg'),
            'leaderboard_rank': workout.get('leaderboard_rank'),
            'total_leaderboard_users': workout.get('total_leaderboard_users'),
            'max_power': [summary.get('max_power')],
            'avg_power': [summary.get('avg_power')],
            'max_cadence': [summary.get('max_cadence')],
            'avg_cadence': [summary.get('avg_cadence')],
            'max_resistance': [summary.get('max_resistance')],
            'avg_resistance': [summary.get('avg_resistance')],
            'max_speed': [summary.get('max_speed')],
            'avg_speed': [summary.get('avg_speed')],
            'max_heart_rate': [summary.get('max_heart_rate')],
            'avg_heart_rate': [summary.get('avg_heart_rate')],
            'distance': [summary.get('distance')],
            'calories': [summary.get('calories')]
        }
        df = pd.DataFrame(data=d)
        workout_info = pd.concat([workout_info, df], axis=0)

In [18]:
if len(new_ids) > 0:
    instructor_ids = workout_info['instructor_id'].unique()
    instructors = pd.DataFrame(columns=['instructor_id','instructor','instructor_spotify_playlist'])
    for ins_id in instructor_ids:
        instructor_url = INSTRUCTOR_URL + '/{}'.format(ins_id)
        instructor_resp = s.get(instructor_url)
        instructor = instructor_resp.json()
        name = instructor.get('name')
        spotify_uri = instructor.get('spotify_playlist_uri')
        d = {'instructor_id': ins_id, 'instructor': [name], 'instructor_spotify_playlist': [spotify_uri]}
        df = pd.DataFrame(data=d)
        instructors = pd.concat([instructors, df], axis=0)

In [19]:
if len(new_ids) > 0:
    new_workouts = workouts_df.merge(workout_info, left_on='id', right_on='workout_id').merge(instructors, on='instructor_id')

In [20]:
if new_cols_added:
    all_workout_data = new_workouts
else:
    if len(new_ids) > 0:
        all_workout_data = pd.concat([existing_data, new_workouts], axis=0)
    else:
        all_workout_data = existing_data

In [27]:
with pd.ExcelWriter('data/{}_workouts.xlsx'.format(USER)) as writer:
    all_workout_data.to_excel(writer, sheet_name='workouts', index=False)

In [26]:
all_workout_data.loc[all_workout_data['distance']>0,]

Unnamed: 0,created_at,device_type,end_time,fitbit_id,fitness_discipline,has_pedaling_metrics,has_leaderboard_metrics,id,is_total_work_personal_record,metrics_type,...,avg_resistance,max_speed,avg_speed,max_heart_rate,avg_heart_rate,distance,calories,instructor,instructor_spotify_playlist,power_zone
0,2021-04-20 12:56:00,home_bike_v1,1618926120,,cycling,True,True,078371f837954cf08ae2995b2ad311c9,False,cycling,...,48.63,25.76,22.23,166.0,144.52,16.67,658.61,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,True
1,2021-04-01 00:47:03,home_bike_v1,1617239883,,cycling,True,True,a2798fc852154cd7a4296e02ae4908d7,False,cycling,...,49.62,24.55,21.81,0.0,0.00,10.90,582.45,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,True
2,2021-02-14 17:00:16,home_bike_v1,1613323276,,cycling,True,True,3bb6193b2c98451f8c69b1c3ed5618e4,True,cycling,...,66.38,32.22,25.55,0.0,0.00,8.51,582.43,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,False
4,2020-07-01 10:56:31,home_bike_v1,1593602850,,cycling,True,True,61298a352cc64458a4810c71d4553a42,False,cycling,...,58.87,30.15,22.52,0.0,0.00,11.25,651.80,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,False
5,2020-05-21 11:55:19,home_bike_v1,1590063378,,cycling,True,True,25055692e9e1494f9d1f3d609ef03189,False,cycling,...,60.41,32.81,25.09,0.0,0.00,8.36,557.93,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,False
6,2020-05-21 11:38:30,home_bike_v1,1590062070,,cycling,True,True,97c5ac8a45434a10b2b53d4ab2aa2acc,False,cycling,...,41.15,26.09,18.05,0.0,0.00,4.51,192.92,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,False
7,2020-05-15 10:38:19,home_bike_v1,1589542759,,cycling,True,True,c7579823e7704db4b2bd68c3ff4b26f3,False,cycling,...,50.43,24.94,21.29,0.0,0.00,21.29,1098.74,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,True
8,2020-05-03 10:56:36,home_bike_v1,1588505256,,cycling,True,True,becd4d6490f64010a066d494e76fe8a9,False,cycling,...,48.78,25.13,20.75,0.0,0.00,10.37,516.07,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,True
9,2020-04-28 14:12:21,home_bike_v1,1588083800,,cycling,True,True,ab0243c1706d4d8a87691ecf46493b10,False,cycling,...,40.22,20.25,16.12,0.0,0.00,2.68,93.81,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,False
10,2020-04-22 15:53:11,home_bike_v1,1587572051,,cycling,True,True,3549c2883329404499324f60cfa117ec,False,cycling,...,45.95,23.61,20.33,0.0,0.00,6.77,325.02,Matt Wilpers,spotify:user:onepeloton:playlist:2EonpjYpcEf7r...,True


In [23]:
all_workout_data['power_zone'] = all_workout_data['class_title'].str.contains('Power Zone')

# Charts/Metrics

## Totals

In [24]:
total_work = all_workout_data['total_work'].sum()
total_calories = all_workout_data['calories'].sum()
total_miles = all_workout_data['distance'].sum()
total_workouts = all_workout_data.shape[0]
print('Workouts:', total_workouts)
print('Total Output:', total_work/1000)
print('Total Calories:', total_calories)
print('Total Distance:', total_miles, 'Miles')

Workouts: 440
Total Output: 99051.43888
Total Calories: 164030.43
Total Distance: 2622.3500000000004 Miles


In [25]:
all_workout_data.loc[all_workout_data['created_ym'] == '2021-04', ].transpose()

KeyError: 'created_ym'

In [None]:
all_workout_data['created_ym'] = all_workout_data['created_at'].dt.strftime('%Y-%m')

In [None]:
all_workout_data.groupby('created_ym').agg({
    'id': 'nunique',
    'length': 'sum'
}).reset_index()

In [None]:
all_workout_data.groupby('length').agg({
    'id': 'nunique',
    'total_work': 'mean'
}).reset_index()

In [None]:
all_workout_data.columns

In [None]:
all_workout_data[['id','total_work','calories','distance','difficulty','leaderboard_rank_pct_of_total']].head()

In [None]:
go.Figure(go.Histogram(
    x=all_workout_data.loc[all_workout_data['total_work'] > 0, 'total_work']/1000
))

## PRs

In [None]:
cycling_prs = all_workout_data.loc[
    all_workout_data['fitness_discipline'] == 'cycling', 
].groupby('length').agg(
    {
        'total_work': ['max','mean'],
        'id': 'count'
    }
).reset_index()
cycling_prs.columns = ['_'.join(col) for col in cycling_prs.columns]
cycling_prs.columns = ['class_length', 'personal_record', 'average_output', 'rides']

In [None]:
cycling_prs

## Workout Types

In [None]:
all_workout_data.groupby('fitness_discipline').agg({
    'id': 'count'
}).reset_index().rename(columns={
    'id': 'workouts'
}).sort_values('workouts', ascending=False)

## Instructors

### Overall

In [None]:
instructor_agg =  all_workout_data.groupby('instructor').agg({
    'id': 'count',
    'total_work': ['mean','max']
}).reset_index()
instructor_agg.columns = ['_'.join(col) if col[1] != '' else col[0] for col in instructor_agg.columns]
instructor_agg.columns = ['instructor','workouts','mean_output','pr']

In [None]:
instructor_agg

### By Workout Type

In [None]:
all_workout_data.groupby(['fitness_discipline','instructor']).agg({
    'id': 'count'
}).reset_index().rename(columns={
    'id': 'workouts'
}).sort_values(['fitness_discipline', 'workouts'], ascending=False)

## Trending

In [None]:
all_workout_data['leaderboard_rank_pct_of_total'] = np.where(
    pd.isna(all_workout_data['leaderboard_rank']) | pd.isna(all_workout_data['total_leaderboard_users']), 
    0, 
    all_workout_data['leaderboard_rank']/all_workout_data['total_leaderboard_users']
)

In [None]:
all_workout_data.sort_values('created_at', ascending=False).head().transpose()

In [None]:
sorted(all_workout_data['class_title'].str.replace('^\\d+ min | Ride$','',regex=True).unique().tolist())