In [116]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.image as img
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [117]:
# Load the raw Garmin activity export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run elsewhere until it is pointed at a local copy.
garmin_csv_path = r'C:\Users\mcdlu\Desktop\School\STAT 386\Data\Garmin Activities.csv'
df = pd.read_csv(garmin_csv_path)

In [143]:
# ---------------------------------------------------------------------------
# Clean the raw export (`df`) and build `Work`, the tidy frame used by every
# plot below: one row per cycling / running / swimming / triathlon activity.
# ---------------------------------------------------------------------------

# Strip commas (presumably thousands separators) so numeric text can be cast.
# NOTE(review): regex=True applies this to every string cell in every column,
# free-text ones such as Title included.
df = df.replace(',', '', regex=True)
# '--' is presumably the export's placeholder for "no value"; turn it into 0.
# NOTE(review): with a non-string replacement and regex=True, pandas swaps the
# *whole* cell for 0 whenever '--' occurs anywhere in it - confirm no text
# column legitimately contains '--'.
df = df.replace('--', 0, regex=True)

df = df.astype({'Distance': float, 'Aerobic TE': float, 'Activity Type': str})

# NOTE(review): apart from 'Date' these look like duration columns; they are
# parsed as timestamps here exactly as before - confirm that is intended.
timestamp_columns = ['Date', 'Moving Time', 'Elapsed Time', 'Avg Ground Contact Time']
df[timestamp_columns] = df[timestamp_columns].apply(pd.to_datetime)

# Collapse activity sub-types into the four headline disciplines. Labels that
# are not listed pass through unchanged.
activity_groups = {
    'Pool Swimming': 'Swimming',
    'Open Water Swimming': 'Swimming',
    'Trail Running': 'Running',
    'Multisport': 'Triathlon',
    'Indoor Cycling': 'Cycling',
}
df['Activity Type'] = df['Activity Type'].map(lambda label: activity_groups.get(label, label))

# Keep only the disciplines of interest and the columns the plots use.
# reset_index() (without drop) keeps the original row label in an 'index'
# column and gives Work a fresh 0..n-1 index.
work_columns = ['Activity Type', 'Title', 'Date', 'Distance', 'Moving Time',
                'Avg HR', 'Max HR', 'Aerobic TE', 'Calories']
is_tracked = df['Activity Type'].isin(['Cycling', 'Running', 'Swimming', 'Triathlon'])
Work = df.loc[is_tracked, work_columns].reset_index()

# Rescale swim distances. The factors are miles-per-metre and miles-per-yard
# (1/1760); the four swims listed below get the metre factor and every other
# swim gets the yard factor (presumably matching the unit each was recorded
# in - confirm against the device settings).
MILES_PER_METER = 0.000621371
MILES_PER_YARD = 0.000568182
# Converted to real timestamps up front so isin() compares datetime with
# datetime instead of relying on implicit string casting.
metre_swim_starts = pd.to_datetime([
    '2020-07-29 15:26:13',
    '2020-09-22 15:06:05',
    '2021-08-12 15:03:31',
    '2021-08-19 14:36:08',
])
is_swim = Work['Activity Type'] == 'Swimming'
uses_metre_factor = Work['Date'].isin(metre_swim_starts)
Work.loc[is_swim & uses_metre_factor, 'Distance'] *= MILES_PER_METER
Work.loc[is_swim & ~uses_metre_factor, 'Distance'] *= MILES_PER_YARD

# Label each activity as a race or ordinary training, keyed by start time.
race_starts = pd.to_datetime([
    '2022-04-23 07:08:58',
    '2022-07-16 07:16:24',
    '2021-09-11 07:41:11',
])
Work['Competition'] = np.where(Work['Date'].isin(race_starts), 'Race', 'Training')

In [144]:
# Create running total for Distance.
# Row i receives the sum of Distance over rows i..end, i.e. the total is
# accumulated from the last row upwards (presumably the export lists the
# newest activity first, which would make this a chronological cumulative
# distance - confirm).
#
# Implemented as a reverse cumulative sum rather than element-by-element
# assignment through Work['Running Total'][x]: that chained indexing raises
# SettingWithCopyWarning and does not write back to Work when pandas
# Copy-on-Write is enabled. skipna=False keeps the loop's behaviour of letting
# a missing Distance propagate to every row above it; an empty Work simply
# yields an empty column instead of raising.
Work['Running Total'] = Work['Distance'][::-1].cumsum(skipna=False)[::-1]

In [145]:
# Fixed colour per activity type, passed to plotly as a discrete colour map.
colours = dict(
    Swimming='blue',
    Running='lightgreen',
    Cycling='grey',
    Triathlon='red',
)

In [146]:

# Scatter of each activity's distance against its date: colour encodes the
# activity type, marker symbol encodes the Competition label, and hovering
# shows the activity title.
scatter = px.scatter(
    data_frame=Work,
    x='Date',
    y='Distance',
    color='Activity Type',
    color_discrete_map=colours,
    symbol='Competition',
    hover_name='Title',
)

# Tight margins, minimal theme, one angled x tick per month. The chained call
# is the cell's last expression, so the figure is rendered as the cell output.
scatter.update_layout(
    template='simple_white',
    yaxis_title="Individual Activity Distance",
    margin={'l': 10, 'r': 10, 't': 20, 'b': 20},
).update_xaxes(
    dtick="M1",
    tickangle=-45,
    tickfont={'family': 'Calibri', 'size': 12},
)


In [147]:
# Total Distance per activity type (displayed as the cell output).
Work.groupby('Activity Type')[['Distance']].sum()

Unnamed: 0_level_0,Distance
Activity Type,Unnamed: 1_level_1
Cycling,1258.49
Running,520.2
Swimming,29.795128
Triathlon,94.83


In [148]:

# Line chart of the 'Running Total' column against activity date.
line = px.line(data_frame=Work, x='Date', y='Running Total')

# Same presentation as the scatter: tight margins, minimal theme, one angled
# x tick per month. The chained call is the cell's last expression, so the
# figure is rendered as the cell output.
line.update_layout(
    template='simple_white',
    yaxis_title="Total Distance",
    margin={'l': 10, 'r': 10, 't': 20, 'b': 20},
).update_xaxes(
    dtick="M1",
    tickangle=-45,
    tickfont={'family': 'Calibri', 'size': 12},
)

In [149]:
# Combine the two charts in a single figure with two y axes: the scatter
# traces stay on the primary axis and the running-total line goes on the
# secondary one.
Full_fig = make_subplots(specs=[[dict(secondary_y=True)]])

# Point the line trace(s) at the secondary axis. This edits the existing
# `line` figure in place before its traces are copied across.
line.update_traces(yaxis="y2")

# Line trace(s) first, then the scatter traces.
Full_fig.add_traces((*line.data, *scatter.data))

# Give every trace a line colour equal to its own marker colour.
for trace in Full_fig.data:
    trace.update(line={'color': trace.marker.color})

# Axis titles plus the same margins / theme as the individual charts.
Full_fig.update_layout(
    yaxis_title="Individual Activity Distance",
    yaxis2_title="Total Distance",
    margin={'l': 10, 'r': 10, 't': 20, 'b': 20},
    template='simple_white',
)

# One angled x tick per month.
Full_fig.update_xaxes(
    dtick="M1",
    tickangle=-45,
    tickfont={'family': 'Calibri', 'size': 12},
)
Full_fig.show()


