In [157]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from math import radians, sin, cos, sqrt, atan2

%load_ext autoreload
%autoreload 2

In [40]:
data = []
for trackpoint in root.findall('.//{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}Trackpoint'):
    time = trackpoint.find('{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}Time').text
    lat = trackpoint.find('{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}Position/{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}LatitudeDegrees').text
    lon = trackpoint.find('{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}Position/{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}LongitudeDegrees').text
    altitude = trackpoint.find('{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}AltitudeMeters').text
    distance = trackpoint.find('{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}DistanceMeters').text
    data.append([time, float(lat), float(lon), float(altitude), float(distance)])

df = pd.DataFrame(data, columns=['Time', 'Latitude', 'Longitude', 'AltitudeMeters', 'Total Distance (m)'])


In [156]:
df = create_df('data/ride.tcx')
df.head()

Unnamed: 0,Time,Latitude,Longitude,Altitude (M),Total Distance (M)
0,2023-09-22T22:41:40+00:00,37.99898,-1.13315,46.02,0.0
1,2023-09-22T22:41:49+00:00,37.998233,-1.13288,45.98,86.408879
2,2023-09-22T22:41:59+00:00,37.997487,-1.13261,45.68,172.817758
3,2023-09-22T22:42:09+00:00,37.99674,-1.13234,43.0,259.226638
4,2023-09-22T22:42:19+00:00,37.99659,-1.13301,43.69,320.31524


### Add to DF

In [42]:
def seg_speed(row):
    if row.name == 0:
        return float('NaN')
    
    seconds = row['TimeDiff'].total_seconds()
    distance_diff = row['Total Distance (m)'] - df.loc[row.name - 1, 'Total Distance (m)']
    
    return (distance_diff / 1000) / (seconds / 3600)

In [43]:
def prepare_df(df):
    df['Time'] = pd.to_datetime(df['Time'])

    df['TimeDiff'] = df['Time'].diff()
    
    df['AltitudeChange'] = df['AltitudeMeters'].diff()
    
    df['SegmentSpeed'] = df.apply(seg_speed, axis=1)
    
    df['CumTime'] = df['TimeDiff'].cumsum()
    
    df['Total Time (m)'] = df['CumTime'].dt.total_seconds() / 60
    
    df.drop(columns=['CumTime'], inplace=True)

In [44]:
prepare_df(df)
df.head()

Unnamed: 0,Time,Latitude,Longitude,AltitudeMeters,Total Distance (m),TimeDiff,AltitudeChange,SegmentSpeed,Total Time (m)
0,2023-09-22 22:41:40+00:00,37.99898,-1.13315,46.02,0.0,NaT,,,
1,2023-09-22 22:41:49+00:00,37.998233,-1.13288,45.98,86.408879,0 days 00:00:09,-0.04,34.563552,0.15
2,2023-09-22 22:41:59+00:00,37.997487,-1.13261,45.68,172.817758,0 days 00:00:10,-0.3,31.107196,0.316667
3,2023-09-22 22:42:09+00:00,37.99674,-1.13234,43.0,259.226638,0 days 00:00:10,-2.68,31.107197,0.483333
4,2023-09-22 22:42:19+00:00,37.99659,-1.13301,43.69,320.31524,0 days 00:00:10,0.69,21.991897,0.65


### Functions

In [45]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371
    
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)
    
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    
    distance = R * c
    return distance

In [46]:
def calc_total_distance(df):
    total_distance = 0
    for i in range(1, len(df)):
        lat1, lon1 = df['Latitude'][i - 1], df['Longitude'][i - 1]
        lat2, lon2 = df['Latitude'][i], df['Longitude'][i]

        seg_distance = haversine(lat1, lon1, lat2, lon2)
        total_distance += seg_distance
        
    return total_distance

In [47]:
def calc_moving_time(df):
    return df['TimeDiff'].sum()

In [48]:
def elevation_info(df):
    total_ascent = df['AltitudeChange'][df['AltitudeChange'] > 0].sum()
    total_descent = df['AltitudeChange'][df['AltitudeChange'] < 0].sum()
    total_change = df['AltitudeChange'].sum()
    lowest = df['AltitudeMeters'].min()
    highest = df['AltitudeMeters'].max()
    
    return total_ascent, total_descent, total_change, lowest, highest

In [49]:
def speed_info(df):
    average = calc_total_distance(df) / (calc_moving_time(df).total_seconds() / 3600)
    fastest = df['SegmentSpeed'].max()
    slowest = df['SegmentSpeed'].min()
    
    return average, fastest, slowest

### Extractions

In [50]:
f'The total distance is {calc_total_distance(df)} km/h.'

f'The total moving time is {calc_moving_time(df)}'

f'The total ascent was {elevation_info(df)[0]} metres'

f'The total descent was {elevation_info(df)[1]} metres'

f'The total change was {elevation_info(df)[2]} metres'

f'The lowest altitude was {elevation_info(df)[3]} metres'

f'The highest altitude was {elevation_info(df)[4]} metres'

f'The average speed was {speed_info(df)[0]} km/h'

f'The fastest speed was {speed_info(df)[1]} km/h'

f'The slowest speed was {speed_info(df)[2]} km/h'

### Visualising

'The total distance is 84.79457109912336 km/h.'

In [60]:
import plotly.express as px

In [158]:
import plotly.express as px
import plotly.graph_objs as go


fig = px.line(df, x='Total Time (m)', y='Total Distance (m)', title='Distance Over Time')

fig.update_xaxes(title_text='Time (minutes)')
fig.update_yaxes(title_text='Distance (Km)')

# Add a filled area under the curve
# fig.add_trace(go.Scatter(x=df['Total Time (m)'], y=df['Total Distance (m)'], fill='tozeroy', fillcolor='rgba(0,100,80,0.2)'))

fig.add_layout_image(
        dict(
            source="images/bg.jpg",
            xref="x",
            yref="y",
            x=0,
            y=3,
            sizex=2,
            sizey=2,
            sizing="stretch",
            opacity=0.5,
            layer="below")
)

# Show the plot
fig.show()

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['Time', 'Latitude', 'Longitude', 'Altitude (M)', 'Total Distance (M)'] but received: Total Time (m)

In [62]:
df.head()

Unnamed: 0,Time,Latitude,Longitude,AltitudeMeters,Total Distance (m),TimeDiff,AltitudeChange,SegmentSpeed,Total Time (m)
0,2023-09-22 22:41:40+00:00,37.99898,-1.13315,46.02,0.0,NaT,,,
1,2023-09-22 22:41:49+00:00,37.998233,-1.13288,45.98,86.408879,0 days 00:00:09,-0.04,34.563552,0.15
2,2023-09-22 22:41:59+00:00,37.997487,-1.13261,45.68,172.817758,0 days 00:00:10,-0.3,31.107196,0.316667
3,2023-09-22 22:42:09+00:00,37.99674,-1.13234,43.0,259.226638,0 days 00:00:10,-2.68,31.107197,0.483333
4,2023-09-22 22:42:19+00:00,37.99659,-1.13301,43.69,320.31524,0 days 00:00:10,0.69,21.991897,0.65


In [63]:
target_distance = 1000
closest_index = (df['Total Distance (m)'] - target_distance).abs().idxmin()
closest_index - 1


27

In [64]:
import plotly.express as px
import plotly.graph_objs as go

# Your DataFrame preparation code here

# Create the line plot
fig = px.line(df, x='Total Time (m)', y='AltitudeMeters', title='Altitude Over Time')

# Customize the plot further (optional)
fig.update_xaxes(title_text='Time (minutes)')
fig.update_yaxes(title_text='Altitude in M')

# Add a filled area under the curve
fig.add_trace(go.Scatter(x=df['Total Time (m)'], y=df['AltitudeMeters'], fill='tozeroy', fillcolor='rgba(0,100,80,0.2)'))

# Show the plot
fig.show()

In [67]:
import plotly.express as px
import plotly.graph_objs as go

# Your DataFrame preparation code here

# Create the line plot
fig = px.line(df, x='Total Time (m)', y='SegmentSpeed', title='Speed Over Time')

# Customize the plot further (optional)
fig.update_xaxes(title_text='Time (minutes)')
fig.update_yaxes(title_text='Speed in Km/h')

# Add a filled area under the curve
fig.add_trace(go.Scatter(x=df['Total Time (m)'], y=df['SegmentSpeed'], fill='tozeroy', fillcolor='rgba(0,100,80,0.2)'))

# Show the plot
fig.show()

In [68]:
import plotly.express as px
import plotly.graph_objs as go


fig = px.line(df, x='Total Time (m)', y='SegmentSpeed', title='Speed Over Time', line_shape='spline')

# Customize the plot further (optional)
fig.update_xaxes(title_text='Time (minutes)')
fig.update_yaxes(title_text='Speed in Km/h')

# Add a filled area under the curve
fig.add_trace(go.Scatter(x=df['Total Time (m)'], y=df['SegmentSpeed'], fill='tozeroy', fillcolor='rgba(0,100,80,0.2)'))

# Show the plot
fig.show()


ValueError: 
    Invalid value of type 'builtins.str' received for the 'shape' property of scattergl.line
        Received value: 'spline'

    The 'shape' property is an enumeration that may be specified as:
      - One of the following enumeration values:
            ['linear', 'hv', 'vh', 'hvh', 'vhv']

# Splitting into km sections

In [71]:
df.head(30)

Unnamed: 0,Time,Latitude,Longitude,AltitudeMeters,Total Distance (m),TimeDiff,AltitudeChange,SegmentSpeed,Total Time (m)
0,2023-09-22 22:41:40+00:00,37.99898,-1.13315,46.02,0.0,NaT,,,
1,2023-09-22 22:41:49+00:00,37.998233,-1.13288,45.98,86.408879,0 days 00:00:09,-0.04,34.563552,0.15
2,2023-09-22 22:41:59+00:00,37.997487,-1.13261,45.68,172.817758,0 days 00:00:10,-0.3,31.107196,0.316667
3,2023-09-22 22:42:09+00:00,37.99674,-1.13234,43.0,259.226638,0 days 00:00:10,-2.68,31.107197,0.483333
4,2023-09-22 22:42:19+00:00,37.99659,-1.13301,43.69,320.31524,0 days 00:00:10,0.69,21.991897,0.65
5,2023-09-22 22:42:29+00:00,37.99644,-1.13368,43.87,381.403807,0 days 00:00:10,0.18,21.991884,0.816667
6,2023-09-22 22:42:39+00:00,37.99643,-1.13389,43.87,399.855758,0 days 00:00:10,0.0,6.642702,0.983333
7,2023-09-22 22:42:49+00:00,37.99647,-1.13419,44.03,426.541644,0 days 00:00:10,0.16,9.606919,1.15
8,2023-09-22 22:42:58+00:00,37.99656,-1.1344,44.02,447.507511,0 days 00:00:09,-0.01,8.386347,1.3
9,2023-09-22 22:43:08+00:00,37.99664,-1.13446,44.07,457.850028,0 days 00:00:10,0.05,3.723306,1.466667


In [76]:

closest_index

27

In [80]:

distance_until_full_km

62.310120180821855

In [83]:

distance_past_full_km

9.087655226804827

In [84]:

distance_in_segment 

71.39777540762668

In [108]:
percent_owed_to_segment = (distance_until_full_km / distance_in_segment)
percent_owed_to_segment * 100

87.27179498951996

In [89]:
time_spent_mid_segment = df.iloc[closest_index + 1]['TimeDiff']
time_to_add = int(str(time_spent_mid_segment)[-2:]) * percent_owed_to_segment
time_to_add

8.727179498951996

In [90]:
df.iloc[closest_index] # + percent_owed_to_segment

Time                  2023-09-22 22:46:06+00:00
Latitude                               37.99373
Longitude                              -1.13233
AltitudeMeters                            43.79
Total Distance (m)                    937.68988
TimeDiff                        0 days 00:00:10
AltitudeChange                             0.08
SegmentSpeed                           13.08131
Total Time (m)                         4.433333
Name: 27, dtype: object

In [107]:
import math

def interpolate_lat_lon(lat1, lon1, lat2, lon2, total_time, percentage):
    percentage = max(0, min(percentage, 100))
    
    intermediate_lat = lat1 + (lat2 - lat1) * (percentage / 100)
    intermediate_lon = lon1 + (lon2 - lon1) * (percentage / 100)
    
    return intermediate_lat, intermediate_lon

# Create new DF

In [151]:
from datetime import timedelta
import math

def convert_to_splits(df):
    ride_distance = math.floor(calc_total_distance(df))
    
    for km in range(ride_distance + 1):
        
        target_distance = 1000 * (km + 1)
        closest_index = (df['Total Distance (m)'] - target_distance).abs().idxmin() - 1


        time_to_add_timedelta = timedelta(seconds=time_to_add)

        start_time = df.iloc[0]['Time']
        end_time = df.iloc[closest_index]['Time'] + time_to_add_timedelta

        start_lat = df.iloc[0]['Latitude']
        start_lon = df.iloc[0]['Longitude']

        time_in_segment = int(str(df.iloc[closest_index + 1]['Time'] - df.iloc[closest_index]['Time'])[-2:])
        end_lat, end_lon = interpolate_lat_lon(lat1=df.iloc[closest_index]['Latitude'], lon1=df.iloc[closest_index]['Longitude'], lat2=df.iloc[closest_index + 1]['Latitude'], lon2=df.iloc[closest_index + 1]['Longitude'], total_time=time_in_segment, percentage=percent_owed_to_segment * 100)

        altitude_meters = df.iloc[closest_index + 1]['AltitudeMeters']

        total_distance = df.iloc[closest_index]['Total Distance (m)'] + (percent_owed_to_segment * distance_in_segment )

        moving_time_this_segment = end_time - start_time

        hours = moving_time_this_segment.seconds // 3600  # 3600 seconds in an hour
        minutes = (moving_time_this_segment.seconds % 3600) // 60  # 60 seconds in a minute
        seconds = moving_time_this_segment.seconds % 60

        single_figure = hours + (minutes / 60)


        average_speed = (total_distance / 1000) / single_figure
        
        new_entry = {
            'start_time': ,
            'end_time':
            'start_latitude': ,
            'start_longitude': ,
            'end_latitude': ,
            'end_longitude': ,
            'AltitudeMeters': ,
            'Total Distance (m)': ,
            'TimeDiff': ,
            'AltitudeChange': ,
            'SegmentSpeed': ,
        }


SyntaxError: expression expected after dictionary key and ':' (3374519092.py, line 40)

In [152]:
from datetime import timedelta
import math

def convert_to_splits(df):
    ride_distance = math.floor(calc_total_distance(df))
    split_data = []
    index = 0
    
    for km in range(ride_distance + 1):
        
        target_distance = 1000 * (km + 1)
        start_index = index
        closest_index = (df['Total Distance (m)'] - target_distance).abs().idxmin() - 1


        time_to_add_timedelta = timedelta(seconds=time_to_add)

        start_time = df.iloc[start_index]['Time']
        end_time = df.iloc[closest_index]['Time'] + time_to_add_timedelta

        start_lat = df.iloc[start_index]['Latitude']
        start_lon = df.iloc[start_index]['Longitude']

        time_in_segment = int(str(df.iloc[closest_index + 1]['Time'] - df.iloc[closest_index]['Time'])[-2:])
        end_lat, end_lon = interpolate_lat_lon(lat1=df.iloc[closest_index]['Latitude'], lon1=df.iloc[closest_index]['Longitude'], lat2=df.iloc[closest_index + 1]['Latitude'], lon2=df.iloc[closest_index + 1]['Longitude'], total_time=time_in_segment, percentage=percent_owed_to_segment * 100)

        altitude_meters = df.iloc[closest_index + 1]['AltitudeMeters']
        
        
        distance_until_full_km = target_distance - df.iloc[closest_index]['Total Distance (m)']
        distance_past_full_km = df.iloc[closest_index + 1]['Total Distance (m)'] - target_distance
        distance_in_segment = distance_until_full_km + distance_past_full_km
        total_distance = df.iloc[closest_index]['Total Distance (m)'] + (percent_owed_to_segment * distance_in_segment )

        moving_time_this_segment = end_time - start_time

        hours = moving_time_this_segment.seconds // 3600 
        minutes = (moving_time_this_segment.seconds % 3600) // 60
        seconds = moving_time_this_segment.seconds % 60

        single_figure = hours + (minutes / 60)

        average_speed = 1 / single_figure
        
        new_entry = {
            'start_time': start_time,
            'end_time': end_time,
            'moving_time': moving_time_this_segment,
            'start_latitude': start_lat,
            'start_longitude': start_lon,
            'end_latitude': end_lat,
            'end_longitude': end_lon,
            'AltitudeMeters': altitude_meters,
            'Total Distance (m)': total_distance,
            'SegmentSpeed': average_speed,
        }
        
        split_data.append(new_entry)
        
        index = closest_index + 1
        
    new_df = pd.DataFrame(split_data)
    return new_df
        


In [153]:
new_df = convert_to_splits(df)
new_df.head(20)

Unnamed: 0,start_time,end_time,moving_time,start_latitude,start_longitude,end_latitude,end_longitude,AltitudeMeters,Total Distance (m),SegmentSpeed
0,2023-09-22 22:41:40+00:00,2023-09-22 22:46:14.727179+00:00,0 days 00:04:34.727179,37.99898,-1.13315,37.993294,-1.131885,43.57,1000.0,15.0
1,2023-09-22 22:46:16+00:00,2023-09-22 22:49:41.727179+00:00,0 days 00:03:25.727179,37.99323,-1.13182,37.985116,-1.131949,46.78,1956.801641,20.0
2,2023-09-22 22:49:43+00:00,2023-09-22 22:56:36.727179+00:00,0 days 00:06:53.727179,37.98503,-1.131935,37.979499,-1.137157,44.91,2990.301095,10.0
3,2023-09-22 22:56:37+00:00,2023-09-22 23:00:13.727179+00:00,0 days 00:03:36.727179,37.97949,-1.13717,37.981717,-1.146692,45.99,3984.962914,20.0
4,2023-09-22 23:00:14+00:00,2023-09-22 23:03:30.727179+00:00,0 days 00:03:16.727179,37.9817,-1.14673,37.976488,-1.155043,46.55,4994.550526,20.0
5,2023-09-22 23:03:32+00:00,2023-09-22 23:06:37.727179+00:00,0 days 00:03:05.727179,37.97651,-1.15507,37.983563,-1.15947,46.93,5950.734906,20.0
6,2023-09-22 23:06:39+00:00,2023-09-22 23:09:35.727179+00:00,0 days 00:02:56.727179,37.98363,-1.1595,37.984996,-1.170081,48.19,6986.948827,30.0
7,2023-09-22 23:09:37+00:00,2023-09-22 23:12:52.727179+00:00,0 days 00:03:15.727179,37.98499,-1.170204,37.979323,-1.176137,49.87,7990.416531,20.0
8,2023-09-22 23:12:54+00:00,2023-09-22 23:15:50.727179+00:00,0 days 00:02:56.727179,37.97932,-1.17618,37.983213,-1.185453,50.62,8981.303124,30.0
9,2023-09-22 23:15:52+00:00,2023-09-22 23:18:28.727179+00:00,0 days 00:02:36.727179,37.98323,-1.18551,37.983252,-1.19683,51.94,9998.464803,30.0


In [138]:
ride_distance = math.floor(calc_total_distance(df))
for num in range(ride_distance + 1):
    print(num)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
