In [7]:
import os
import sys
import gzip
import gpxpy
import shutil
import fitdecode
import gpxpy.gpx
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt


from keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.linear_model import LinearRegression
from tensorflow.python.framework.random_seed import set_random_seed

if os.getcwd().replace("algorithm_development", "utils") not in sys.path:
    sys.path.append(os.getcwd().replace("algorithm_development", "utils"))

import utils
import parse_fit
import parse_gpx
import parse_tcx

pd.options.display.max_columns = 100

%load_ext autoreload
%autoreload 2

```python
#######################
### UNZIP ALL FILES ###
#######################
strava_export_path = "\\".join(os.getcwd().split("\\")[:-1]) + "\\export_105647830"

for filename in os.listdir(strava_export_path + "\\activities"):
    if filename.endswith('.gz') and filename.replace(".gz", "") not in os.listdir(strava_export_path + "\\activities"): 
        print(filename)
        with gzip.open("\\".join((strava_export_path + "\\activities", filename)), 'rb') as f_in:
            with open("\\".join((strava_export_path + "\\activities", filename))[:-3], 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
```

In [11]:
strava_export_path = "\\".join(os.getcwd().split("\\")[:-2]) + "\\export_105647830"

In [13]:
activities_df = pd.read_csv(f"{strava_export_path}\\activities.csv")
activities_df["Filename"] = activities_df["Filename"].apply(lambda v: v.split("/")[1].replace(".gz", "")
                                                            if isinstance(v, str) else v)

In [14]:
list_of_fit_files = ["\\".join((strava_export_path + "\\activities", v)) for v in os.listdir(strava_export_path + "\\activities")
                     if ".gpx" not in v and ".gz" not in v]
list_of_gpx_files = ["\\".join((strava_export_path + "\\activities", v)) for v in os.listdir(strava_export_path + "\\activities")
                     if ".gpx" in v]

In [16]:
# SAVE FILES TO AVOID RECOMPUTING EACH TIME
fit_file_laps = pd.read_parquet("{}\\fit_file_laps.parquet".format("\\".join(strava_export_path.split("\\")[:-1] + ["\\data"])))
fit_file_points = pd.read_parquet("{}\\fit_file_points.parquet".format("\\".join(strava_export_path.split("\\")[:-1] + ["\\data"])))

In [101]:
fit_file_points.pivot_table(index = "Activity Name", values = "timestamp", aggfunc = "min").sort_values("timestamp", ascending=False)

Unnamed: 0_level_0,timestamp
Activity Name,Unnamed: 1_level_1
GF IlLombardia,2024-10-13 05:31:03+00:00
Zwift - Mayan 8 Free ride,2024-10-11 16:44:31+00:00
Zwift - Snowman free ride,2024-10-09 16:54:34+00:00
Zwift - Control The Burn in Makuri Islands,2024-10-07 18:21:00+00:00
Zwift - HWBTWTDWH in Watopia,2024-10-07 16:37:50+00:00
...,...
Azzardando ghiaini a Teolo,2023-03-19 08:49:13+00:00
Long Ride di Marzo,2023-03-16 09:09:02+00:00
Caldo e vento sugli Euganei🔥🌬,2023-03-09 09:50:25+00:00
Back on the road,2023-03-04 10:06:35+00:00


In [98]:
fit_file_points.head()

Unnamed: 0,FileName,latitude,longitude,lap,altitude,cadence,calories,cycle_length,distance,enhanced_altitude,enhanced_speed,grade,heart_rate,position_lat,position_long,power,slope,current_slope,filtered_altitude,unknown_61,unknown_66,unknown_108,unknown_90,resistance,speed,temperature,time_from_course,timestamp,Activity ID,Activity Date,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Max Heart Rate,Relative Effort,Commute,Activity Private Note,Activity Gear,Filename,Athlete Weight,Bike Weight,Elapsed Time.1,Moving Time,Distance.1,Max Speed,Average Speed,Elevation Gain,Elevation Loss,...,Type,Start Time,Weighted Average Power,Power Count,Prefer Perceived Exertion,Perceived Relative Effort,Commute.1,Total Weight Lifted,From Upload,Grade Adjusted Distance,Weather Observation Time,Weather Condition,Weather Temperature,Apparent Temperature,Dewpoint,Humidity,Weather Pressure,Wind Speed,Wind Gust,Wind Bearing,Precipitation Intensity,Sunrise Time,Sunset Time,Moon Phase,Bike,Gear,Precipitation Probability,Precipitation Type,Cloud Cover,Weather Visibility,UV Index,Weather Ozone,Jump Count,Total Grit,Average Flow,Flagged,Average Elapsed Speed,Dirt Distance,Newly Explored Distance,Newly Explored Dirt Distance,Activity Count,Total Steps,Carbon Saved,Pool Length,Training Load,Intensity,Average Grade Adjusted Pace,Timer Time,Total Cycles,Media
0,10057687651.fit,45.467359,9.129405,1,,,0.0,,0.0,,0.0,0.0,,542446724,108918042,,,,,,,,,,0.0,27.0,,2023-07-01 10:15:11+00:00,9376555421,"Jul 1, 2023, 10:15:11 AM",Day 1: Verso Asti,Ride,,23489,135.12,178.0,380.0,False,,Kross Vento 5.0,10057687651.fit,,9.8,23489.0,21619.0,135119.46875,16.460001,6.250033,573.0,543.0,...,,,,,0.0,180.0,0.0,,1.0,,1688206000.0,2.0,23.209999,24.049999,17.52,0.7,1009.0,0.95,3.03,206.0,0.0,1688183000.0,1688239000.0,0.375,11685373.0,,0.7,5.0,0.43,28541.710938,6.0,,,,,0.0,5.752457,680.5,,,,,,,,,,,,media/c57a75d2-79d8-41bb-95c1-7aa0e4688cc0.jpg...
1,10057687651.fit,45.467359,9.129405,1,,,0.0,,0.0,,0.0,0.0,,542446724,108918042,,,,,,,,,,0.0,27.0,,2023-07-01 10:15:12+00:00,9376555421,"Jul 1, 2023, 10:15:11 AM",Day 1: Verso Asti,Ride,,23489,135.12,178.0,380.0,False,,Kross Vento 5.0,10057687651.fit,,9.8,23489.0,21619.0,135119.46875,16.460001,6.250033,573.0,543.0,...,,,,,0.0,180.0,0.0,,1.0,,1688206000.0,2.0,23.209999,24.049999,17.52,0.7,1009.0,0.95,3.03,206.0,0.0,1688183000.0,1688239000.0,0.375,11685373.0,,0.7,5.0,0.43,28541.710938,6.0,,,,,0.0,5.752457,680.5,,,,,,,,,,,,media/c57a75d2-79d8-41bb-95c1-7aa0e4688cc0.jpg...
2,10057687651.fit,45.467388,9.129429,1,,0.0,0.0,,0.0,,1.03,0.0,87.0,542447068,108918332,,,,,,,,,,1.03,27.0,,2023-07-01 10:15:17+00:00,9376555421,"Jul 1, 2023, 10:15:11 AM",Day 1: Verso Asti,Ride,,23489,135.12,178.0,380.0,False,,Kross Vento 5.0,10057687651.fit,,9.8,23489.0,21619.0,135119.46875,16.460001,6.250033,573.0,543.0,...,,,,,0.0,180.0,0.0,,1.0,,1688206000.0,2.0,23.209999,24.049999,17.52,0.7,1009.0,0.95,3.03,206.0,0.0,1688183000.0,1688239000.0,0.375,11685373.0,,0.7,5.0,0.43,28541.710938,6.0,,,,,0.0,5.752457,680.5,,,,,,,,,,,,media/c57a75d2-79d8-41bb-95c1-7aa0e4688cc0.jpg...
3,10057687651.fit,45.467388,9.129429,1,,0.0,0.0,,0.0,,1.03,0.0,87.0,542447068,108918332,,,,,,,,,,1.03,27.0,,2023-07-01 10:15:18+00:00,9376555421,"Jul 1, 2023, 10:15:11 AM",Day 1: Verso Asti,Ride,,23489,135.12,178.0,380.0,False,,Kross Vento 5.0,10057687651.fit,,9.8,23489.0,21619.0,135119.46875,16.460001,6.250033,573.0,543.0,...,,,,,0.0,180.0,0.0,,1.0,,1688206000.0,2.0,23.209999,24.049999,17.52,0.7,1009.0,0.95,3.03,206.0,0.0,1688183000.0,1688239000.0,0.375,11685373.0,,0.7,5.0,0.43,28541.710938,6.0,,,,,0.0,5.752457,680.5,,,,,,,,,,,,media/c57a75d2-79d8-41bb-95c1-7aa0e4688cc0.jpg...
4,10057687651.fit,45.467388,9.129429,1,,0.0,0.0,,0.0,,1.03,0.0,88.0,542447068,108918332,,,,,,,,,,1.03,27.0,,2023-07-01 10:15:19+00:00,9376555421,"Jul 1, 2023, 10:15:11 AM",Day 1: Verso Asti,Ride,,23489,135.12,178.0,380.0,False,,Kross Vento 5.0,10057687651.fit,,9.8,23489.0,21619.0,135119.46875,16.460001,6.250033,573.0,543.0,...,,,,,0.0,180.0,0.0,,1.0,,1688206000.0,2.0,23.209999,24.049999,17.52,0.7,1009.0,0.95,3.03,206.0,0.0,1688183000.0,1688239000.0,0.375,11685373.0,,0.7,5.0,0.43,28541.710938,6.0,,,,,0.0,5.752457,680.5,,,,,,,,,,,,media/c57a75d2-79d8-41bb-95c1-7aa0e4688cc0.jpg...


In [83]:
_, dr = parse_fit.get_dataframes("{}\\16438299806_ACTIVITY.fit".format("\\".join(strava_export_path\
                                                                                 .split("\\")[:-1] + ["\\data\\DR"])))

# keep only columns with some values
dr = dr.loc[:, dr.notna().sum() > 0].drop(["FileName", "position_lat", "position_long", "lap",
                                           "enhanced_speed", "enhanced_altitude"], axis = 1)

dr["power"] = dr["power"].fillna(0)
dr["cadence"] = dr["cadence"].fillna(0)
dr["heart_rate"] = dr["heart_rate"].bfill()
dr["current_slope"] = dr["current_slope"].bfill()
dr["filtered_altitude"] = dr["filtered_altitude"].bfill()

In [117]:
# COMPUTE ADDITIONAL FEATURES:
# > lags on HR

# > lags on speed

# > lags on cadence
dr["cadence_5s"] = dr["cadence"].shift(5).bfill()
dr["cadence_10s"] = dr["cadence"].shift(5).bfill()
dr["cadence_30s"] = dr["cadence"].shift(5).bfill()
# > climbing so far
dr["elevation"] = dr["altitude"].diff().clip(lower=0).cumsum().fillna(0)
# > avg HR so far
dr["avg_heart_rate"] = dr["heart_rate"].expanding().mean()
# > distance so far --> campo distance


dr = dr.drop(["latitude", "longitude", "filtered_altitude", "temperature"])

dr.head(10)

Unnamed: 0,latitude,longitude,altitude,cadence,distance,heart_rate,power,current_slope,filtered_altitude,speed,temperature,timestamp
0,45.182134,9.152311,68.0,0.0,2.2,101.0,0.0,-0.140185,74.813774,2.398,32,2024-07-16 07:12:10+00:00
1,45.182122,9.152343,68.0,0.0,4.99,102.0,0.0,-0.140185,74.813774,2.473,32,2024-07-16 07:12:11+00:00
2,45.182112,9.152375,68.0,0.0,7.75,103.0,0.0,-0.140185,74.813774,2.473,32,2024-07-16 07:12:12+00:00
3,45.182103,9.152404,68.0,0.0,10.18,104.0,0.0,-0.140185,74.813774,2.286,31,2024-07-16 07:12:13+00:00
4,45.182095,9.152429,68.0,0.0,12.35,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:14+00:00
5,45.18209,9.152451,68.0,0.0,14.14,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:15+00:00
6,45.182084,9.152469,68.0,0.0,15.61,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:16+00:00
7,45.182076,9.152477,68.0,0.0,16.76,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:17+00:00
8,45.182066,9.15249,68.2,15.0,18.16,103.0,37.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:18+00:00
9,45.182056,9.152513,68.2,15.0,20.19,103.0,120.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:19+00:00


In [121]:
dr["cadence"].shift(5).bfill().head(20)

0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
5      0.0
6      0.0
7      0.0
8      0.0
9      0.0
10     0.0
11     0.0
12     0.0
13    15.0
14    15.0
15    29.0
16    29.0
17    36.0
18    41.0
19    41.0
Name: cadence, dtype: float64

In [109]:
s = fit_file_points[fit_file_points["Activity Name"] == "GF IlLombardia"]["enhanced_altitude"]#.diff().fillna(0)

In [113]:
# s.diff()[s.diff() > 0].sum()

s.diff().clip(lower=0).cumsum().fillna(0)

820185       0.0
820186       0.0
820187       0.2
820188       0.6
820189       1.0
           ...  
827693    1752.8
827694    1752.8
827695    1753.6
827696    1753.8
827697    1754.0
Name: enhanced_altitude, Length: 7513, dtype: float64

In [104]:
dr["altitude"][dr["altitude"] > 0].diff().sum()

5.399999999999977

In [80]:
dr.head(40)

Unnamed: 0,latitude,longitude,altitude,cadence,distance,enhanced_altitude,heart_rate,power,current_slope,filtered_altitude,speed,temperature,timestamp
0,45.182134,9.152311,68.0,0.0,2.2,68.0,101.0,0.0,-0.140185,74.813774,2.398,32,2024-07-16 07:12:10+00:00
1,45.182122,9.152343,68.0,0.0,4.99,68.0,102.0,0.0,-0.140185,74.813774,2.473,32,2024-07-16 07:12:11+00:00
2,45.182112,9.152375,68.0,0.0,7.75,68.0,103.0,0.0,-0.140185,74.813774,2.473,32,2024-07-16 07:12:12+00:00
3,45.182103,9.152404,68.0,0.0,10.18,68.0,104.0,0.0,-0.140185,74.813774,2.286,31,2024-07-16 07:12:13+00:00
4,45.182095,9.152429,68.0,0.0,12.35,68.0,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:14+00:00
5,45.18209,9.152451,68.0,0.0,14.14,68.0,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:15+00:00
6,45.182084,9.152469,68.0,0.0,15.61,68.0,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:16+00:00
7,45.182076,9.152477,68.0,0.0,16.76,68.0,104.0,0.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:17+00:00
8,45.182066,9.15249,68.2,15.0,18.16,68.2,103.0,37.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:18+00:00
9,45.182056,9.152513,68.2,15.0,20.19,68.2,103.0,120.0,-0.140185,74.813774,1.764,31,2024-07-16 07:12:19+00:00


In [79]:
dr["altitude"].diff()

0        NaN
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
15693    0.0
15694    0.0
15695    0.0
15696    0.2
15697    0.0
Name: altitude, Length: 15698, dtype: float64