- Point at directory of new export
- Pull in 'activities'
- Build a top-level activity dataframe
- For run, walk and ride create 3 lists of files
- For each activity: 
    - create a dataframe of the time, heart rate, speed, lat/long and output


In [None]:
import os
import pandas as pd
import gpxpy
import gzip
#from lxml import etree
from fitparse import FitFile

In [None]:
# Root folder of the Strava export to analyse; change this to point the
# notebook at a different export.
main_dir = '../strava_data_dumps/STRAVA+export_8029714'

In [None]:
# Load the activity index that ships with the export and parse its timestamps.
head_df = pd.read_csv(f'{main_dir}/activities.csv')
head_df['Activity Date'] = pd.to_datetime(head_df['Activity Date'], format='%b %d, %Y, %I:%M:%S %p')

# Normalise the headers to UPPER_SNAKE_CASE so they are easy to reference.
head_df.columns = head_df.columns.str.upper().str.replace(' ', '_')
# Drop columns that have fewer than 1000 non-null values.
head_df.dropna(thresh=1000, axis=1, inplace=True)
# Derive the track-file suffix from the last six characters of FILENAME,
# removing each "digit + following character" pair that belongs to the id.
# The pattern is a raw string so that \d is a regex class rather than an
# invalid Python escape sequence.
# NOTE(review): this yields '.gpx' (with dot) but 'fit.gz' / 'tcx.gz' (without).
head_df['FILE_SUFFIX'] = head_df['FILENAME'].str[-6:].str.replace(r'\d.', '', regex=True)


In [None]:
# Keep only the identifying columns and the headline metrics of each activity.
# NOTE(review): the '.1' suffixes are presumably pandas' de-duplication of
# repeated CSV headers — confirm against activities.csv.
summary_columns = [
    # identity
    'ACTIVITY_ID', 'FILENAME', 'FILE_SUFFIX',
    'ACTIVITY_DATE', 'ACTIVITY_NAME', 'ACTIVITY_TYPE',
    # effort
    'CALORIES', 'AVERAGE_HEART_RATE', 'MAX_HEART_RATE',
    # time, distance and speed
    'ELAPSED_TIME.1', 'MOVING_TIME', 'DISTANCE.1',
    'MAX_SPEED', 'AVERAGE_SPEED', 'AVERAGE_ELAPSED_SPEED',
    # elevation and gradient
    'ELEVATION_GAIN', 'ELEVATION_LOSS', 'ELEVATION_LOW', 'ELEVATION_HIGH',
    'MAX_GRADE', 'AVERAGE_GRADE',
]
head_df = head_df[summary_columns]

In [None]:
# Restrict the index to the three activity types this notebook analyses.
wanted_types = ['Ride', 'Walk', 'Run']
is_wanted = head_df['ACTIVITY_TYPE'].isin(wanted_types)
head_df = head_df.loc[is_wanted].copy()

# Number of distinct activities per (file suffix, activity type) pair.
activities_by_group = head_df.groupby(['FILE_SUFFIX', 'ACTIVITY_TYPE'])
activities_by_group['ACTIVITY_ID'].nunique()

In [10]:
# List the track files that belong to each (file suffix, activity type) pair.
for (suffix, activity_type), group in head_df.groupby(['FILE_SUFFIX', 'ACTIVITY_TYPE']):
    print(suffix, '--', activity_type)
    print(group['FILENAME'])

.gpx -- Ride
0         activities/261971610.gpx
2         activities/300397733.gpx
5         activities/510172119.gpx
7         activities/556158192.gpx
8         activities/575489495.gpx
                   ...            
715      activities/8656624411.gpx
716      activities/8700364349.gpx
717      activities/8733245910.gpx
1201    activities/10685697605.gpx
1334    activities/11561166175.gpx
Name: FILENAME, Length: 187, dtype: object
.gpx -- Run
3       activities/338178959.gpx
4       activities/338194381.gpx
6       activities/556084248.gpx
9       activities/588291107.gpx
10      activities/588292372.gpx
11      activities/650790613.gpx
338    activities/3646721779.gpx
340    activities/3656938870.gpx
341    activities/3667435866.gpx
343    activities/3681524574.gpx
345    activities/3691197201.gpx
348    activities/3705970303.gpx
351    activities/3769280034.gpx
352    activities/3803936148.gpx
353    activities/3809931147.gpx
354    activities/3814138735.gpx
355    activities/3

In [18]:
def semicircles_to_degrees(semicircles):
    """
    Convert a FIT semicircle coordinate into decimal degrees.

    The value is scaled so that 2**31 semicircles map to 180 degrees.
    """
    degrees_per_semicircle = 180 / 2**31
    return semicircles * degrees_per_semicircle

In [20]:
# Walk the export's activities folder and parse every track file into its own
# DataFrame: .fit.gz files are collected in fit_files, .gpx files in gpx_files.
# Any other file (e.g. .tcx.gz) is only reported, not parsed.

gpx_files = []
fit_files = []
#tcx_files = []

for root, dirs, files in os.walk(f'{main_dir}/activities'):

    for file in files:

        # Every branch needs the full path, so build it once per file.
        file_path = os.path.join(root, file)

        if file.endswith('.fit.gz'):
            print(f'fit file! {file_path}')
            all_records = []
            with gzip.open(file_path, 'rb') as f:
                fitfile = FitFile(f)
                for record in fitfile.get_messages('record'):
                    data = {d.name: d.value for d in record}
                    data['source_file'] = file  # Optional: track which file it came from
                    all_records.append(data)
            # Convert to DataFrame
            df = pd.DataFrame(all_records)
            # Not every file carries GPS positions, so convert only the
            # coordinate columns that are present instead of hiding every
            # possible error behind a bare except. Coercing to numeric first
            # turns missing/non-numeric entries into NaN rather than raising.
            for raw_column, degrees_column in (('position_lat', 'LAT'), ('position_long', 'LONG')):
                if raw_column in df.columns:
                    numeric_semicircles = pd.to_numeric(df[raw_column], errors='coerce')
                    df[degrees_column] = numeric_semicircles.apply(semicircles_to_degrees)
            fit_files.append(df)

        elif file.endswith('.gpx'):
            print(f'gpx file! {file_path}')
            with open(file_path, 'r') as gpx_file:
                gpx = gpxpy.parse(gpx_file)
            # Flatten tracks -> segments -> points into one row per point.
            points = [
                {
                    'ID': file,
                    'latitude': point.latitude,
                    'longitude': point.longitude,
                    'elevation': point.elevation,
                    'time': point.time,
                    'speed': point.speed,
                    # Heart rate is not read from the GPX; use a real missing
                    # value (not the string 'NaN') so the column stays numeric.
                    'heart_rate': float('nan'),
                }
                for track in gpx.tracks
                for segment in track.segments
                for point in segment.points
            ]
            df = pd.DataFrame(points)
            gpx_files.append(df)

        else:
            print(f'other file! {file_path}')

    

fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/10778596247.fit.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/11061016795.fit.gz
other file! ../strava_data_dumps/STRAVA+export_8029714/activities/2727704737.tcx.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/12759412255.fit.gz
other file! ../strava_data_dumps/STRAVA+export_8029714/activities/4171869986.tcx.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/12852566380.fit.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/13282848920.fit.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/8920586120.fit.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/9950417558.fit.gz
gpx file! ../strava_data_dumps/STRAVA+export_8029714/activities/5650748315.gpx
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/10751797773.fit.gz
fit file! ../strava_data_dumps/STRAVA+export_8029714/activities/11963115478.fit.gz
fit file

In [21]:
# Stack the per-file GPX frames into one table. pd.concat raises ValueError on
# an empty list, so fall back to an empty frame when the export has no .gpx files.
gpx_df = pd.concat(gpx_files, ignore_index=True) if gpx_files else pd.DataFrame()

gpx_df

Unnamed: 0,ID,latitude,longitude,elevation,time,speed,heart_rate
0,5650748315.gpx,55.576194,-1.672036,17.2,2021-07-19 05:15:23+00:00,,
1,5650748315.gpx,55.576195,-1.672042,17.2,2021-07-19 05:15:24+00:00,,
2,5650748315.gpx,55.576196,-1.672042,17.2,2021-07-19 05:15:25+00:00,,
3,5650748315.gpx,55.576193,-1.672040,17.2,2021-07-19 05:15:26+00:00,,
4,5650748315.gpx,55.576190,-1.672039,17.2,2021-07-19 05:15:27+00:00,,
...,...,...,...,...,...,...,...
928954,5504776171.gpx,54.759829,-1.551646,44.6,2021-06-21 09:00:54+00:00,,
928955,5504776171.gpx,54.759817,-1.551640,44.6,2021-06-21 09:00:55+00:00,,
928956,5504776171.gpx,54.759811,-1.551639,44.6,2021-06-21 09:00:56+00:00,,
928957,5504776171.gpx,54.759809,-1.551635,44.6,2021-06-21 09:00:57+00:00,,


In [22]:
# Stack the per-file FIT frames into one table. pd.concat raises ValueError on
# an empty list, so fall back to an empty frame when the export has no .fit.gz files.
fit_df = pd.concat(fit_files, ignore_index=True) if fit_files else pd.DataFrame()

fit_df

Unnamed: 0,distance,timestamp,source_file,enhanced_altitude,enhanced_speed,gps_accuracy,position_lat,position_long,speed,heart_rate,LAT,LONG
0,0.0,2023-10-18 11:21:02,10778596247.fit.gz,,,,,,,,,
1,,2023-10-18 11:21:02,10778596247.fit.gz,41.8,1.354,3.0,653310815.0,-18523787.0,1.354,,54.759880,-1.552646
2,,2023-10-18 11:21:03,10778596247.fit.gz,41.8,1.438,3.0,653310656.0,-18523762.0,1.438,,54.759867,-1.552644
3,,2023-10-18 11:21:04,10778596247.fit.gz,41.8,1.478,2.0,653310490.0,-18523729.0,1.478,105.0,54.759853,-1.552641
4,,2023-10-18 11:21:05,10778596247.fit.gz,41.8,1.495,2.0,653310319.0,-18523706.0,1.495,,54.759838,-1.552639
...,...,...,...,...,...,...,...,...,...,...,...,...
1107934,,2023-04-27 05:45:12,9650064013.fit.gz,46.0,0.047,2.0,653311095.0,-18508749.0,0.047,100.0,54.759903,-1.551385
1107935,,2023-04-27 05:45:13,9650064013.fit.gz,46.0,0.031,2.0,653311094.0,-18508742.0,0.031,98.0,54.759903,-1.551385
1107936,,2023-04-27 05:45:14,9650064013.fit.gz,46.0,0.017,2.0,653311093.0,-18508737.0,0.017,,54.759903,-1.551384
1107937,,2023-04-27 05:45:15,9650064013.fit.gz,46.0,0.009,2.0,653311092.0,-18508735.0,0.009,,54.759903,-1.551384


# GPX Files

In [None]:
# Load a single GPX file as a worked example. Track files live in the export's
# activities/ sub-folder (activities.csv lists it as activities/261971610.gpx),
# so the path must include that folder.
with open(f'{main_dir}/activities/261971610.gpx', 'r') as gpx_file:
    gpx = gpxpy.parse(gpx_file)

points = []

# Flatten tracks -> segments -> points into one row per point.
for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            points.append({
                'latitude': point.latitude,
                'longitude': point.longitude,
                'elevation': point.elevation,
                'time': point.time,
                'speed': point.speed,
                # Heart rate is not read from the GPX; use a real missing
                # value (not the string 'NaN') so the column stays numeric.
                'heart_rate': float('nan')
            })

ride_gpx = pd.DataFrame(points)

# .fit Files

In [None]:
all_records = []

# Scan the export recursively for compressed FIT files and collect every
# 'record' message as one dict per row.
for root, dirs, files in os.walk(main_dir):
    for file in files:
        if not file.endswith('.fit.gz'):
            continue
        file_path = os.path.join(root, file)
        print(file_path)
        with gzip.open(file_path, 'rb') as f:
            fitfile = FitFile(f)
            all_records.extend(
                # Tag each row with the file it was read from.
                {**{d.name: d.value for d in record}, 'source_file': file}
                for record in fitfile.get_messages('record')
            )

# Convert to DataFrame
ride_fit = pd.DataFrame(all_records)

def semicircles_to_degrees(semicircles):
    """
    Convert a FIT semicircle coordinate into decimal degrees.

    The value is scaled so that 2**31 semicircles map to 180 degrees.
    """
    degrees_per_semicircle = 180 / 2**31
    return semicircles * degrees_per_semicircle

# Add decimal-degree copies of the raw semicircle coordinate columns.
for raw_column, degrees_column in (('position_lat', 'LAT'), ('position_long', 'LONG')):
    ride_fit[degrees_column] = ride_fit[raw_column].apply(semicircles_to_degrees)
#ride_fit.to_csv('GPS_route.csv')