In [1]:
from fitparse import FitFile
from functools import partial
import gzip
import gpxpy
import numpy as np
import os
import pandas as pd

In [2]:
def semicir_to_degs(semicirc):
    return semicirc * (180 / 2**31)

def parse_fitgz(filename):
    try:
        fitfile = FitFile(gzip.open(filename))
        df = pd.DataFrame([{d['name']: d['value'] for d in r.as_dict()['fields']} 
                                   for r in fitfile.get_messages('record')])
        df['position_lat'] = df['position_lat'].map(semicir_to_degs)
        df['position_long'] = df['position_long'].map(semicir_to_degs)
        return df
    except Exception as e:
        print(f'Issue reading fit file {filename}.')

def parse_gpx(filename):
    gpx = gpxpy.parse(filename)
    track_coords = []
    for track in gpx.tracks:
        for segment in track.segments:
            for point in segment.points:
                track_coords.append([point.time, point.latitude, point.longitude, point.elevation])
    return pd.DataFrame(track_coords, columns=['position_lat', 'position_long', 'altitude'])

def parse_file(filename):
    if filename.endswith('.fit.gz'):
        return parse_fitgz(filename)
    elif filename.endswith('.gpx'):
        return parse_gpx(filename)
    elif filename.endswith('.gpx.gz'):
        return parse_gpx(gzip.open(filename))
    else:
        print(f'Add parser for {filename} to parse_file function.')

In [20]:
def full_path(directory, x):
    try:
        return os.path.join(f'../data/{directory}/', x)
    except Exception as e:
        print(f'Full_path error directory: {directory}, {x}')

def check_file_exists(x):
    try:
        return os.path.exists(x)
    except Exception as e:
        print(f'File {x} does not exist')
        
def get_activities(directory):
    df = pd.read_csv(f'../data/{directory}/activities.csv')
    df = df[['Activity ID', 'Activity Date', 'Activity Name', 'Activity Type', 
                             'Elapsed Time', 'Distance', 'Filename', 'Moving Time',
                             'Elevation Gain', 'Elevation Loss', 'Average Speed', 'Average Grade']]
    df['Activity Date'] = pd.to_datetime(df['Activity Date'])
    df.columns = [ x.lower().replace(" ", "_") for x in df.columns]
    df['filename'] = df['filename'].map(lambda x: full_path(directory, x))
    df['exists'] = df['filename'].map(check_file_exists)
    return df.sort_values('activity_date')

In [22]:
def combine_folders(folder_list):
    dfs = []
    for folder in folder_list:
        df = get_activities(folder)
        df['person'] = folder
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

In [25]:
people = ['MB_Strava', 'BL_Strava', 'KM_Strava', 'LB_Strava']
everyone = combine_folders(people)

Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
Full_path error directory: BL_Strava, nan
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
File None does not exist
Full_path error directory: KM_Strava, nan
Full_path error directory: KM_Strava, nan
Full_path error directory: KM_Strava, nan
Ful

In [26]:
everyone.tail(3)

Unnamed: 0,activity_id,activity_date,activity_name,activity_type,elapsed_time,distance,filename,moving_time,elevation_gain,elevation_loss,average_speed,average_grade,exists,person
4908,4847862092,2021-02-04 14:15:18,Morning Activity,Nordic Ski,3252,5.68,../data/LB_Strava/activities/5171408676.fit.gz,3174.0,98.0,98.0,1.792029,-0.014065,True,LB_Strava
4909,4847864782,2021-02-06 17:57:03,Morning Activity,Nordic Ski,5793,7.61,../data/LB_Strava/activities/5171411428.fit.gz,5433.0,75.0,66.0,1.401988,0.123408,True,LB_Strava
4910,4847866278,2021-02-22 21:44:45,Afternoon Activity,Nordic Ski,2948,5.41,../data/LB_Strava/activities/5171412935.fit.gz,2781.0,112.0,79.0,1.946926,0.616874,True,LB_Strava


In [30]:
for person in people:
    print(f'Shape of {person}: {everyone[everyone.person == person].shape}')

Shape of MB_Strava: (383, 14)
Shape of BL_Strava: (2406, 14)
Shape of KM_Strava: (1566, 14)
Shape of LB_Strava: (556, 14)


In [36]:
current = everyone[everyone.person == person]
person, current.shape

('LB_Strava', (556, 14))

In [40]:
current.to_dict(orient='records')[:2]

[{'activity_id': 455865774,
  'activity_date': Timestamp('2015-12-12 18:41:42'),
  'activity_name': 'Lunch Run',
  'activity_type': 'Run',
  'elapsed_time': 14065,
  'distance': 25.45,
  'filename': '../data/LB_Strava/activities/506850239.fit.gz',
  'moving_time': 13253.0,
  'elevation_gain': 830.0,
  'elevation_loss': nan,
  'average_speed': nan,
  'average_grade': -0.0337859988212585,
  'exists': True,
  'person': 'LB_Strava'},
 {'activity_id': 455865772,
  'activity_date': Timestamp('2015-12-13 17:55:24'),
  'activity_name': 'Morning Run',
  'activity_type': 'Run',
  'elapsed_time': 11960,
  'distance': 21.99,
  'filename': '../data/LB_Strava/activities/506850238.fit.gz',
  'moving_time': 11503.0,
  'elevation_gain': 529.0,
  'elevation_loss': nan,
  'average_speed': nan,
  'average_grade': 0.090041697025299,
  'exists': True,
  'person': 'LB_Strava'}]

In [41]:
dfs = []
for i, d in enumerate(current.to_dict(orient='records')):
    if i%20==0:
        print(i, len(dfs))
    try:
        df = parse_file(d['filename'])
        df['activity_id'] = d['activity_id']
        df['person'] = d['person']
        dfs.append(df)
    except Exception as e:
        pass

0 0
20 18
40 36
Issue reading fit file ../data/LB_Strava/activities/877660707.fit.gz.
Issue reading fit file ../data/LB_Strava/activities/1057621325.fit.gz.
60 54
Issue reading fit file ../data/LB_Strava/activities/1093136985.fit.gz.
80 73
100 92
Add parser for ../data/LB_Strava/activities/1217477450.tcx.gz to parse_file function.
Add parser for ../data/LB_Strava/activities/1217442924.tcx.gz to parse_file function.
120 108
Issue reading fit file ../data/LB_Strava/activities/1325790963.fit.gz.
140 126
160 145
180 165
200 183
220 202
240 222
Issue reading fit file ../data/LB_Strava/activities/2551741241.fit.gz.
260 241
280 261
300 281
320 301
Issue reading fit file ../data/LB_Strava/activities/3174278624.fit.gz.
Issue reading fit file ../data/LB_Strava/activities/3177023014.fit.gz.
340 319
Issue reading fit file ../data/LB_Strava/activities/3185356169.fit.gz.
Issue reading fit file ../data/LB_Strava/activities/3192244831.fit.gz.
Issue reading fit file ../data/LB_Strava/activities/3194741

In [54]:
person_df = pd.concat(dfs)
person_df['timestamp'] = pd.to_datetime(person_df['timestamp'])
person_df.to_pickle(f'../data/{person}/df.pkl')

In [55]:
pd.read_pickle(f'../data/{person}/df.pkl')

Unnamed: 0,timestamp,position_lat,position_long,distance,enhanced_altitude,altitude,enhanced_speed,speed,unknown_61,unknown_66,...,left_power_phase,left_power_phase_peak,right_power_phase,right_power_phase_peak,unknown_87,gps_accuracy,unknown_88,unknown_108,unknown_135,unknown_136
0,2015-12-12 18:41:42,40.020130,-105.298212,0.00,1698.8,1698.8,0.000,0.000,10972.0,-503.0,...,,,,,,,,,,
1,2015-12-12 18:41:45,40.020175,-105.298339,0.00,1698.8,1698.8,4.535,4.535,10972.0,-503.0,...,,,,,,,,,,
2,2015-12-12 18:41:56,40.020027,-105.298184,20.55,1698.8,1698.8,3.602,3.602,10972.0,-504.0,...,,,,,,,,,,
3,2015-12-12 18:41:57,40.020008,-105.298165,23.13,1699.0,1699.0,3.536,3.536,10973.0,-504.0,...,,,,,,,,,,
4,2015-12-12 18:42:01,40.019966,-105.298203,29.29,1699.4,1699.4,3.303,3.303,10975.0,-504.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,2021-02-22 22:33:38,39.575214,-106.071290,5373.77,2762.0,,3.238,,,,...,,,,,,,,,176.0,116.0
896,2021-02-22 22:33:42,39.575337,-106.071283,5387.52,2761.4,,3.294,,,,...,,,,,,,,,176.0,116.0
897,2021-02-22 22:33:45,39.575418,-106.071303,5396.64,2761.0,,3.294,,,,...,,,,,,,,,147.0,114.0
898,2021-02-22 22:33:49,39.575504,-106.071327,5406.38,2760.6,,2.949,,,,...,,,,,,,,,147.0,113.0
