In [3]:
# Import dependencies
from lxml import etree as ET, objectify
from os import listdir
from os.path import isfile, join, dirname
import pandas as pd
from collections import OrderedDict
from datetime import date
import json
from glob import glob
import fnmatch
from pathlib import Path
import os

## Create a DataFrame from trackpoints

In [4]:
# TCX file to load in the next cell; a relative path, so it is resolved
# against the notebook's current working directory.
filepath = "activity_5618003227.tcx"

In [5]:
# Parse the TCX file (an XML document) named by `filepath`.
parser = ET.XMLParser(remove_blank_text=True)
tree = ET.parse(filepath, parser)
root = tree.getroot()

# Strip the "{namespace}" prefix from every element tag so the plain,
# un-prefixed paths below ('Track/Trackpoint', 'Extensions/TPX/Speed', ...)
# match. iter() is used because getiterator() is deprecated in lxml.
for child in root.iter():
    # Comments and processing instructions do not have a string tag; skip them.
    if not isinstance(child.tag, str):
        continue
    if child.tag.startswith('{'):
        child.tag = child.tag.split('}', 1)[1]
# Remove objectify annotations and the namespace declarations left unused
# after the renaming above.
objectify.deannotate(root, cleanup_namespaces=True)

# DataFrame column name -> path of the value, relative to a <Trackpoint>.
trackpoint_fields = {
    'HR': 'HeartRateBpm/Value',
    'Time': 'Time',
    'Speed': 'Extensions/TPX/Speed',
    'Cadence': 'Extensions/TPX/RunCadence',
    'Lat': 'Position/LatitudeDegrees',
    'Lon': 'Position/LongitudeDegrees',
    'Alt': 'AltitudeMeters',
    'Distance': 'DistanceMeters',
}

# One record per <Trackpoint>. findtext() yields the element text as a string,
# or None when the trackpoint has no such element.
trackpoints = [
    {column: tp.findtext(path) for column, path in trackpoint_fields.items()}
    for tp in tree.xpath('//Track/Trackpoint')
]

# Passing the columns explicitly keeps the expected (empty) columns even when
# the file contains no trackpoints at all.
trackpt_df = pd.DataFrame(trackpoints, columns=list(trackpoint_fields))

In [6]:
# Show the trackpoint DataFrame (one row per trackpoint); as the last
# expression in the cell it is rendered as the cell's output.
trackpt_df

Unnamed: 0,HR,Time,Speed,Cadence,Lat,Lon,Alt,Distance
0,104,2020-10-01T16:21:43.000Z,1.465000033378601,61,39.072621166706085,-108.55783620849252,1409.199951171875,0.0
1,104,2020-10-01T16:21:44.000Z,1.4459999799728394,61,39.07264337874949,-108.55783872306347,1409.199951171875,1.3600000143051147
2,104,2020-10-01T16:21:48.000Z,2.3510000705718994,15,39.072762317955494,-108.55783176608384,1410.199951171875,11.449999809265137
3,104,2020-10-01T16:21:49.000Z,2.4260001182556152,83,39.07279375009239,-108.55783503502607,1409.4000244140625,14.949999809265137
4,105,2020-10-01T16:21:55.000Z,3.2660000324249268,83,39.072956359013915,-108.5578356217593,1409.5999755859375,35.119998931884766
...,...,...,...,...,...,...,...,...
401,171,2020-10-01T16:50:23.000Z,2.76200008392334,80,39.07328903675079,-108.55507453903556,1415.4000244140625,4828.5
402,171,2020-10-01T16:50:25.000Z,2.753000020980835,80,39.07328451052308,-108.55514788068831,1415.4000244140625,4834.06005859375
403,170,2020-10-01T16:50:27.000Z,2.753000020980835,80,39.073282331228256,-108.5552073083818,1415.4000244140625,4839.97998046875
404,172,2020-10-01T16:50:31.000Z,2.6589999198913574,80,39.07328702509403,-108.55530152097344,1415.199951171875,4847.02001953125


## Iterate over all TCX files

In [77]:
import os
import tcxparser

def activity_id_from_filename(filename):
    """Return the activity id embedded in a TCX export's file name.

    The id is taken to be the second underscore-separated field of the base
    name with its extension removed, e.g. ``activity_1926552470.tcx`` ->
    ``'1926552470'``. Only the base name is inspected, so underscores in
    parent directories cannot shift the field index.

    Args:
        filename: file name or path of the TCX export.

    Returns:
        The activity id as a string.

    Raises:
        ValueError: if the base name has no second underscore-separated field.
    """
    fields = Path(filename).stem.split("_")
    if len(fields) < 2 or not fields[1]:
        raise ValueError("cannot extract an activity id from {!r}".format(filename))
    return fields[1]


# NOTE(review): absolute, machine-specific location; consider deriving it from
# a configurable data directory so the notebook runs on other machines.
tcx_dir = r'/Users/loganbon/Documents/GitHub/Projects/School Projects/Digital Workout Tracker/Digital-Workout-Tracker/data/tcx'

for subdir, dirs, files in os.walk(tcx_dir):
    for filename in files:
        # Only TCX exports can be parsed; skip everything else in the folder
        # (for example hidden files such as .DS_Store).
        if not filename.lower().endswith('.tcx'):
            continue
        filepath = os.path.join(subdir, filename)

        # Parse tcx file. lxml opens the path itself, so no separate open()
        # handle is needed.
        parser = ET.XMLParser(remove_blank_text=True)
        tree = ET.parse(filepath, parser)
        root = tree.getroot()

        # Strip the "{namespace}" prefix from every element tag so the plain
        # paths below match. iter() replaces the deprecated getiterator().
        for child in root.iter():
            # Comments and processing instructions have no string tag; skip them.
            if not isinstance(child.tag, str):
                continue
            if child.tag.startswith('{'):
                child.tag = child.tag.split('}', 1)[1]
        objectify.deannotate(root, cleanup_namespaces=True)

        # Get activity id from file name
        activity_id = activity_id_from_filename(filename)

        # Get trackpoints data: one record per <Trackpoint>; findtext() gives
        # the element text as a string, or None when the element is missing.
        trackpoints = [
            {
                'activity_id': activity_id,
                'hrt_rate': tp.findtext('HeartRateBpm/Value'),
                'activity_time': tp.findtext('Time'),
                'speed': tp.findtext('Extensions/TPX/Speed'),
                'cadence': tp.findtext('Extensions/TPX/RunCadence'),
                'lat': tp.findtext('Position/LatitudeDegrees'),
                'lon': tp.findtext('Position/LongitudeDegrees'),
                'altitude': tp.findtext('AltitudeMeters'),
                'distance': tp.findtext('DistanceMeters'),
            }
            for tp in tree.xpath('//Track/Trackpoint')
        ]

        # Get activity summary data
        tcx = tcxparser.TCXParser(filepath)
        activities = {
            "activity_id": activity_id,
            "activity_type": tcx.activity_type,
            "duration": tcx.duration,
            "total_distance": tcx.distance,
            "distance_units": tcx.distance_units,
            # Disabled because reading these raised ZeroDivisionError:
            # "avg_hrt_rate": tcx.hr_avg,
            # "max_hrt_rate": tcx.hr_max,
            "calories": tcx.calories
        }
        print(activities)
        # A file without trackpoints has no first record to preview.
        print(trackpoints[0] if trackpoints else None)
        # Preview only: stop after the first TCX file of this directory.
        # NOTE(review): this exits the inner loop only, so os.walk still moves
        # on to sub-directories; remove the break to process every file.
        break

{'activity_id': '1926552470', 'activity_type': 'biking', 'duration': 13148.0, 'total_distance': 78734.03125, 'distance_units': 'meters', 'calories': 2255}
{'activity_id': '1926552470', 'hrt_rate': '97', 'activity_time': '2017-08-19T20:38:57.000Z', 'speed': '0.0', 'cadence': None, 'lat': '39.464005418121815', 'lon': '-107.3228301666677', 'altitude': '1972.5999755859375', 'distance': '0.8500000238418579'}
