In [3]:
!pip install fitdecode==0.10.0

Collecting fitdecode==0.10.0


[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip



  Downloading fitdecode-0.10.0-py3-none-any.whl (90 kB)
     ---------------------------------------- 90.9/90.9 kB ? eta 0:00:00
Installing collected packages: fitdecode
Successfully installed fitdecode-0.10.0


In [1]:
# TODO improve docstrings

In [2]:
import os
import json
import pandas as pd

In [21]:
# Folder layout, relative to the working directory of the notebook.
current_directory = os.curdir
# Source .fit files are read from here.
fit_directory = os.path.join(current_directory, "data", "fit")
# Converted JSON files are written to / read from here.
jsons_directory = os.path.join(current_directory, "data", "json")

In [22]:
def search_usable_fit_files(directory, extension):
    """Return the names of the usable activity files found in ``directory``.

    A file is usable when its name ends with ``extension`` and does not contain
    "inProgressActivity". A trailing ".json" is stripped from every returned
    name, so that converted files (``<name>.json``) can be compared with the
    names of their source files.

    Args:
        directory: Path of the folder to list.
        extension: Suffix a file name must end with (e.g. ".fit" or ".json").

    Returns:
        list[str]: Matching file names, in the order given by ``os.listdir``.
    """
    json_suffix = ".json"
    usable_files = []
    for fitfile in os.listdir(directory):
        if not fitfile.endswith(extension) or "inProgressActivity" in fitfile:
            continue
        # Strip only a trailing ".json": str.replace would also remove a
        # ".json" found in the middle of the name and corrupt it.
        if fitfile.endswith(json_suffix):
            fitfile = fitfile[:-len(json_suffix)]
        usable_files.append(fitfile)
    return usable_files

def compare_files(usable_fit_files, existent_fit_files):
    """Return the entries of ``usable_fit_files`` that are missing from ``existent_fit_files``.

    Duplicates are removed and the order of the returned list is not defined,
    because the comparison is a set difference.
    """
    already_present = set(existent_fit_files)
    missing = set(usable_fit_files).difference(already_present)
    return list(missing)

def obtain_new_files_in_folder_json_format():
    """Convert to JSON every .fit file that has no JSON counterpart yet.

    Lists the usable .fit files of ``fit_directory`` and the JSON files already
    present in ``jsons_directory`` (created if missing), then runs the
    ``fitjson`` command line tool once per missing file, writing
    ``<fit file name>.json`` into ``jsons_directory``.

    The tool is invoked with an argument list instead of a shell string, so
    paths containing spaces or shell metacharacters are passed through intact.
    A failed conversion is reported and skipped; it does not stop the loop.

    Returns:
        None
    """
    import subprocess  # local import so this block does not depend on the notebook's import cell

    os.makedirs(jsons_directory, exist_ok=True)  # Does nothing if the directory exists

    usable_fit_files = search_usable_fit_files(directory=fit_directory, extension=".fit")
    existent_json_fit_files = search_usable_fit_files(directory=jsons_directory, extension=".json")
    fit_files_to_add = compare_files(usable_fit_files, existent_json_fit_files)
    print("Adding " + str(len(fit_files_to_add)) + " files")

    for fitfile in fit_files_to_add:
        input_path = os.path.join(fit_directory, fitfile)
        output_path = os.path.join(jsons_directory, fitfile + ".json")
        command = ["fitjson", "--pretty", "-f=record", "-o", output_path, input_path]
        try:
            completed = subprocess.run(command, check=False)
        except OSError as error:
            # Raised e.g. when the fitjson executable cannot be found or started.
            print("Could not run fitjson for " + input_path + ": " + str(error))
            continue
        if completed.returncode != 0:
            print("fitjson failed for " + input_path + " (exit code " + str(completed.returncode) + ")")
            # Drop any partial output so the file is retried on the next run
            # instead of being mistaken for a finished conversion.
            if os.path.isfile(output_path):
                os.remove(output_path)

def obtain_interesting_chunks(my_json):
    """Collect the ``fields`` entry of every chunk whose ``frame_type`` is "data_message".

    Chunks with any other frame type are ignored. A chunk that cannot be read
    (for example because a key is missing) is reported on stdout and skipped.
    """

    selected_fields = []
    for message in my_json:
        try:
            is_data_message = message["frame_type"] == "data_message"
            # 'fields' is only looked up for data messages.
            fields = message['fields'] if is_data_message else None
        except Exception as error:
            print("Excepted in chunk, ", error)
            continue
        if is_data_message:
            selected_fields.append(fields)

    return selected_fields

def convert_interesting_chunks_to_dataframe(interesting_chunks, training):
    """Turn each chunk into a flat ``{name: value}`` dict tagged with its training.

    Despite the function name, the result is a list of dicts (one per chunk),
    not a DataFrame. Every dict maps each element's 'name' to its 'value' and
    carries an extra "training" key set to ``training``.
    """

    def _as_row(fields):
        row = {}
        for field in fields:
            row[field['name']] = field['value']
        # Set last, so it wins over any element that is itself named "training".
        row["training"] = training
        return row

    return [_as_row(fields) for fields in interesting_chunks]

def open_json(thejson):
    """Load and return the content of a JSON file.

    The file is read as UTF-8 explicitly, so the result does not depend on the
    platform's default text encoding.

    Args:
        thejson: Path of the JSON file to read.

    Returns:
        The decoded JSON document, as returned by ``json.load``.
    """

    with open(thejson, encoding="utf-8") as file:
        return json.load(file)

def append_dataframes(dataframes_of_trainings):
    """Concatenate a collection of dataframes into a single one.

    Args:
        dataframes_of_trainings: Iterable of ``pandas.DataFrame`` objects.

    Returns:
        pandas.DataFrame: All rows stacked with a fresh 0..n-1 index. An empty
        DataFrame is returned when there is nothing to concatenate, instead of
        letting ``pd.concat`` raise ``ValueError``.
    """

    frames = list(dataframes_of_trainings)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

def obtain_dataframe_from_jsons(directory):
    """Build a single dataframe from every JSON file found in ``directory``.

    For each ``*.json`` file: load it, keep the fields of its data messages,
    flatten them into one dict per record (tagged with the file's path under
    the "training" key) and turn them into a dataframe. The per-file
    dataframes are then concatenated.

    Files are read from ``directory`` itself (not from a module-level folder),
    in sorted name order so the row order is reproducible. Entries that are
    not ``.json`` files are skipped.

    Args:
        directory: Path of the folder containing the JSON files.

    Returns:
        pandas.DataFrame: One row per record; empty when no JSON file is found.
    """

    dataframes_of_trainings = []
    for currentjson in sorted(os.listdir(directory)):
        json_path = os.path.join(directory, currentjson)
        # Skip sub-folders and unrelated files that would break json.load.
        if not currentjson.endswith(".json") or not os.path.isfile(json_path):
            continue
        openedjson = open_json(json_path)
        interesting_chunks = obtain_interesting_chunks(openedjson)
        custom_records_object = convert_interesting_chunks_to_dataframe(interesting_chunks, json_path)
        dataframes_of_trainings.append(pd.DataFrame(custom_records_object))

    if not dataframes_of_trainings:
        return pd.DataFrame()
    return append_dataframes(dataframes_of_trainings)

In [23]:
import datetime
# Step 1: convert any new .fit files to JSON and print how long it took.
momento1 = datetime.datetime.now()
obtain_new_files_in_folder_json_format()
print(datetime.datetime.now() - momento1)
# Step 2: load every JSON file into a single dataframe and print how long it took.
momento1 = datetime.datetime.now()
dataframe = obtain_dataframe_from_jsons(jsons_directory)
print(datetime.datetime.now() - momento1)

Adding 0 files
0:00:00.001001
0:00:00.644682


In [24]:
import datetime
# Rebuild the dataframe from the JSON folder (no conversion step) and print the elapsed time.
momento1 = datetime.datetime.now()
dataframe = obtain_dataframe_from_jsons(jsons_directory)
print(datetime.datetime.now() - momento1)

0:00:00.659350


In [26]:
# Number of non-null values per column; a count of 0 means the column holds no data at all.
dataframe.count()

timestamp                    14663
position_lat                 14663
position_long                14663
distance                         0
time_from_course                 0
speed                        14663
compressed_speed_distance        0
heart_rate                   14663
enhanced_altitude            14663
altitude                     14663
enhanced_speed               14663
power                        14663
grade                            0
cadence                      14663
resistance                       0
cycle_length                     0
temperature                      0
training                     14663
dtype: int64