## GPX Viewer

Load GPX file, allow edit/addition of metadata and save/load to Parquet

### Parquet

https://towardsdatascience.com/saving-metadata-with-dataframes-71f51f558d8e

https://towardsdatascience.com/parquet-best-practices-discover-your-data-without-loading-them-f854c57a45b6

### Arrow

https://github.com/apache/arrow

https://arrow.apache.org

https://arrow.apache.org/docs/python/


In [1]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import json

In [2]:
import warnings
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime

import pandas as pd
from gpx_converter import Converter

import helper_folium


warnings.simplefilter(action="ignore", category=FutureWarning)

In [3]:

@dataclass
class Track:
    name: str
    date: datetime.date
    time: datetime.time
    data: pd.DataFrame



def extract_track_metadata(gpx_file):
    if gpx_file is not None:
        tmp = gpx_file.name.replace("route_", "").replace(".gpx", "").replace("_", " ")
        track_date = datetime.strptime(tmp, "%Y-%m-%d %H.%M%p")
        track_df = Converter(input_file=gpx_file).gpx_to_dataframe()
        track = Track(name="undefined", date=track_date.date(), time=track_date.time(), data=track_df)
        return track
    else:
        return Track()
    

def plot_track(track):
    df = track.data[["latitude", "longitude"]]
    df.columns = ["lat", "lon"]
    helper_folium.create_walk_map_non_streamlit(df, workout_info=[])
    return None

In [4]:
DATA_PATH = "/Users/mjboothaus/code/github/mjboothaus/emmaus-walking-data/data/apple_health_export/workout-routes/"
EXAMPLE_GPX_FILENAME = DATA_PATH + "route_2017-08-01_7.10am.gpx"

gpx_file = EXAMPLE_GPX_FILENAME

if gpx_file is not None:
    if Path(gpx_file).exists():
        #print(Path(gpx_file).name)
        track = extract_track_metadata(Path(gpx_file))
        map = plot_track(track)
    else:
        print(f"File {gpx_file} does not exist")



In [5]:
type(map)

NoneType

In [6]:
custom_meta_content = {
    'user': 'M Booth',
    'coord': '55.9533° N, 3.1883° W',
    'time': '2020-10-17T03:59:59+0000'  # ISO-8601
}

In [7]:
custom_meta_content

{'user': 'M Booth',
 'coord': '55.9533° N, 3.1883° W',
 'time': '2020-10-17T03:59:59+0000'}

In [8]:
custom_meta_key = "data_info"

In [9]:
table = pa.Table.from_pandas(df)   # Question - same method available in Polars?

NameError: name 'df' is not defined

In [None]:
table

In [None]:
table["temp"]

In [None]:
print(table.schema.metadata)


In [None]:
type(table.schema.metadata)

In [None]:
import pprint

In [None]:
pprint.pprint(table.schema.metadata)

In [None]:
def create_combined_metadata(custom_meta_content, table):
    custom_meta_json = json.dumps(custom_meta_content)
    existing_meta = table.schema.metadata
    combined_meta = {
        custom_meta_key.encode() : custom_meta_json.encode(),
        **existing_meta
    }
    return combined_meta

In [None]:
combined_meta = create_combined_metadata(custom_meta_content, table)

In [None]:
combined_meta

In [None]:
table = table.replace_schema_metadata(combined_meta)

In [None]:
print(table.schema.metadata)

In [None]:
pq.write_table(table, '../data/example.parquet', compression='GZIP')

In [None]:
restored_table = pq.read_table('../data/example.parquet')


In [None]:
restored_df = restored_table.to_pandas()

In [None]:
restored_df

In [None]:
def read_parquet_metadata(parquet_file, custom_meta_key):
    restored_table = pq.read_table(parquet_file)
    restored_meta_json = restored_table.schema.metadata[custom_meta_key.encode()]
    return json.loads(restored_meta_json)



In [None]:
meta_data = read_parquet_metadata(parquet_file="../data/example.parquet", custom_meta_key=custom_meta_key)

In [None]:
meta_data