In [1]:
# default_exp datapipe
from nbdev import *

# Datapipe - Emmaus Walking Streamlit App

> This is the module for creating the data pipeline.
> It should also be used to perform the data pre-processing and caching.

So what needs to be done:

### Pre-process (prep) the data [do this ONCE and only ONCE - then put it in some reusable form, e.g. Quilt on S3]

0. Think about capturing walk metadata / do on a per-overall-walk basis 
1. Need to "extract" the data from the .fit files
2. Clean/fix the data (e.g. allow for breaks in walk, change in order, not turning off walk at end)
3. Concatenate into a single data-structure per overall walk 
4. Store in "database" e.g. sqlite, postgres?, files, or is Quilt sufficient?


In [2]:
#export
import os
import pandas as pd
import activityio as aio
from dateutil.parser import parse
import datetime as dt

In [3]:
#export
def calc_walk_stats(walk_data):
    """Summarise a walk that was recorded as several separate hikes.

    Parameters
    ----------
    walk_data : sequence of pandas.DataFrame
        One frame per hike, in walking order. Each frame must have 'lat',
        'lon' and 'dist' columns.
        NOTE(review): the index is presumably the elapsed time since the
        start of that hike and 'dist' the cumulative distance in metres --
        confirm against the .fit reader.

    Returns
    -------
    tuple
        ``(total_time, total_distance, start_coord, end_coord)``:
        the sum of each hike's largest index value, the sum of each hike's
        largest 'dist' value divided by 1000, the ``[lat, lon]`` of the
        first row of the first hike, and the ``[lat, lon]`` of the last row
        of the last hike.
    """
    elapsed = dt.timedelta(0)
    distance = 0

    for hike in walk_data:
        elapsed += hike.index.max()
        distance += hike['dist'].max()

    first_hike, last_hike = walk_data[0], walk_data[-1]
    start_coord = first_hike[['lat', 'lon']].iloc[0].tolist()
    end_coord = last_hike[['lat', 'lon']].iloc[-1].tolist()
    return elapsed, distance / 1e3, start_coord, end_coord


# TODO: use st.cache() and also look to pre-load and cache/feather data (or similar) - NB: use of @st.cache() below didn't work
def load_and_cache_raw_walk_data(walk_name, sample_freq,
                                 fit_file_path='/Users/mjboothaus/iCloud/Data/HealthFit/'):
    """Read every .fit file recorded for a walk and build a thinned point list.

    Despite the name, nothing is cached yet; every call re-reads the files.

    Parameters
    ----------
    walk_name : str
        Name of the walk. Only its first three characters are used, as the
        name of the sub-directory that holds the walk's .fit files.
    sample_freq : int
        Keep every ``sample_freq``-th point of the merged track.
    fit_file_path : str, optional
        Root directory containing one sub-directory per walk. Defaults to the
        location that used to be hard-coded in this function, so existing
        callers are unaffected.

    Returns
    -------
    tuple
        ``(walk_data, walk_date, walk_files, points)``: one DataFrame per
        .fit file, the value parsed from the first 17 characters of each
        filename, the sorted .fit filenames, and the sampled ``(lat, lon)``
        tuples of all files concatenated in filename order.
    """
    data_dir = os.path.join(fit_file_path, walk_name[0:3])
    walk_files = sorted(name for name in os.listdir(data_dir) if name.endswith('.fit'))

    walk_data = []
    walk_date = []
    for name in walk_files:
        walk_data.append(pd.DataFrame(aio.read(os.path.join(data_dir, name))))
        # The first 17 characters of the filename are handed to dateutil's parser.
        walk_date.append(parse(name[0:17]))

    # Only the start coordinate is used here; it is echoed as a quick sanity check.
    _, _, start_coord, _ = calc_walk_stats(walk_data)
    print(start_coord)

    walk_merged = pd.concat(walk_data)
    all_points = walk_merged[['lat', 'lon']].values.tolist()
    points = [tuple(point) for i, point in enumerate(all_points) if i % sample_freq == 0]
    return walk_data, walk_date, walk_files, points

In [4]:
# Load the 'B2M' walk, keeping every 50th (lat, lon) point of the merged track.
walk_data, walk_date, walk_files, points = load_and_cache_raw_walk_data('B2M', 50)

[-33.84472858160734, 151.2182762939483]


In [5]:
# Number of (lat, lon) points kept after sampling.
len(points)

796

In [6]:
# The trailing semicolon suppresses the notebook's display of the full list.
points;

### Extract GPS data from the photos for the walks where the GPS data was unavailable due to sync issues after the AppleWatch (7.0) and iPhone (14.0) updates

In [7]:
from GPSPhoto import gpsphoto # requires GPSPhoto, exifread & piexif libraries

In [8]:
# Directory scanned for '.jpeg' photos when building photo_GPS_1.
PHOTO_DIR1 = r'/Users/mjboothaus/iCloud/Data/HealthFit/B2M/M2P/Queenscliff & North Curl Curl, 21 September 2020/'

In [9]:
# Directory scanned for '.jpeg' photos when building photo_GPS_2.
PHOTO_DIR2 = r'/Users/mjboothaus/iCloud/Data/HealthFit/B2M/M2P/Newport, 28 September 2020/'

In [10]:
# Passed as LEN_FILENAME to extract_gps_data_from_photos: the number of leading
# filename characters skipped to reach the integer used to sort the photos.
LEN_FILENAME_1 = 23  # used with PHOTO_DIR1
LEN_FILENAME_2 = 20  # used with PHOTO_DIR2

In [11]:
def extract_gps_data_from_photos(LEN_FILENAME, PHOTO_DIR):
    """Collect (latitude, longitude) pairs from the '.jpeg' photos in a directory.

    Photos are processed in ascending order of the integer found in each
    filename after its first ``LEN_FILENAME`` characters (with the '.jpeg'
    suffix removed). Photos whose GPS data lacks a 'Latitude' or 'Longitude'
    entry are skipped.

    Parameters
    ----------
    LEN_FILENAME : int
        Number of leading filename characters to skip before the numeric
        part used for sorting.
    PHOTO_DIR : str
        Directory containing the photos; a trailing path separator is
        optional.

    Returns
    -------
    list of tuple
        ``(latitude, longitude)`` for each photo that provided both values,
        in sorted filename order.

    Raises
    ------
    ValueError
        If the remainder of a '.jpeg' filename is not an integer.
    """
    photo_files = [name for name in os.listdir(PHOTO_DIR) if name.endswith('.jpeg')]
    photo_files.sort(key=lambda name: int(name[LEN_FILENAME:].replace('.jpeg', '')))

    photo_GPS = []
    for name in photo_files:
        # os.path.join keeps the path valid whether or not PHOTO_DIR ends with a separator.
        gps_data = gpsphoto.getGPSData(os.path.join(PHOTO_DIR, name))
        try:
            photo_GPS.append((gps_data['Latitude'], gps_data['Longitude']))
        except (KeyError, TypeError):
            # Best effort: skip photos with no usable GPS entry (missing key, or
            # a non-subscriptable result) without hiding unrelated errors.
            continue
    return photo_GPS

In [12]:
# (latitude, longitude) tuples read from the photos in PHOTO_DIR1.
photo_GPS_1 = extract_gps_data_from_photos(LEN_FILENAME_1, PHOTO_DIR1)

In [13]:
# (latitude, longitude) tuples read from the photos in PHOTO_DIR2.
photo_GPS_2 = extract_gps_data_from_photos(LEN_FILENAME_2, PHOTO_DIR2)

In [14]:
import folium

In [15]:
# Location handed to folium.Map as the initial map centre; presumably [lat, lon] -- confirm.
start_coord = [-33.89054004102945, 151.27483293414116]

In [16]:
# Base map that the walk polylines below are drawn onto.
map_handle = folium.Map(start_coord, zoom_start=13, detect_retina=True, control_scale=True)

In [17]:
def plot_walk_points(walk_points, map_handle, linecolour, linewidth):
    """Draw a walk as a single polyline on a folium map.

    Parameters
    ----------
    walk_points : sequence
        Points of the track, passed unchanged to ``folium.PolyLine``.
    map_handle : object
        The map (or other folium container) the line is added to.
    linecolour : str
        Passed to ``folium.PolyLine`` as ``color``.
    linewidth : int
        Passed to ``folium.PolyLine`` as ``weight``.

    Returns
    -------
    None
        The map is modified in place.
    """
    route = folium.PolyLine(walk_points, color=linecolour, weight=linewidth)
    route.add_to(map_handle)

In [18]:
# Draw the track from photo_GPS_1 as a blue line of weight 6.
plot_walk_points(photo_GPS_1, map_handle, 'blue', 6)

In [19]:
# Draw the track from photo_GPS_2 on the same map, with the same styling.
plot_walk_points(photo_GPS_2, map_handle, 'blue', 6)

In [20]:
# Fit the view to the bounds reported by the map itself.
map_handle.fit_bounds(map_handle.get_bounds())

In [21]:
# Display the map as the cell output.
map_handle

In [22]:
# TODO: Need to save down this data into some sort of "standardised" format for this particular App 
#       and also complete doing ALL the pre-processing of data (and/or look at caching function)

In [23]:
# TODO: Also look to extract the date/time (original) information [meta-data] to construct walk stats