In [1]:
import tempfile
from pathlib import Path

import activityio as aio
import boto3
import pandas as pd
import s3fs
import tomli
import urllib.parse


In [2]:
from emmaus_walking.core import get_project_root

In [3]:
# Show the project root; load_config_file resolves config file names against it.
get_project_root()

PosixPath('/Users/mjboothaus/code/github/mjboothaus/emmaus_walking')

In [4]:
def load_config_file(config_file):
    """Parse a TOML file that lives under the project root.

    Args:
        config_file: Path of the TOML file, relative to ``get_project_root()``.

    Returns:
        dict: The parsed TOML document.
    """
    config_path = get_project_root() / config_file
    # Parse from decoded text with ``tomli.loads`` instead of passing a
    # text-mode handle to ``tomli.load``: tomli 2.x only accepts binary file
    # objects there and raises TypeError for text handles, whereas ``loads``
    # takes a ``str`` in every tomli release.
    return tomli.loads(config_path.read_text(encoding="utf-8"))

In [5]:
# Name of the application's TOML config file; load_config_file resolves it
# against the project root.
APP_CONFIG_FILE = "app_config.toml"
app_config = load_config_file(APP_CONFIG_FILE)

In [6]:
# Inspect the parsed config.
# NOTE(review): the displayed dict includes S3.ACCESS_KEY_ID, so it ends up in
# the saved notebook output — consider clearing or redacting before sharing.
app_config

{'APP': {'DATASOURCE': 'Apple Watch via Health Fit',
  'AUTHOR': 'by [DataBooth.com.au](https://www.databooth.com.au)',
  'NAME': 'Emmaus Walking Mapping App',
  'CACHED_DATAFILE': 'emmaus_walking.cache.feather',
  'LAYOUT': 'wide'},
 'S3': {'ACCESS_KEY_ID': 'SCW5MSNHMS37P6Q0ZZV2',
  'SECRET_ACCESS_KEY': 'app_secrets.toml',
  'STORAGE_BUCKET_NAME': 'emmaus-walking',
  'DEFAULT_ACL': 'public-read',
  'REGION_NAME': 'fr-par',
  'ENDPOINT_URL': 'https://s3.fr-par.scw.cloud'}}

In [7]:
# Secrets live in a separate TOML file from the main config; it is loaded the
# same way and supplies the S3 secret access key used below.
APP_SECRETS_FILE = "app_secrets.toml"
app_secrets = load_config_file(APP_SECRETS_FILE)

In [8]:
# Filesystem-style client for the S3-compatible endpoint named in the config.
# The access key id is read from app_config, the secret from app_secrets.
s3 = s3fs.S3FileSystem(
    key=app_config["S3"]["ACCESS_KEY_ID"],
    secret=app_secrets["S3"]["SECRET_ACCESS_KEY"],
    client_kwargs={
        "endpoint_url": app_config["S3"]["ENDPOINT_URL"],
        "region_name": app_config["S3"]["REGION_NAME"],
    },
)

In [9]:
# S3 prefix (bucket name first) that is listed and read from in the cells below.
s3_test_path = "emmaus-walking/data/FIT/B2M/"

In [10]:
# List the objects stored under the test prefix.
B2M_files = s3.ls(s3_test_path)

In [11]:
# this doesn't solve the filename problem - need to find another solution

def fix_filename(filename):
    """Return ``filename`` percent-encoded with ``urllib.parse.quote``.

    ``/`` is left untouched (it is ``quote``'s default safe character), while
    spaces and non-ASCII characters are replaced by ``%XX`` escapes of their
    UTF-8 bytes.
    """
    percent_encoded = urllib.parse.quote(filename, safe="/")
    return percent_encoded


In [12]:
# Percent-encode every listed key.
# NOTE(review): this rebinds B2M_files, so re-running the cell feeds the
# already-quoted names through fix_filename again — confirm whether the raw
# listing should be kept under a separate name.
B2M_files = [fix_filename(file) for file in B2M_files]



In [13]:
# File name with the non-breaking space between "Apple" and "Watch" written as
# an explicit \xa0 escape.
BAD_str = "2020-07-13-085452-Walking-Michael and Ai Leen’s Apple\xa0Watch.fit"



In [14]:
OK_str = "2020-07-13-085452-Walking-Michael and Ai Leen’s Apple Watch.fit"



In [15]:
# Compare the two names character by character via their UTF-8 encodings.
assert [ch.encode("utf-8") for ch in BAD_str] == [
    ch.encode("utf-8") for ch in OK_str
]



In [16]:
# Per-character UTF-8 encoding of OK_str; the trailing ';' suppresses the cell output.
[bytes(char, "utf-8") for char in OK_str];



In [17]:
# Inspect the listing after it has been passed through fix_filename.
B2M_files



['emmaus-walking/data/FIT/B2M/2020-07-13-085452-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.fit',
 'emmaus-walking/data/FIT/B2M/2020-07-20-085713-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.fit',
 'emmaus-walking/data/FIT/B2M/2020-07-28-104039-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.fit',
 'emmaus-walking/data/FIT/B2M/2020-08-14-110916-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.fit',
 'emmaus-walking/data/FIT/B2M/2020-08-21-123755-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.fit',
 'emmaus-walking/data/FIT/B2M/2020-08-28-112353-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.ORIG',
 'emmaus-walking/data/FIT/B2M/2020-08-28-112353-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch.fit',
 'emmaus-walking/data/FIT/B2M/2020-08-28-112353-Walking-Michael%20and%20Ai%20Leen%E2%80%99s%20Apple%C2%A0Watch_tmp.fit.OFF',
 'emmaus-walking/data/FIT/B2M/2020-09-04-104005-Walking

In [18]:
# Try downloading a file and importing it into pandas - now working, but it has to be saved to a temp file first and then read from there


def pd_read_fit_s3(filename):
    """Read a FIT file from S3 into a pandas DataFrame.

    The object is downloaded into a temporary directory first because
    ``aio.read`` is given a local file path here rather than a file object.

    Args:
        filename: S3 path (bucket name first) of the FIT file to read.

    Returns:
        tuple: ``(fit_df, name, size)`` where ``fit_df`` is the DataFrame built
        from ``aio.read`` and ``name`` / ``size`` are the corresponding entries
        of the S3 file's ``info()`` mapping.
    """
    with s3.open(filename, mode="rb") as f:
        # Look the metadata up once, while the handle is still open.
        file_info = f.info()
        fit_bytes = f.read()

    with tempfile.TemporaryDirectory() as tmp_dirname:
        tmp_fit_path = Path(tmp_dirname) / "tmp.fit"
        # write_bytes closes the file before returning, so the whole payload is
        # on disk before it is parsed (previously the file was read back while
        # the write handle was still open and unflushed).
        tmp_fit_path.write_bytes(fit_bytes)
        # Build the DataFrame before the temporary directory is removed.
        fit_df = pd.DataFrame(aio.read(tmp_fit_path))

    return fit_df, file_info["name"], file_info["size"]



In [19]:
# test_file = s3_test_path + B2M_files[0]
# The stored key has a non-breaking space (U+00A0) between "Apple" and "Watch"
# (see the name reported back by S3). It is written as an explicit escape here
# so the character is visible and cannot be silently replaced by a plain space.
test_file = (
    s3_test_path + "2020-07-13-085452-Walking-Michael and Ai Leen’s Apple\xa0Watch.fit"
)



In [20]:
# Load the test file; unpacks the DataFrame plus the name and size reported for the S3 object.
fit_df, fit_name, fit_size = pd_read_fit_s3(test_file)



In [21]:
# Preview the first rows of the imported activity data.
fit_df.head()



Unnamed: 0_level_0,fractional_cadence_rpm,lap,alt,cad,dist,lat,lon,speed,temp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0 days 00:00:00,0.703125,1,21.6,43.0,0.32,-33.89054,151.274833,1.475,12.0
0 days 00:00:01,0.703125,1,21.6,43.0,1.77,-33.890548,151.274844,1.472,12.0
0 days 00:00:02,0.703125,1,21.6,43.0,3.28,-33.890557,151.274856,1.465,12.0
0 days 00:00:03,0.085938,1,21.6,44.0,4.78,-33.890566,151.274868,1.457,12.0
0 days 00:00:04,0.085938,1,21.6,44.0,6.2,-33.890574,151.274879,1.446,12.0


In [22]:
# Show the object name and size returned alongside the DataFrame.
[fit_name, fit_size]



['emmaus-walking/data/FIT/B2M/2020-07-13-085452-Walking-Michael and Ai Leen’s Apple\xa0Watch.fit',
 329534]

In [23]:
# s3.download(rpath='emmaus-walking/data/FIT/B2M', lpath='/Users/mjboothaus/tmp/scaleway_tmp/B2M', recursive=True)



In [24]:
# Not sure how to make this work
# df = pd.read_excel("s3://bucket/path/file.xls", storage_options={"anon": True})

