In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from zipfile import ZipFile

import datetime
import json
import re

In [40]:
# Folder holding the downloaded export archives; later cells build file paths
# by string concatenation, so the trailing slash matters.
pathname = "/home/benjamin/Folders_Python/Weight/imports/"

polar_filename = pathname + "TBD.zip"  # Polar data (placeholder name)

# TODO: pick up the most recent zip archives whose names start with data_BEN and File-Export...

In [41]:
# Load WEIGHT data
#
# Source: Withings https://healthmate.withings.com/
# Account > User > Parameters > Download ==> data_BEN_xxxxx.zip ==> contains weight.csv

withings_filename = pathname + "data_BEN.zip"  # HealthMate export archive

with ZipFile(withings_filename, 'r') as weight_zip:
    # extract() writes weight.csv under `pathname` and returns the path it wrote
    weight_csv = weight_zip.extract('weight.csv', path=pathname)

# Settings used to build the weight dataframe ----------------

colnames = ['Date', 'Poids (kg)', 'Gras (kg)']
date_format = '%Y-%m-%d %H:%M:%S'  # layout of the 'Date' strings parsed below
start_date = datetime.date(2020, 8, 1)  # keep data from 1 August 2020 onwards

df_weight = (
    pd.read_csv(weight_csv, usecols=colnames)
    .rename(columns={'Poids (kg)': 'Masse_Totale', 'Gras (kg)': 'Masse_Grasse'})
    # turn each 'Date' string into a datetime.date so rows group by calendar day
    .assign(Date=lambda frame: frame['Date'].apply(
        lambda stamp: datetime.datetime.strptime(stamp, date_format).date()))
    .groupby('Date')
    .mean()  # one row per day: the mean of that day's measurements
    .loc[lambda daily: daily.index >= start_date]
    .dropna()
    # lean mass = total mass - fat mass
    .assign(Masse_Maigre=lambda daily: daily['Masse_Totale'] - daily['Masse_Grasse'])
    # BMR = 370 + 21.6 * lean mass (same form as the Katch-McArdle formula)
    .assign(BMR=lambda daily: 370 + 21.6 * daily['Masse_Maigre'])
)

In [42]:
# Display the last 10 rows of the daily weight dataframe
df_weight.tail(10)

Unnamed: 0_level_0,Masse_Totale,Masse_Grasse,Masse_Maigre,BMR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-12-24,72.5,10.766,61.734,1703.4544
2022-12-25,72.478571,11.037143,61.441429,1697.134857
2022-12-26,71.95,10.984286,60.965714,1686.859429
2022-12-27,72.68,11.192,61.488,1698.1408
2022-12-28,72.01875,11.90875,60.11,1668.376
2022-12-29,71.566667,11.422222,60.144444,1669.12
2022-12-30,71.31,11.21,60.1,1668.16
2022-12-31,71.81,10.89,60.92,1685.872
2023-01-01,71.8,9.5,62.3,1715.68
2023-01-02,71.2,9.5,61.7,1702.72


In [43]:
# Load FOOD data
#
# https://www.myfitnesspal.com/reports
# www.myfitnesspal.com ==> reports > export data ==> File-Export-date1-to-date2.zip

mfp_filename = pathname + "File-Export.zip"  # MyFitnessPal export archive

# Extract the nutrition summary from the archive

target = 'Nutrition-Summary'

with ZipFile(mfp_filename, 'r') as food_zip:
    output_dir = food_zip.namelist()
    # first archive member whose name starts with the target prefix
    summary_name = next((name for name in output_dir if name.startswith(target)), None)
    if summary_name is None:
        # fail here rather than later with a NameError (or, worse, silently
        # reusing a stale `food_csv` left over from a previous run)
        raise FileNotFoundError(f"no '{target}*' file found in {mfp_filename}")
    food_csv = food_zip.extract(summary_name, path=pathname)

# Build the food dataframe -------------------------------

colnames = ['Date', 'Meal', 'Calories', 'Fat (g)', 'Carbohydrates (g)', 'Protein (g)']
date_format = '%Y-%m-%d'
start_date = datetime.date(2020, 8, 1)  # keep data from 1 August 2020 onwards

df_food = pd.read_csv(food_csv, usecols=colnames).rename(
    columns={'Fat (g)': 'Lipides', 'Carbohydrates (g)': 'Glucides', 'Protein (g)': 'Proteines'}
)

# turn the 'Date' strings into datetime.date objects so rows group by calendar day
df_food['Date'] = df_food['Date'].apply(lambda x: datetime.datetime.strptime(x, date_format).date())

# Daily totals. numeric_only=True keeps only the numeric columns: without it,
# pandas >= 2.0 concatenates the 'Meal' strings into a column of its own
# instead of dropping it as pandas 1.x did.
df_food = df_food.groupby('Date').sum(numeric_only=True)

df_food = df_food[df_food.index >= start_date].dropna()

In [44]:
# Display the last 10 rows of the daily food dataframe
df_food.tail(10)

Unnamed: 0_level_0,Calories,Lipides,Glucides,Proteines
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-12-25,1850.3,34.7,166.0,160.7
2022-12-26,2328.3,58.3,273.1,173.1
2022-12-27,1611.4,37.5,120.1,189.5
2022-12-28,1767.4,39.0,180.2,166.6
2022-12-29,1784.0,33.7,179.0,189.0
2022-12-30,1666.7,36.4,141.3,191.9
2022-12-31,3197.2,66.4,424.4,203.1
2023-01-01,1726.4,38.4,152.0,195.1
2023-01-02,2044.6,47.1,233.7,173.5
2023-01-03,1067.9,17.1,72.1,157.9


In [52]:
def _duration_to_seconds(duration):
    """Convert a 'PT…' duration string (e.g. 'PT1798.5S', 'PT1H30M') to seconds."""
    body = duration[2:]  # drop the leading 'PT' designator
    number = r'(\d+(?:\.\d+)?)'

    # A bare number with no unit designator is taken as seconds.
    if re.fullmatch(number, body):
        return float(body)

    match = re.fullmatch(rf'(?:{number}H)?(?:{number}M)?(?:{number}S)?', body)
    if match is None or not any(match.groups()):
        raise ValueError(f'unsupported duration format: {duration!r}')

    hours, minutes, seconds = (float(part) if part else 0.0 for part in match.groups())
    return hours * 3600 + minutes * 60 + seconds


def extract_data_training(exo_dict):
    """Pull the date, duration, sport and calories out of one training-session dict.

    Parameters
    ----------
    exo_dict : dict
        Parsed training-session JSON. The keys read are 'startTime',
        'duration' and 'exercises'; only the first entry of 'exercises'
        is used, through its 'sport' and 'kiloCalories' keys.

    Returns
    -------
    tuple
        (exo_date, exo_duration, exo_type, exo_cals) where exo_date is a
        datetime.datetime at midnight of the start day, exo_duration is the
        duration in seconds as a float, and exo_type / exo_cals are the
        'sport' and 'kiloCalories' values, or None when they are absent or
        the session has no exercise entry.

    Raises
    ------
    ValueError
        If the start date or the duration cannot be parsed.
    """
    time_format = '%Y-%m-%d'
    # only the first 10 characters (YYYY-MM-DD) of the timestamp are used
    exo_date = datetime.datetime.strptime(exo_dict.get('startTime')[:10], time_format)

    # Fractional seconds and hour/minute components are honoured; cutting the
    # string at the first non-digit would turn 'PT1H30M' into 1.0.
    exo_duration = _duration_to_seconds(exo_dict.get('duration'))

    # tolerate a session without any exercise entry instead of raising
    exercises = exo_dict.get('exercises') or []
    first_exercise = exercises[0] if exercises else {}

    exo_type = first_exercise.get('sport')
    exo_cals = first_exercise.get('kiloCalories')

    return exo_date, exo_duration, exo_type, exo_cals

In [53]:
# Load EXERCISE data

# https://account.polar.com/#export ==> polar-user-data-export-xxxxx.zip

polar_filename = pathname + "polar-user-data-export.zip"  # Polar export archive

target = 'training-session'

with ZipFile(polar_filename, 'r') as polar_zip:
    output_dir = polar_zip.namelist()

    # keep only the archive members that record a training session
    session_names = [enr_name for enr_name in output_dir if enr_name.startswith(target)]

    for i, enr_name in enumerate(session_names):
        # Read the JSON straight from the archive: nothing is written to disk,
        # so there is no temporary file to remove (or to leak if parsing fails).
        with polar_zip.open(enr_name) as f:
            exo_dict = json.load(f)
        exo_date, exo_duration, exo_type, exo_cals = extract_data_training(exo_dict)
        print(f'exercice {i+1} : date = {exo_date}, durée = {exo_duration}, type = {exo_type}, cals = {exo_cals}')


# # extrait la dataframe exercices -------------------------------

# colnames = ['Début', 'Fin', "Type d'activité", 'Données']

# df_exercices = pd.read_csv(exercice_csv, usecols=colnames)
# df_exercices.rename(columns = {'Début' : 'Debut' , "Type d'activité" : "Type", "Données" : "Donnees" }, inplace=True)

# # transforme le champ str de Date en datetime object

# date_format = '%Y-%m-%dT'

# # df_food['Date'] = df_food['Date'].apply(lambda x : datetime.datetime.strptime(x, date_format).date())
# # df_food = df_food.groupby('Date').sum()

# # start_date = datetime.date(2020, 8, 1)  # on prend les data depuis le 1er Août 2020

# # df_food = df_food[df_food.index >= start_date]
# # df_food.dropna(inplace=True)

exercice 1 : date = 2022-01-26 00:00:00, durée = 1798.0, type = INDOOR_ROWING, cals = 235
exercice 2 : date = 2022-03-13 00:00:00, durée = 1860.0, type = INDOOR_ROWING, cals = 301
exercice 3 : date = 2022-04-21 00:00:00, durée = 797.0, type = INDOOR_ROWING, cals = 89
exercice 4 : date = 2022-04-25 00:00:00, durée = 1286.0, type = INDOOR_ROWING, cals = 181
exercice 5 : date = 2021-09-17 00:00:00, durée = 2225.0, type = STRENGTH_TRAINING, cals = 226
exercice 6 : date = 2022-05-28 00:00:00, durée = 6900.0, type = INDOOR_ROWING, cals = 1034
exercice 7 : date = 2022-02-06 00:00:00, durée = 3237.0, type = INDOOR_ROWING, cals = 449
exercice 8 : date = 2022-05-04 00:00:00, durée = 4898.0, type = STRENGTH_TRAINING, cals = 433
exercice 9 : date = 2021-11-26 00:00:00, durée = 7190.0, type = STRENGTH_TRAINING, cals = 671
exercice 10 : date = 2021-12-18 00:00:00, durée = 4841.0, type = STRENGTH_TRAINING, cals = 423
exercice 11 : date = 2021-07-31 00:00:00, durée = 2534.0, type = INDOOR_ROWING, cals