In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ARDRegression

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from zipfile import ZipFile

import datetime
import json
import re
import shutil

from tqdm import tqdm

import seaborn as sns

import ipywidgets
import os

import tensorflow as tf
import tensorflow.keras as keras

2024-01-20 10:52:46.451429: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Poids

In [2]:
# Directory holding the raw export archives (Withings, MyFitnessPal, Polar).
# It can be overridden with the WEIGHT_IMPORTS_DIR environment variable so the
# notebook is not tied to one machine; the default is the original location.
# os.path.join(..., '') guarantees the trailing separator that the
# `import_pathname + 'name'` concatenations in the loading cells rely on.
import_pathname = os.path.join(
    os.environ.get('WEIGHT_IMPORTS_DIR', '/home/benjamin/Folders_Python/Weight_imports/'),
    '',
)

In [171]:
# Load WEIGHT data

# Withings HealthMate export archive (zip): extract its weight.csv next to it
withings_filename = import_pathname + 'poids'
with ZipFile(withings_filename, 'r') as weight_zip:
    weight_csv = weight_zip.extract('weight.csv', path=import_pathname)

# Build the weight dataframe ----------------
colnames = ['Date', 'Poids (kg)', 'Gras (kg)', 'Masse osseuse (kg)', 'Masse musculaire (kg)', 'Hydratation (kg)']
try:
    df_weight = pd.read_csv(weight_csv, usecols=colnames)
finally:
    # always delete the extracted temporary CSV, even when parsing fails
    os.remove(weight_csv)

date_format = '%Y-%m-%d %H:%M:%S'
subset = ['Masse_Totale', 'Masse_Grasse']

# No time cutoff here: the start-date filter is applied later, on the merged dataset.
df_weight = (
    df_weight
    .rename(columns={'Poids (kg)': 'Masse_Totale',
                     'Gras (kg)': 'Masse_Grasse',
                     'Masse osseuse (kg)': 'Masse_Osseuse',
                     'Masse musculaire (kg)': 'Masse_Musculaire',
                     'Hydratation (kg)': 'Masse_Hydrique'})
    # Date strings -> datetime.date objects (the time of day is dropped)
    .assign(Date=lambda d: pd.to_datetime(d['Date'], format=date_format).dt.date)
    # total mass and fat mass are both required by the derived columns below
    .dropna(subset=subset)
    .assign(**{
        # fat mass as a fraction of total mass
        'MG%': lambda d: d['Masse_Grasse'] / d['Masse_Totale'],
        # basal metabolic rate from lean mass (Katch-McArdle)
        'BMR': lambda d: 370 + 21.6 * (d['Masse_Totale'] - d['Masse_Grasse']),
    })
)

# Per-day mean and standard deviation of every column (groupby sorts by Date).
# A day with a single measurement gets NaN in its std_* columns.
daily = df_weight.groupby('Date')
df_std = daily.std()
df_weight = daily.mean().join(df_std.add_prefix('std_'))

In [172]:
# Daily weight table: per-day means followed by the matching std_* columns
df_weight

Unnamed: 0_level_0,Masse_Totale,Masse_Grasse,Masse_Osseuse,Masse_Musculaire,Masse_Hydrique,MG%,BMR,std_Masse_Totale,std_Masse_Grasse,std_Masse_Osseuse,std_Masse_Musculaire,std_Masse_Hydrique,std_MG%,std_BMR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2001-01-01,72.000000,12.060000,3.000000,56.930000,40.060000,0.167500,1664.704000,,,,,,,
2010-10-29,94.750000,22.325000,,,,0.235566,1934.380000,0.707107,1.548564,,,,0.014586,18.175473
2010-10-30,93.216667,20.976667,,,,0.225046,1930.384000,0.057735,6.509780,,,,0.069891,141.231464
2010-10-31,92.675000,23.385000,,,,0.252336,1866.664000,0.035355,1.279863,,,,0.013906,28.408722
2010-11-01,92.950000,22.210000,,,,0.238946,1897.984000,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-16,72.593333,12.503333,3.002500,57.072500,40.121667,0.172237,1667.944000,0.139306,0.080941,0.006216,0.085187,0.081222,0.000874,1.927567
2024-01-17,72.378462,12.340769,3.000000,57.022308,40.098462,0.170503,1666.814154,0.026409,0.054842,0.000000,0.056296,0.061758,0.000757,1.287433
2024-01-18,73.055000,12.607500,3.020000,57.414167,40.441667,0.172574,1675.666000,0.135210,0.091266,0.006030,0.079938,0.075538,0.001012,1.802780
2024-01-19,72.494167,12.419167,3.002500,57.056667,40.122500,0.171311,1667.620000,0.158714,0.107319,0.004523,0.076555,0.068772,0.001160,1.729227


### Calories

In [173]:
# Load FOOD data

mfp_filename = import_pathname + "food"  # MyFitnessPal export archive (zip)

# Extract the first archive member whose name starts with 'Nutrition-Summary'
target = 'Nutrition-Summary'
food_csv = None
with ZipFile(mfp_filename, 'r') as food_zip:
    output_dir = food_zip.namelist()
    for member in output_dir:
        if member.startswith(target):
            food_csv = food_zip.extract(member, path=import_pathname)
            break
if food_csv is None:
    # fail loudly instead of hitting a NameError (or silently reusing a stale
    # path left in the kernel by a previous run)
    raise FileNotFoundError(f"no '{target}*' entry found in {mfp_filename}")

# Build the food dataframe -------------------------------
colnames = ['Date', 'Meal', 'Calories', 'Fat (g)', 'Carbohydrates (g)', 'Protein (g)']
try:
    df_food_raw = pd.read_csv(food_csv, usecols=colnames)
finally:
    # always delete the extracted temporary CSV, even when parsing fails
    os.remove(food_csv)

date_format = '%Y-%m-%d'
df_food_raw = (
    df_food_raw
    .rename(columns={'Fat (g)': 'Lipides', 'Carbohydrates (g)': 'Glucides', 'Protein (g)': 'Proteines'})
    # Date strings -> datetime.date objects
    .assign(Date=lambda d: pd.to_datetime(d['Date'], format=date_format).dt.date)
)
# Daily aggregation and the time cutoff are done in later cells.

In [174]:
# Raw food log as exported: one row per date and meal
df_food_raw

Unnamed: 0,Date,Meal,Calories,Lipides,Glucides,Proteines
0,2020-08-03,Breakfast,621.4,27.9,69.7,22.4
1,2020-08-03,Dinner,581.6,20.4,84.5,17.1
2,2020-08-03,Lunch,656.0,41.4,26.6,33.5
3,2020-08-03,Snacks,58.0,0.0,14.0,0.0
4,2020-08-04,Breakfast,1111.6,49.0,137.8,34.3
...,...,...,...,...,...,...
3675,2024-01-17,Lunch,904.6,14.3,54.5,142.0
3676,2024-01-18,Breakfast,958.8,16.1,63.0,145.9
3677,2024-01-18,Dinner,702.7,25.3,61.4,48.6
3678,2024-01-19,Dinner,900.3,20.3,119.5,68.1


In [175]:
# Daily totals: sum the numeric nutrition columns over all rows sharing a Date
df_food = df_food_raw.groupby('Date').sum(numeric_only=True)

In [176]:
meal_types = ['Breakfast', 'Dinner', 'Lunch', 'Snacks']
data_names = ['Calories', 'Lipides', 'Glucides', 'Proteines']
col_names = ['Date'] + data_names

# Start from the daily-total columns only, so that re-running this cell does not
# append the per-meal columns a second time.
meal_frames = [df_food[data_names]]
for meal_type in meal_types:
    # rows of this meal, indexed by Date, columns renamed to '<Meal>_<nutrient>'
    # NOTE(review): set_index assumes at most one row per (Date, Meal) — confirm
    df_to_add = (
        df_food_raw.loc[df_food_raw['Meal'] == meal_type, col_names]
        .set_index('Date')
        .add_prefix(meal_type + '_')
    )
    meal_frames.append(df_to_add)

# One column-wise concat instead of growing df_food inside the loop;
# dates without a given meal get NaN in that meal's columns.
df_food = pd.concat(meal_frames, axis=1)

In [177]:
# Daily food table: totals plus one '<Meal>_<nutrient>' column group per meal type
df_food

Unnamed: 0_level_0,Calories,Lipides,Glucides,Proteines,Breakfast_Calories,Breakfast_Lipides,Breakfast_Glucides,Breakfast_Proteines,Dinner_Calories,Dinner_Lipides,Dinner_Glucides,Dinner_Proteines,Lunch_Calories,Lunch_Lipides,Lunch_Glucides,Lunch_Proteines,Snacks_Calories,Snacks_Lipides,Snacks_Glucides,Snacks_Proteines
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-08-03,1917.0,89.7,194.8,73.0,621.4,27.9,69.7,22.4,581.6,20.4,84.5,17.1,656.0,41.4,26.6,33.5,58.0,0.0,14.0,0.0
2020-08-04,1992.7,67.7,267.5,78.4,1111.6,49.0,137.8,34.3,447.4,2.1,69.1,28.5,433.7,16.6,60.6,15.6,,,,
2020-08-05,1948.9,84.1,213.1,86.9,1174.2,51.6,147.1,35.7,136.7,6.7,3.2,14.8,638.0,25.8,62.8,36.4,,,,
2020-08-06,1937.2,66.4,134.3,60.0,747.0,35.3,81.4,25.8,801.3,15.5,22.6,11.8,388.9,15.6,30.3,22.4,,,,
2020-08-07,1674.0,67.5,168.9,95.2,791.9,35.5,91.6,27.3,397.5,7.2,75.4,7.9,484.6,24.8,1.9,60.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-15,1684.4,21.5,186.3,189.9,,,,,801.6,9.0,123.7,56.4,882.8,12.5,62.6,133.5,,,,
2024-01-16,1655.6,56.8,153.3,141.7,,,,,808.4,36.4,89.6,35.3,847.2,20.4,63.7,106.4,,,,
2024-01-17,1637.2,29.5,174.6,183.0,,,,,732.6,15.2,120.1,41.0,904.6,14.3,54.5,142.0,,,,
2024-01-18,1661.5,41.4,124.4,194.5,958.8,16.1,63.0,145.9,702.7,25.3,61.4,48.6,,,,,,,,


### Exercice

In [179]:
def extract_data_training(exo_dict):
    """Pull date, duration, sport and calories out of one training-session record.

    Args:
        exo_dict: dict decoded from a training-session JSON file. The keys
            'startTime', 'duration' and 'exercises' are read; only the first
            entry of 'exercises' is used.

    Returns:
        Tuple ``(exo_date, exo_duration, exo_type, exo_cals)``:
        a ``datetime.date``, the duration as a float, the value of the
        exercise's 'sport' key, and its 'kiloCalories' as a float
        (0.0 when the key is missing or falsy).

    Raises:
        ValueError: if no date can be found in 'startTime', no number in
            'duration', or the session has no exercise entry.
    """
    # Date: look for a 2xxx-yy-zz pattern in the start time
    start_time = exo_dict.get('startTime')
    m = re.search(r'2\d{3}-\d{2}-\d{2}', start_time) if isinstance(start_time, str) else None
    if m is None:
        raise ValueError(f"no date found in startTime: {start_time!r}")
    exo_date = datetime.datetime.strptime(m.group(), '%Y-%m-%d').date()

    # Duration: first number of the form xxxx.yyyy found in the field
    # NOTE(review): assumes the duration holds a single number (the result is
    # stored as 'exo_secondes' downstream) — confirm against the export format
    duration = exo_dict.get('duration')
    m = re.search(r'\d+[.]?\d*', duration) if isinstance(duration, str) else None
    if m is None:
        raise ValueError(f"no numeric duration found in duration: {duration!r}")
    exo_duration = float(m.group())

    # Activity type and calories burned, taken from the first exercise
    exercises = exo_dict.get('exercises')
    if not exercises:
        raise ValueError("training session has no 'exercises' entry")
    d = exercises[0]
    exo_type = d.get('sport')
    exo_cals = float(d.get('kiloCalories') or 0.0)

    return exo_date, exo_duration, exo_type, exo_cals

In [94]:
# Load EXERCISE data

polar_filename = import_pathname + "polar"  # Polar export archive (zip)

target = 'training-session'
list_all_exos = []
with ZipFile(polar_filename, 'r') as polar_zip:
    output_dir = polar_zip.namelist()
    # only the training-session records are of interest
    session_names = [name for name in output_dir if name.startswith(target)]

    for enr_name in tqdm(session_names):
        # Read the JSON straight from the archive: no temporary file is written,
        # so there is nothing to clean up if parsing fails.
        with polar_zip.open(enr_name) as f:
            exo_dict = json.load(f)
        exo_date, exo_duration, exo_type, exo_cals = extract_data_training(exo_dict)
        # one record per session: [date, sport, duration, calories]
        list_all_exos.append([exo_date, exo_type, exo_duration, exo_cals])

100%|██████████| 1828/1828 [00:54<00:00, 33.59it/s]


In [180]:
# Tabulate the parsed sessions: one row per training session
col_names = [
    'Date',
    'exo_type',
    'exo_secondes',
    'exo_cals_bruts',
]
df_exos = pd.DataFrame(data=list_all_exos, columns=col_names)

In [181]:
# Per-session exercise table (date, sport, duration, calories)
df_exos

Unnamed: 0,Date,exo_type,exo_secondes,exo_cals_bruts
0,2022-05-23,STRENGTH_TRAINING,2040.749,265.0
1,2023-08-20,STRENGTH_TRAINING,5220.729,442.0
2,2023-09-27,INDOOR_ROWING,3660.599,277.0
3,2022-12-26,STRENGTH_TRAINING,4260.740,598.0
4,2023-04-03,WALKING,4122.586,451.0
...,...,...,...,...
1008,2022-12-08,WALKING,809.472,127.0
1009,2022-06-24,INDOOR_ROWING,1020.720,189.0
1010,2022-03-08,INDOOR_ROWING,1902.093,260.0
1011,2021-04-06,STRENGTH_TRAINING,3002.443,205.0


In [182]:
# Daily totals: sum duration and calories over all sessions sharing a Date
# (numeric_only=True leaves the non-numeric exo_type column out)
df_exercices = df_exos.groupby('Date').sum(numeric_only=True)

In [183]:
# Daily exercise totals, all sports combined
df_exercices

Unnamed: 0_level_0,exo_secondes,exo_cals_bruts
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-03-29,3220.826,328.0
2021-03-30,2818.678,449.0
2021-03-31,3186.236,272.0
2021-04-03,3436.335,344.0
2021-04-04,2682.132,494.0
...,...,...
2023-12-31,6005.953,671.0
2024-01-03,3128.453,251.0
2024-01-04,1621.203,245.0
2024-01-05,2220.597,202.0


In [184]:
exo_types = list(df_exos['exo_type'].unique())

data_names = ['exo_secondes', 'exo_cals_bruts']
col_names = ['Date'] + data_names

# Start from the daily-total columns only, so that re-running this cell does not
# append the per-sport columns a second time.
exo_frames = [df_exercices[data_names]]
for exo_type in exo_types:
    # daily sums for this sport, columns renamed to '<SPORT>_<measure>'
    df_to_add = (
        df_exos.loc[df_exos['exo_type'] == exo_type, col_names]
        .groupby('Date')
        .sum()
        .add_prefix(exo_type + '_')
    )
    exo_frames.append(df_to_add)

# One column-wise concat instead of growing df_exercices inside the loop;
# dates without a given sport get NaN in that sport's columns.
df_exercices = pd.concat(exo_frames, axis=1)

In [185]:
# Daily exercise table: totals plus one '<SPORT>_<measure>' column pair per sport
df_exercices

Unnamed: 0_level_0,exo_secondes,exo_cals_bruts,STRENGTH_TRAINING_exo_secondes,STRENGTH_TRAINING_exo_cals_bruts,INDOOR_ROWING_exo_secondes,INDOOR_ROWING_exo_cals_bruts,WALKING_exo_secondes,WALKING_exo_cals_bruts,BOXING_exo_secondes,BOXING_exo_cals_bruts,...,CROSS-COUNTRY_SKIING_exo_secondes,CROSS-COUNTRY_SKIING_exo_cals_bruts,ROWING_exo_secondes,ROWING_exo_cals_bruts,TREADMILL_RUNNING_exo_secondes,TREADMILL_RUNNING_exo_cals_bruts,SWIMMING_exo_secondes,SWIMMING_exo_cals_bruts,DOWNHILL_SKIING_exo_secondes,DOWNHILL_SKIING_exo_cals_bruts
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-29,3220.826,328.0,3220.826,328.0,,,,,,,...,,,,,,,,,,
2021-03-30,2818.678,449.0,,,,,,,,,...,,,,,,,,,,
2021-03-31,3186.236,272.0,3186.236,272.0,,,,,,,...,,,,,,,,,,
2021-04-03,3436.335,344.0,3436.335,344.0,,,,,,,...,,,,,,,,,,
2021-04-04,2682.132,494.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31,6005.953,671.0,6005.953,671.0,,,,,,,...,,,,,,,,,,
2024-01-03,3128.453,251.0,3128.453,251.0,,,,,,,...,,,,,,,,,,
2024-01-04,1621.203,245.0,,,1621.203,245.0,,,,,...,,,,,,,,,,
2024-01-05,2220.597,202.0,2220.597,202.0,,,,,,,...,,,,,,,,,,


In [186]:
# Summary statistics, transposed so that each column of df_exercices becomes a row
df_exercices.describe(include='all').transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
exo_secondes,753.0,4440.830564,2305.557219,900.0,2776.741,4140.713,5734.301,28800.0
exo_cals_bruts,753.0,485.288181,230.554819,0.0,315.0,450.0,609.0,2290.0
STRENGTH_TRAINING_exo_secondes,463.0,4442.945769,1445.718277,780.713,3375.6545,4620.736,5296.72,8762.855
STRENGTH_TRAINING_exo_cals_bruts,463.0,422.788337,168.704776,48.0,304.0,411.0,531.0,1099.0
INDOOR_ROWING_exo_secondes,302.0,2377.935281,1256.600346,194.716,1760.71175,2100.716,2837.21275,13800.74
INDOOR_ROWING_exo_cals_bruts,302.0,383.817881,208.39114,18.0,269.25,340.0,488.75,2290.0
WALKING_exo_secondes,83.0,4971.983904,3820.032777,437.735,2430.7275,3840.0,7516.4155,28800.0
WALKING_exo_cals_bruts,83.0,429.361446,316.497508,0.0,180.0,347.0,619.0,1243.0
BOXING_exo_secondes,4.0,1199.98575,375.262873,900.704,990.70325,1077.4065,1286.689,1744.426
BOXING_exo_cals_bruts,4.0,168.0,54.080187,113.0,137.0,159.5,190.5,240.0


### Full Dataset

In [187]:
# Merge the three daily tables side by side on their Date index, then order by date;
# a date missing from one source gets NaN in that source's columns
dataset = pd.concat([df_weight, df_food, df_exercices], axis=1).sort_index()

In [189]:
# Merged daily dataset (weight + food + exercise), before the time cutoff
dataset

Unnamed: 0_level_0,Masse_Totale,Masse_Grasse,Masse_Osseuse,Masse_Musculaire,Masse_Hydrique,MG%,BMR,std_Masse_Totale,std_Masse_Grasse,std_Masse_Osseuse,...,CROSS-COUNTRY_SKIING_exo_secondes,CROSS-COUNTRY_SKIING_exo_cals_bruts,ROWING_exo_secondes,ROWING_exo_cals_bruts,TREADMILL_RUNNING_exo_secondes,TREADMILL_RUNNING_exo_cals_bruts,SWIMMING_exo_secondes,SWIMMING_exo_cals_bruts,DOWNHILL_SKIING_exo_secondes,DOWNHILL_SKIING_exo_cals_bruts
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-01-01,72.000000,12.060000,3.000000,56.930000,40.060000,0.167500,1664.704000,,,,...,,,,,,,,,,
2010-10-29,94.750000,22.325000,,,,0.235566,1934.380000,0.707107,1.548564,,...,,,,,,,,,,
2010-10-30,93.216667,20.976667,,,,0.225046,1930.384000,0.057735,6.509780,,...,,,,,,,,,,
2010-10-31,92.675000,23.385000,,,,0.252336,1866.664000,0.035355,1.279863,,...,,,,,,,,,,
2010-11-01,92.950000,22.210000,,,,0.238946,1897.984000,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-16,72.593333,12.503333,3.002500,57.072500,40.121667,0.172237,1667.944000,0.139306,0.080941,0.006216,...,,,,,,,,,,
2024-01-17,72.378462,12.340769,3.000000,57.022308,40.098462,0.170503,1666.814154,0.026409,0.054842,0.000000,...,,,,,,,,,,
2024-01-18,73.055000,12.607500,3.020000,57.414167,40.441667,0.172574,1675.666000,0.135210,0.091266,0.006030,...,,,,,,,,,,
2024-01-19,72.494167,12.419167,3.002500,57.056667,40.122500,0.171311,1667.620000,0.158714,0.107319,0.004523,...,,,,,,,,,,


### Nettoyage

In [192]:
# Time cutoff: keep only the rows dated on or after 1 September 2020
start_date = datetime.date(2020, 9, 1)
dataset = dataset.loc[dataset.index >= start_date]

In [193]:
# Merged daily dataset restricted to dates >= start_date
dataset

Unnamed: 0_level_0,Masse_Totale,Masse_Grasse,Masse_Osseuse,Masse_Musculaire,Masse_Hydrique,MG%,BMR,std_Masse_Totale,std_Masse_Grasse,std_Masse_Osseuse,...,CROSS-COUNTRY_SKIING_exo_secondes,CROSS-COUNTRY_SKIING_exo_cals_bruts,ROWING_exo_secondes,ROWING_exo_cals_bruts,TREADMILL_RUNNING_exo_secondes,TREADMILL_RUNNING_exo_cals_bruts,SWIMMING_exo_secondes,SWIMMING_exo_cals_bruts,DOWNHILL_SKIING_exo_secondes,DOWNHILL_SKIING_exo_cals_bruts
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-09-01,101.250000,33.020000,,,,0.326123,1843.768000,,,,...,,,,,,,,,,
2020-09-02,101.150000,31.945000,,,,0.315818,1864.828000,0.000000,0.106066,,...,,,,,,,,,,
2020-09-03,101.200000,30.930000,,,,0.305632,1887.832000,,,,...,,,,,,,,,,
2020-09-04,100.800000,29.460000,,,,0.292262,1910.944000,,,,...,,,,,,,,,,
2020-09-05,100.600000,31.450000,,,,0.312624,1863.640000,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-16,72.593333,12.503333,3.002500,57.072500,40.121667,0.172237,1667.944000,0.139306,0.080941,0.006216,...,,,,,,,,,,
2024-01-17,72.378462,12.340769,3.000000,57.022308,40.098462,0.170503,1666.814154,0.026409,0.054842,0.000000,...,,,,,,,,,,
2024-01-18,73.055000,12.607500,3.020000,57.414167,40.441667,0.172574,1675.666000,0.135210,0.091266,0.006030,...,,,,,,,,,,
2024-01-19,72.494167,12.419167,3.002500,57.056667,40.122500,0.171311,1667.620000,0.158714,0.107319,0.004523,...,,,,,,,,,,


In [194]:
# Summary statistics, transposed so that each column of dataset becomes a row
dataset.describe(include='all').transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Masse_Totale,1228.0,73.375685,7.106499,66.0,69.2325,71.284286,73.167857,101.25
Masse_Grasse,1228.0,11.928024,4.650001,7.541667,8.925937,10.778125,11.992364,33.02
Masse_Osseuse,322.0,3.033345,0.059992,2.934286,2.9975,3.019583,3.0565,3.4
Masse_Musculaire,322.0,57.500714,0.962663,55.7,56.885972,57.317333,57.9325,61.84
Masse_Hydrique,322.0,40.8252,1.127273,38.921429,40.121875,40.5575,41.284152,47.9
MG%,1228.0,0.158741,0.040631,0.112927,0.128734,0.150843,0.165863,0.326123
BMR,1228.0,1697.269482,55.846954,1626.348571,1663.96,1678.711,1704.6235,1917.928
std_Masse_Totale,1212.0,0.069255,0.046173,0.0,0.037796,0.056045,0.089423,0.588509
std_Masse_Grasse,1212.0,0.085519,0.072286,0.0,0.054744,0.073824,0.095562,1.036352
std_Masse_Osseuse,321.0,0.004754,0.003839,0.0,0.00378,0.00488,0.005345,0.063631
