In [1]:
from sqlalchemy import create_engine
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Fixes the issue with the MySQLdb module not being found.
import pymysql
pymysql.install_as_MySQLdb()

In [2]:
# Input MySQL username and password for grant restricted user.
username = 'diet_user'
password = 'diet_user'

# Manually entered for privacy purposes.
aws_identifier = input('Enter AWS server identifier: ')

# Create the MySQL URL.
mysql_url = create_engine(f'mysql://{username}:{password}@aws-diet-exercise-db.{aws_identifier}.us-east-2.rds.amazonaws.com:3306/diet')

# Call MySQL stored procedure to get the current food entries.
foods_df = pd.read_sql('''CALL GetFoodsByMealAndDateFromFoods();''',mysql_url, index_col='id')

foods_df.head(10)

Unnamed: 0_level_0,date,meal,food_name,servings,carbs,fats,proteins,calories
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
353,2023-08-03,Breakfast,Ramen,1,54,14,8,370
352,2023-08-03,Breakfast,Eggs,3,0,5,6,70
351,2023-08-03,Breakfast,Pork Bacon,2,0,7,7,90
350,2023-08-03,Post-Workout,Protein Powder,4,3,2,24,120
349,2023-08-03,Post-Workout,Macaroni Salad,2,30,17,5,310
348,2023-08-03,Dinner,Ground Chuck 4oz,1,0,20,31,308
347,2023-08-03,Dinner,Cooked Pasta 140g,1,43,1,8,220
346,2023-08-03,Lunch,Steak Ques.,1,42,27,26,520
345,2023-08-03,Breakfast,Ramen,1,54,14,8,370
344,2023-08-03,Breakfast,Eggs,3,0,5,6,70


In [3]:
# Add a total calories column
foods_df['total_cals'] = foods_df['servings'] * foods_df['calories'].astype(int)
foods_df['total_carbs'] = foods_df['servings'] * foods_df['carbs'].astype(int)
foods_df['total_fats'] = foods_df['servings'] * foods_df['fats'].astype(int)
foods_df['total_proteins'] = foods_df['servings'] * foods_df['proteins'].astype(int)
foods_df.head()

Unnamed: 0_level_0,date,meal,food_name,servings,carbs,fats,proteins,calories,total_cals,total_carbs,total_fats,total_proteins
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
353,2023-08-03,Breakfast,Ramen,1,54,14,8,370,370,54,14,8
352,2023-08-03,Breakfast,Eggs,3,0,5,6,70,210,0,15,18
351,2023-08-03,Breakfast,Pork Bacon,2,0,7,7,90,180,0,14,14
350,2023-08-03,Post-Workout,Protein Powder,4,3,2,24,120,480,12,8,96
349,2023-08-03,Post-Workout,Macaroni Salad,2,30,17,5,310,620,60,34,10


In [4]:
macros_bydate = foods_df.groupby(
    foods_df['date'], sort=False).aggregate({'total_cals': 'sum', 'total_carbs': 'sum',
                                             'total_fats': 'sum', 'total_proteins': 'sum'}).reset_index()
macros_bydate.head()

Unnamed: 0,date,total_cals,total_carbs,total_fats,total_proteins
0,2023-08-03,3658,267,175,247
1,2023-08-02,3926,498,109,280
2,2023-08-01,2708,355,67,175
3,2023-07-30,1000,54,35,122
4,2023-07-22,2680,276,85,204


In [5]:
bm_df = pd.read_sql('''CALL GetBodyMeasurementsByDate()''', mysql_url, index_col='id')
bm_df.head()

Unnamed: 0_level_0,date,body_weight,body_fat,muscle_mass,fat_mass,workout_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
27,2023-08-05,198.2,24.6,81.7,48.8,"""REST"""
26,2023-08-03,198.2,25.2,81.0,49.9,"""REST"""
25,2023-07-30,197.6,25.9,79.8,51.1,"""REST"""
24,2023-07-22,196.4,25.0,80.4,49.1,"""REST"""
23,2023-07-20,195.8,24.8,80.4,48.6,"""REST"""


In [6]:
bm_df['body_weight'] = bm_df['body_weight'].astype(float)
bm_df['body_fat'] = bm_df['body_fat'].astype(float)
bm_df['muscle_mass'] = bm_df['muscle_mass'].astype(float)
bm_df['fat_mass'] = bm_df['fat_mass'].astype(float)
bm_df.head()

Unnamed: 0_level_0,date,body_weight,body_fat,muscle_mass,fat_mass,workout_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
27,2023-08-05,198.2,24.6,81.7,48.8,"""REST"""
26,2023-08-03,198.2,25.2,81.0,49.9,"""REST"""
25,2023-07-30,197.6,25.9,79.8,51.1,"""REST"""
24,2023-07-22,196.4,25.0,80.4,49.1,"""REST"""
23,2023-07-20,195.8,24.8,80.4,48.6,"""REST"""


In [7]:
combined_df = pd.merge(macros_bydate, bm_df, on='date', how='inner')
combined_df.head(10)

Unnamed: 0,date,total_cals,total_carbs,total_fats,total_proteins,body_weight,body_fat,muscle_mass,fat_mass,workout_type
0,2023-08-03,3658,267,175,247,198.2,25.2,81.0,49.9,"""REST"""
1,2023-07-30,1000,54,35,122,197.6,25.9,79.8,51.1,"""REST"""
2,2023-07-22,2680,276,85,204,196.4,25.0,80.4,49.1,"""REST"""
3,2023-07-20,2848,236,113,223,195.8,24.8,80.4,48.6,"""REST"""
4,2023-07-19,3400,462,73,251,193.2,23.8,80.6,46.0,"""SHDR"""
5,2023-07-16,2730,250,102,220,194.4,26.8,77.2,52.0,"""REST"""
6,2023-07-14,2910,237,124,209,195.2,25.6,79.1,50.0,"""REST"""
7,2023-07-13,4008,499,117,256,195.6,24.5,80.7,47.9,"""LEGS"""
8,2023-07-12,2740,202,122,203,195.2,26.1,78.5,50.9,"""REST"""
9,2023-07-11,3300,333,139,199,195.6,26.9,77.6,52.5,"""SHDR"""


In [8]:
combined_df['mmass_diff'] = combined_df.muscle_mass.diff(periods=1)
combined_df['fmass_diff'] = combined_df.fat_mass.diff(periods=1)
combined_df['bf_diff'] = combined_df.body_fat.diff(periods=1)
combined_df['bw_diff'] = combined_df.body_weight.diff(periods=1)
combined_df = combined_df.dropna(axis=0, how='any')
combined_df.drop(['body_weight', 'body_fat', 'muscle_mass', 'fat_mass', 'workout_type'], axis=1, inplace=True)
combined_df

Unnamed: 0,date,total_cals,total_carbs,total_fats,total_proteins,mmass_diff,fmass_diff,bf_diff,bw_diff
1,2023-07-30,1000,54,35,122,-1.2,1.2,0.7,-0.6
2,2023-07-22,2680,276,85,204,0.6,-2.0,-0.9,-1.2
3,2023-07-20,2848,236,113,223,0.0,-0.5,-0.2,-0.6
4,2023-07-19,3400,462,73,251,0.2,-2.6,-1.0,-2.6
5,2023-07-16,2730,250,102,220,-3.4,6.0,3.0,1.2
6,2023-07-14,2910,237,124,209,1.9,-2.0,-1.2,0.8
7,2023-07-13,4008,499,117,256,1.6,-2.1,-1.1,0.4
8,2023-07-12,2740,202,122,203,-2.2,3.0,1.6,-0.4
9,2023-07-11,3300,333,139,199,-0.9,1.6,0.8,0.4
10,2023-07-10,2570,184,118,196,0.4,-1.4,-0.6,-1.0


In [18]:
combined_corr = combined_df[['total_cals',
                             'bw_diff']].corr()
print(combined_corr)

            total_cals   bw_diff
total_cals    1.000000 -0.167375
bw_diff      -0.167375  1.000000
