# Table of Contents
- [Import necessary libraries](#Import-necessary-libraries)
- [Load patient_df with wearables and demographic data](#Load-patient_df-with-wearables-and-demographic-data)
- [Set index to datetime and encode gender as 0 and 1](#Set-index-to-datetime-and-encode-gender-as-0-and-1)
- [Load and inspect food_df](#Load-and-inspect-food_df)
- [Align months for food_log and patient_df](#Align-months-for-food_log-and-patient_df)
- [Merge food_log with patient_id](#Merge-food_log-with-patient_id)
- [Forward fill food log instances within the merged DataFrame](#Forward-fill-food-log-instances-within-the-merged-DataFrame)
- [Inspect merged_df](#Inspect-merged_df)
- [Drop unnecessary columns](#Drop-unnecessary-columns)
- [Adjust column order to be more intuitive](#Adjust-column-order-to-be-more-intuitive)
- [Save final DataFrame as combined_df](#Save-final-DataFrame-as-combined_df)


### Import necessary libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from pandas.tseries.offsets import DateOffset

### Load patient_df which has wearables and demographic data

In [2]:
patient_df = pd.read_csv('patient_df.csv')

### Set index to datetime and encode gender as 0 and 1

In [3]:
# Convert 'datetime' column to datetime format
patient_df['datetime'] = pd.to_datetime(patient_df['datetime'])

# Set 'datetime' as the index of your DataFrame
patient_df = patient_df.set_index('datetime')

# Drop columns and rows where there is na
patient_df = patient_df.dropna(axis=1, how='all')
patient_df = patient_df.dropna()



# Now you can print the head of your dataframe with datetime as index
patient_df.head()


Unnamed: 0_level_0,patient_id,glucose,Gender,HbA1c,acc_mean,bvp_mean,eda_mean,hr_mean,ibi_mean,temp_mean,...,ibi_max,ibi_q1,ibi_q3,ibi_skew,bvp_std,bvp_min,bvp_max,bvp_q1,bvp_q3,bvp_skew
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-13 17:23:32,1,61.0,FEMALE,5.5,87.095625,-0.004786,0.84805,82.318333,0.713904,33.171867,...,0.921917,0.671906,0.76566,0.316494,14.599009,-69.01,84.95,-9.68,9.68,-0.106814
2020-02-13 17:28:32,1,59.0,FEMALE,5.5,88.107187,-0.001255,0.632578,75.429167,0.837369,33.136333,...,1.328186,0.76566,0.906291,0.427697,12.277287,-64.23,65.07,-7.44,7.99,-0.33356
2020-02-13 17:33:32,1,58.0,FEMALE,5.5,57.597604,0.020368,1.544714,75.9734,0.777253,33.244767,...,1.156303,0.718783,0.812537,1.671801,24.076577,-174.61,202.98,-8.5125,9.61,-0.234153
2020-02-13 17:38:32,1,59.0,FEMALE,5.5,66.899687,-0.009613,1.839445,77.138967,0.808537,33.315067,...,1.078174,0.718783,0.890666,-0.018164,21.945661,-191.8,130.97,-5.8,6.72,-0.735376
2020-02-13 17:43:31,1,63.0,FEMALE,5.5,29.774792,-0.012741,4.880899,81.056267,0.760995,33.660067,...,1.078174,0.687531,0.828163,0.390202,14.06804,-147.92,102.04,-6.8,6.9,-0.880729


In [4]:
le = LabelEncoder()
patient_df['Gender'] = le.fit_transform(patient_df['Gender'])

# 'Gender' column is now encoded as follows:
# 1 corresponds to 'MALE' and 0 corresponds to 'FEMALE'

In [5]:
patient_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 26665 entries, 2020-02-13 17:23:32 to 2020-07-23 22:28:07
Data columns (total 76 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   patient_id      26665 non-null  int64  
 1   glucose         26665 non-null  float64
 2   Gender          26665 non-null  int64  
 3   HbA1c           26665 non-null  float64
 4   acc_mean        26665 non-null  float64
 5   bvp_mean        26665 non-null  float64
 6   eda_mean        26665 non-null  float64
 7   hr_mean         26665 non-null  float64
 8   ibi_mean        26665 non-null  float64
 9   temp_mean       26665 non-null  float64
 10  eda_std         26665 non-null  float64
 11  eda_min         26665 non-null  float64
 12  eda_max         26665 non-null  float64
 13  eda_q1          26665 non-null  float64
 14  eda_q3          26665 non-null  float64
 15  eda_skew        26665 non-null  float64
 16  eda_peaks       26665 non-null  float64
 

In [6]:
patient_df.patient_id.unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])

### Load food_df and inspect

In [7]:
food_df = pd.read_csv('food_df.csv')

In [8]:
# Reset the index
food_df.reset_index(drop=True, inplace=True)

# Drop the unnamed column 
food_df.drop(food_df.columns[food_df.columns.str.contains('unnamed', case=False)], axis=1, inplace=True)

# Check the result
food_df.head()


Unnamed: 0,patient_id,time_begin,logged_food,amount,unit,searched_food,calorie,total_carb,dietary_fiber,sugar,protein,total_fat
0,1,2020-02-13 18:00:00,Berry Smoothie,20.0,fluid ounce,Strawberry Smoothie,456.0,85.0,1.7,83.0,16.0,3.3
1,1,2020-02-13 20:30:00,Chicken LegAsparagus,5.0,0,chicken legAsparagus,488.0,2.5,1.2,0.8,63.4,23.1
2,1,2020-02-14 07:10:00,Natrel Lactose Free 2 PercentStandard Breakfast,8.75,fluid ouncecup,(Natrel) Lactose Free 2% Partly Skimmed Milk(K...,230.0,35.0,0.0,18.0,13.0,0.0
3,1,2020-02-14 09:38:00,Breakfast Trail Mix,0.5,cup,"(Giant) Breakfast Blend, Trail Mix",280.0,30.0,0.0,22.0,4.0,0.0
4,1,2020-02-14 12:38:00,Spinach Salad w/ strawberries and cheeseEgg,201.0,grams,Spinach And Strawberry SaladLarge Egg,358.0,14.4,0.0,8.7,13.9,0.0


In [9]:
food_df['time_begin'] = pd.to_datetime(food_df['time_begin'], format='mixed')

# Check updated datatypes
food_df.dtypes


patient_id                int64
time_begin       datetime64[ns]
logged_food              object
amount                  float64
unit                     object
searched_food            object
calorie                 float64
total_carb              float64
dietary_fiber           float64
sugar                   float64
protein                 float64
total_fat               float64
dtype: object

In [10]:
food_df = food_df.select_dtypes(exclude=['object']).drop(columns=['amount'])

In [11]:
food_df.head()

Unnamed: 0,patient_id,time_begin,calorie,total_carb,dietary_fiber,sugar,protein,total_fat
0,1,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3
1,1,2020-02-13 20:30:00,488.0,2.5,1.2,0.8,63.4,23.1
2,1,2020-02-14 07:10:00,230.0,35.0,0.0,18.0,13.0,0.0
3,1,2020-02-14 09:38:00,280.0,30.0,0.0,22.0,4.0,0.0
4,1,2020-02-14 12:38:00,358.0,14.4,0.0,8.7,13.9,0.0


### Align months for food_log and patient_df

In [12]:
# Apply the specific adjustment for 'patient_id' 15 and 16
food_df.loc[food_df['patient_id'].isin([7, 13, 15, 16]), 'time_begin'] += DateOffset(months=5)

In [13]:
patient_df.reset_index(inplace=True)


In [14]:
patient_df.columns

Index(['datetime', 'patient_id', 'glucose', 'Gender', 'HbA1c', 'acc_mean',
       'bvp_mean', 'eda_mean', 'hr_mean', 'ibi_mean', 'temp_mean', 'eda_std',
       'eda_min', 'eda_max', 'eda_q1', 'eda_q3', 'eda_skew', 'eda_peaks',
       'acc_x_mean', 'acc_x_std', 'acc_x_min', 'acc_x_max', 'acc_x_q1',
       'acc_x_q3', 'acc_x_skew', 'acc_x_2hr_mean', 'acc_x_2hr_max',
       'acc_y_mean', 'acc_y_std', 'acc_y_min', 'acc_y_max', 'acc_y_q1',
       'acc_y_q3', 'acc_y_skew', 'acc_y_2hr_mean', 'acc_y_2hr_max',
       'acc_z_mean', 'acc_z_std', 'acc_z_min', 'acc_z_max', 'acc_z_q1',
       'acc_z_q3', 'acc_z_skew', 'acc_z_2hr_mean', 'acc_z_2hr_max', 'acc_std',
       'acc_min', 'acc_max', 'acc_q1', 'acc_q3', 'acc_skew', 'acc_2hr_mean',
       'acc_2hr_max', 'hr_std', 'hr_min', 'hr_max', 'hr_q1', 'hr_q3',
       'hr_skew', 'temp_std', 'temp_min', 'temp_max', 'temp_q1', 'temp_q3',
       'temp_skew', 'ibi_std', 'ibi_min', 'ibi_max', 'ibi_q1', 'ibi_q3',
       'ibi_skew', 'bvp_std', 'bvp_min', 'bvp_

### Merge food_log and patient_id

In [15]:
food_df

Unnamed: 0,patient_id,time_begin,calorie,total_carb,dietary_fiber,sugar,protein,total_fat
0,1,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3
1,1,2020-02-13 20:30:00,488.0,2.5,1.2,0.8,63.4,23.1
2,1,2020-02-14 07:10:00,230.0,35.0,0.0,18.0,13.0,0.0
3,1,2020-02-14 09:38:00,280.0,30.0,0.0,22.0,4.0,0.0
4,1,2020-02-14 12:38:00,358.0,14.4,0.0,8.7,13.9,0.0
...,...,...,...,...,...,...,...,...
730,16,2020-07-26 10:15:00,199.0,30.9,4.0,15.1,4.8,8.4
731,16,2020-07-26 18:30:00,2085.0,256.8,16.3,51.0,80.6,80.8
732,16,2020-07-27 10:30:00,280.0,56.5,1.0,24.0,8.0,2.5
733,16,2020-07-27 11:30:00,650.0,92.0,14.9,11.7,32.0,16.9


In [16]:
dfs = []
for pid in patient_df['patient_id'].unique():  # loop over unique patient IDs
    # Subset and set index for patient_df
    df1 = patient_df.loc[patient_df['patient_id'] == pid]

    # Subset food_df for the current patient and sort by 'datetime'
    df2 = food_df.loc[food_df['patient_id'] == pid].sort_values(by='time_begin')

    # Add 'food_' prefix to all columns of df2
    df2.columns = df2.add_prefix('food_').columns

    # Perform merge_asof
    df_merged = pd.merge_asof(df1, df2, left_on='datetime', right_on='food_time_begin', direction='nearest',
                              tolerance=pd.Timedelta('3 min'))

    df_merged['patient_id'] = pid  # add 'patient_id' column to the merged dataframe

    dfs.append(df_merged)

# Concatenate all DataFrames in dfs list into one DataFrame
merged_df = pd.concat(dfs)


### Forward fill food log instances within the merged df

In [17]:
def fill_food_cols(group):
    # Select columns that start with 'food_'
    food_cols = [col for col in group.columns if col.startswith('food_')]

    # Filter numeric columns
    numeric_food_cols = group[food_cols].select_dtypes(include='number').columns.tolist()

    # Create new columns with suffix '_ffwd' based on the existing numeric 'food_' columns
    for col in numeric_food_cols:
        new_col_name = f'{col}_ffwd'
        group[new_col_name] = group[col]

    # Now, forward-fill NaN values only for the '_ffwd' columns (preserving original columns)
    ffwd_food_cols = [col for col in group.columns if col.endswith('_ffwd')]

    # If the DataFrame has rows, set the value at the first row index to 0
    if len(group) > 0:
        for col in ffwd_food_cols:
            group.loc[group.index[0], col] = 0

    # Forward-fill NaN values for '_ffwd' columns and 'time_begin'
    group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
    group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')

    return group


# Apply the function to each group based on 'patient_id'
grouped = merged_df.groupby('patient_id')
df_filled = grouped.apply(fill_food_cols)


  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')
  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')
  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')
  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')
  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')
  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_time_begin'].fillna(method='ffill')
  group[ffwd_food_cols] = group[ffwd_food_cols].fillna(method='ffill')
  group['food_time_begin'] = group['food_

### inspect merged_df

In [18]:
df_filled

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime,patient_id,glucose,Gender,HbA1c,acc_mean,bvp_mean,eda_mean,hr_mean,ibi_mean,...,food_sugar,food_protein,food_total_fat,food_patient_id_ffwd,food_calorie_ffwd,food_total_carb_ffwd,food_dietary_fiber_ffwd,food_sugar_ffwd,food_protein_ffwd,food_total_fat_ffwd
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,2020-02-13 17:23:32,1,61.0,0,5.5,87.095625,-0.004786,0.848050,82.318333,0.713904,...,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2020-02-13 17:28:32,1,59.0,0,5.5,88.107187,-0.001255,0.632578,75.429167,0.837369,...,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,2020-02-13 17:33:32,1,58.0,0,5.5,57.597604,0.020368,1.544714,75.973400,0.777253,...,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,2020-02-13 17:38:32,1,59.0,0,5.5,66.899687,-0.009613,1.839445,77.138967,0.808537,...,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4,2020-02-13 17:43:31,1,63.0,0,5.5,29.774792,-0.012741,4.880899,81.056267,0.760995,...,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,1787,2020-07-23 22:08:08,16,114.0,1,5.5,53.183854,-0.054992,0.223396,80.214200,0.752190,...,,,,16.0,690.0,123.0,8.8,14.2,30.7,8.2
16,1788,2020-07-23 22:13:07,16,121.0,1,5.5,100.113021,0.112629,0.300561,75.746267,0.800969,...,,,,16.0,690.0,123.0,8.8,14.2,30.7,8.2
16,1789,2020-07-23 22:18:07,16,127.0,1,5.5,102.000833,-0.032901,0.363086,76.986933,0.780146,...,,,,16.0,690.0,123.0,8.8,14.2,30.7,8.2
16,1790,2020-07-23 22:23:07,16,132.0,1,5.5,101.791667,0.199750,0.390997,76.054286,0.796027,...,,,,16.0,690.0,123.0,8.8,14.2,30.7,8.2


In [19]:
df_filled.columns

Index(['datetime', 'patient_id', 'glucose', 'Gender', 'HbA1c', 'acc_mean',
       'bvp_mean', 'eda_mean', 'hr_mean', 'ibi_mean', 'temp_mean', 'eda_std',
       'eda_min', 'eda_max', 'eda_q1', 'eda_q3', 'eda_skew', 'eda_peaks',
       'acc_x_mean', 'acc_x_std', 'acc_x_min', 'acc_x_max', 'acc_x_q1',
       'acc_x_q3', 'acc_x_skew', 'acc_x_2hr_mean', 'acc_x_2hr_max',
       'acc_y_mean', 'acc_y_std', 'acc_y_min', 'acc_y_max', 'acc_y_q1',
       'acc_y_q3', 'acc_y_skew', 'acc_y_2hr_mean', 'acc_y_2hr_max',
       'acc_z_mean', 'acc_z_std', 'acc_z_min', 'acc_z_max', 'acc_z_q1',
       'acc_z_q3', 'acc_z_skew', 'acc_z_2hr_mean', 'acc_z_2hr_max', 'acc_std',
       'acc_min', 'acc_max', 'acc_q1', 'acc_q3', 'acc_skew', 'acc_2hr_mean',
       'acc_2hr_max', 'hr_std', 'hr_min', 'hr_max', 'hr_q1', 'hr_q3',
       'hr_skew', 'temp_std', 'temp_min', 'temp_max', 'temp_q1', 'temp_q3',
       'temp_skew', 'ibi_std', 'ibi_min', 'ibi_max', 'ibi_q1', 'ibi_q3',
       'ibi_skew', 'bvp_std', 'bvp_min', 'bvp_

### drop unnecessary columns

In [20]:
df_filled.reset_index(level=0, drop=True, inplace=True)
df_filled = df_filled.reset_index()
df_filled = df_filled.drop(columns=['food_patient_id', 'food_patient_id_ffwd'])
df_filled['datetime'] = pd.to_datetime(df_filled['datetime'], format='mixed')
# Select columns of number type from DataFrame
numerical_cols = df_filled.select_dtypes(include='number').columns

# Fill missing values with 0
df_filled[numerical_cols] = df_filled[numerical_cols].fillna(0)


In [21]:
df_filled.head(50)

Unnamed: 0,index,datetime,patient_id,glucose,Gender,HbA1c,acc_mean,bvp_mean,eda_mean,hr_mean,...,food_dietary_fiber,food_sugar,food_protein,food_total_fat,food_calorie_ffwd,food_total_carb_ffwd,food_dietary_fiber_ffwd,food_sugar_ffwd,food_protein_ffwd,food_total_fat_ffwd
0,0,2020-02-13 17:23:32,1,61.0,0,5.5,87.095625,-0.004786,0.84805,82.318333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2020-02-13 17:28:32,1,59.0,0,5.5,88.107187,-0.001255,0.632578,75.429167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,2020-02-13 17:33:32,1,58.0,0,5.5,57.597604,0.020368,1.544714,75.9734,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,2020-02-13 17:38:32,1,59.0,0,5.5,66.899687,-0.009613,1.839445,77.138967,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,2020-02-13 17:43:31,1,63.0,0,5.5,29.774792,-0.012741,4.880899,81.056267,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,2020-02-13 17:48:31,1,67.0,0,5.5,28.128333,0.007091,5.533543,78.199633,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,2020-02-13 17:53:31,1,68.0,0,5.5,34.513646,0.005111,2.561955,75.8926,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7,2020-02-13 17:58:31,1,63.0,0,5.5,70.049167,-0.007258,1.702016,77.694533,...,1.7,83.0,16.0,3.3,456.0,85.0,1.7,83.0,16.0,3.3
8,8,2020-02-13 18:03:32,1,59.0,0,5.5,76.083333,0.033502,1.119807,77.921233,...,0.0,0.0,0.0,0.0,456.0,85.0,1.7,83.0,16.0,3.3
9,9,2020-02-13 18:08:32,1,60.0,0,5.5,88.574479,-0.020472,0.46296,78.656733,...,0.0,0.0,0.0,0.0,456.0,85.0,1.7,83.0,16.0,3.3


### Adjust column order to be more intuitive

In [22]:
# Drop the 'index' column
df_filled.drop('index', axis=1, inplace=True)

# Move 'glucose' column to second place
cols = df_filled.columns.tolist()
cols.insert(1, cols.pop(cols.index('glucose')))
df_filled = df_filled.reindex(columns=cols)  # reindex the dataframe once

# Move 'patient_id' column to the third place
cols = df_filled.columns.tolist()
cols.insert(2, cols.pop(cols.index('patient_id')))
df_filled = df_filled.reindex(columns=cols)  # reindex the dataframe once again with the new column order

df_filled = df_filled.drop(columns='food_time_begin')


In [23]:
df_filled.head()

Unnamed: 0,datetime,glucose,patient_id,Gender,HbA1c,acc_mean,bvp_mean,eda_mean,hr_mean,ibi_mean,...,food_dietary_fiber,food_sugar,food_protein,food_total_fat,food_calorie_ffwd,food_total_carb_ffwd,food_dietary_fiber_ffwd,food_sugar_ffwd,food_protein_ffwd,food_total_fat_ffwd
0,2020-02-13 17:23:32,61.0,1,0,5.5,87.095625,-0.004786,0.84805,82.318333,0.713904,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-02-13 17:28:32,59.0,1,0,5.5,88.107187,-0.001255,0.632578,75.429167,0.837369,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-02-13 17:33:32,58.0,1,0,5.5,57.597604,0.020368,1.544714,75.9734,0.777253,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-02-13 17:38:32,59.0,1,0,5.5,66.899687,-0.009613,1.839445,77.138967,0.808537,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-02-13 17:43:31,63.0,1,0,5.5,29.774792,-0.012741,4.880899,81.056267,0.760995,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Save final df as combined_df which has wearables, demographic, and food_log information

In [24]:
df_filled.to_csv('combined_df.csv', index=False)