Meal Stats
====
Statistics on the number of entries of each type each participant made per day

In [1]:
import os

%load_ext autoreload
%autoreload 2

# Directory that the figures produced by this notebook are saved into
img_dir = "outputs/imgs/meal_stats/"

# exist_ok=True creates the directory (and any missing parents) if needed and
# is a no-op when it is already there, so the cell can be re-run safely
os.makedirs(img_dir, exist_ok=True)

In [None]:
"""
Read and clean the meal entry data

"""

from analysis_utils import clean

# Load the cleaned smartwatch entries via the project helper.
# NOTE(review): presumably keep_catchups=True retains the catch-up entries and
# keep_day0=False drops day-0 rows - confirm against analysis_utils.clean
meal_info = clean.cleaned_smartwatch(keep_catchups=True, keep_day0=False)
# Preview the first five rows
meal_info.head(5)

In [None]:
""" Pick out the rows and columns we want to keep """

# Columns needed for the per-day statistics
cols = ["meal_type", "p_id", "delta", "catchup_flag", "any_in_ramadan"]

# Rows marking the start/end of a catch-up are excluded from the counts
is_catchup_marker = meal_info["meal_type"].isin({"Catch-up start", "Catch-up end"})

# Select rows and columns in one step and take an explicit copy, so that later
# cells which add columns work on an independent dataframe rather than a
# slice of the cleaned data (which pandas flags with SettingWithCopyWarning)
meal_info = meal_info.loc[~is_catchup_marker, cols].copy()

# NOTE(review): this overwrites the system clipboard on every run and raises
# on machines without a clipboard backend - looks like a leftover from
# manual inspection; confirm whether it is still wanted
meal_info.to_clipboard()
meal_info.head()

In [None]:
# Check we have the expected values
assert set(meal_info["meal_type"]) == {
    "Meal",
    "Drink",
    "Snack",
    "No food/drink",
    "No response",
    "No catch-up",
}, set(meal_info["meal_type"])

# Add the study day (the whole-day part of "delta") and drop the "delta" column,
# which is not needed afterwards. assign() builds a new dataframe instead of
# writing a column into what may be a filtered slice of the cleaned data,
# which avoids pandas' SettingWithCopyWarning.
meal_info = meal_info.assign(day=meal_info["delta"].dt.days).drop(columns=["delta"])
meal_info.head()

In [None]:
"""
Find how many entries there were per day overall

"""

import pandas as pd


def group(df: pd.DataFrame) -> pd.DataFrame:
    """
    From a dataframe containing the columns "p_id" and "day", return a dataframe
    containing the number of entries per day per participant.

    Every participant that appears in `df` gets exactly one row for each of the
    study days 1 to 7; days on which they made no entries get a count of 0.
    Participants with no rows at all in `df` cannot be inferred from it, so
    they are not included in the result.

    :param df: dataframe with (at least) the columns "p_id" and "day"

    :returns: dataframe with the columns "p_id", "day" and "count", sorted by
              participant and then day, with a fresh 0..n-1 index

    :raises AssertionError: if any entry falls on a day outside 1 to 7

    """
    study_days = list(range(1, 8))

    # Number of entries for each (participant, day) pair that actually occurs
    counts = df.groupby(["p_id", "day"]).size()

    # Entries outside the study week would otherwise be silently dropped by
    # the reindex below. Raised explicitly (rather than with `assert`) so the
    # check still runs under `python -O`.
    observed_days = counts.index.get_level_values("day")
    unexpected = counts.index[~observed_days.isin(study_days)]
    if len(unexpected) > 0:
        raise AssertionError(
            f"Entries found outside study days 1-7 (p_id, day): {unexpected.tolist()}"
        )

    # Build the full participant x day grid and fill the gaps with 0 in a
    # single step, instead of appending one row at a time
    participants = counts.index.get_level_values("p_id").unique()
    full_index = pd.MultiIndex.from_product(
        [participants, study_days], names=["p_id", "day"]
    )

    return counts.reindex(full_index, fill_value=0).reset_index(name="count")


# Preview the per-participant, per-day entry counts across all meal types
group(meal_info).head()

In [None]:
from typing import Union

import matplotlib.pyplot as plt


def boxplot(
    df: pd.DataFrame, axis: plt.Axes | None = None
) -> tuple[plt.Figure, plt.Axes] | None:
    """
    Make a boxplot of the "count" column, with one box per value of "day"

    :param df: dataframe with the columns "day" and "count"
    :param axis: optional axis to plot on. If not provided, creates a new figure

    :returns: (figure, axis) if a new figure was created, otherwise None

    """
    new_fig = None
    if axis is None:
        new_fig, axis = plt.subplots()

    df.boxplot(column="count", by="day", ax=axis)

    # pandas titles grouped boxplots automatically, both on the axis and as a
    # figure-level "Boxplot grouped by ..." suptitle; remove both
    axis.set_title("")
    axis.figure.suptitle("")

    axis.set_xlabel("Day")
    axis.set_ylabel("Number of entries")

    # Counts cannot be negative, so anchor the y axis at zero
    axis.set_ylim(0, axis.get_ylim()[1])

    if new_fig is None:
        # The caller owns the figure, so there is nothing to hand back
        return None
    return new_fig, axis


# boxplot() returns a (figure, axis) pair when it creates its own figure;
# unpack it so that `fig` really is the figure rather than the whole tuple
fig, _axis = boxplot(group(meal_info))

In [None]:
"""
Plot this as boxplots

"""

meal_types = meal_info["meal_type"].unique()

# One panel per meal type. The number of panels is taken from the data so
# that zip() below cannot silently drop a meal type; squeeze=False keeps
# `axes` an array even if there is only a single meal type.
fig, axes = plt.subplots(
    1,
    len(meal_types),
    figsize=(4 * len(meal_types), 4),
    sharey=True,
    squeeze=False,
)
axes = axes[0]

for axis, meal_type in zip(axes, meal_types):
    boxplot(group(meal_info[meal_info["meal_type"] == meal_type]), axis=axis)
    axis.set_title(meal_type)

# boxplot() fixes the y-limits with set_ylim(), which switches autoscaling off,
# and with sharey=True those limits apply to every panel. Re-run autoscaling
# over the data of all panels before anchoring at zero, so that a panel with
# larger counts than the first one is not clipped.
axes[0].autoscale(enable=True, axis="y")
axes[0].set_ylim(bottom=0)

# Only the left-most panel keeps its y label
for axis in axes[1:]:
    axis.set_ylabel("")

fig.suptitle("")

# img_dir already ends with a separator; os.path.join avoids doubling it
fig.savefig(os.path.join(img_dir, "meal_entries_per_day.png"), bbox_inches="tight")

In [None]:
"""
Print a table of the number of each type of entry per day

"""

from IPython.display import display


def summarise(df: pd.DataFrame, meal_type: str | None = None) -> pd.DataFrame:
    """
    Summarise the number of entries each participant made on each study day
    as a median and an interquartile range (IQR) across participants.

    :param df: dataframe of entries with the columns "p_id" and "day"
    :param meal_type: label used for the top level of the returned columns;
                      "All" is used if not provided. `df` is not filtered by
                      this value - pass in the rows you want summarised

    :returns: dataframe indexed by day, with the two-level columns
              (label, "median") and (label, "IQR")

    """
    label = meal_type if meal_type is not None else "All"

    # Per-participant counts for every study day, grouped by day
    daily_counts = group(df).groupby("day")["count"]

    median = daily_counts.median()
    iqr = daily_counts.quantile(0.75) - daily_counts.quantile(0.25)

    result = pd.concat([median, iqr], axis=1)

    # Name the columns explicitly, in the order they were concatenated,
    # rather than relabelling with set_levels(), whose outcome depends on how
    # pandas happened to order the index levels internally
    result.columns = pd.MultiIndex.from_product([[label], ["median", "IQR"]])

    return result


# Overall summary, across every meal type
display(summarise(meal_info))

# One summary per meal type, placed side by side in a single table
dfs = []
for meal_type in meal_info["meal_type"].unique():
    entries_of_type = meal_info[meal_info["meal_type"] == meal_type]
    dfs.append(summarise(entries_of_type, meal_type))
pd.concat(dfs, axis=1)

In [None]:
""" Do these summaries with only the non-catchup entries"""