# Huckleberry Notebook

Reads, parses, and outputs sophisticated reports for the Huckleberry app.

## Prerequisite Installation

Run the command below to install the necessary requirements.

In [None]:
# Prerequisite install
!pip install pandas

In [None]:
# Import data
import pandas as pd

# Load the exported CSV (relative path) into a DataFrame
csv_path = 'data/huckleberry-data.csv'
data = pd.read_csv(csv_path)

# Preview the first 5 rows of the loaded data
data.head()

In [None]:
# Import datastructure and graphing packages

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from datetime import datetime, timedelta

## Set Desired Timespan for Data Here

In [None]:
# Size of the reporting window in days, counting today; used for the
# date-range filter and for the plot titles
total_days_to_view = 10

In [None]:
# Get the current timestamp, and the start of the desired date range
today = pd.Timestamp.today()

# Anchor the start at midnight so the earliest day in the window is complete.
# Subtracting from the raw timestamp would drop events logged earlier in that
# day than the current time of day and under-report its daily totals.
prior_duration = today.normalize() - pd.Timedelta(days=total_days_to_view - 1)

# Breastfeed Stats

## Daily Total Duration

In [None]:
# Select every Feed event and keep those inside the reporting window

# Copy-on-write mode lets the filtered frames below be modified without
# raising SettingWithCopyWarning
pd.options.mode.copy_on_write = True

is_feed = data['Type'] == 'Feed'
feedData = data[is_feed]

# Parse the 'Start' and 'End' columns into datetime values
for timestamp_column in ('Start', 'End'):
    feedData[timestamp_column] = pd.to_datetime(feedData[timestamp_column])

# Calendar day of each event, taken from its start time
feedData['Date'] = feedData['Start'].dt.date

# Keep events that started between the range start and now (both inclusive)
started_after_range_start = feedData['Start'] >= prior_duration
started_before_now = feedData['Start'] <= today
feedDataInRange = feedData[started_after_range_start & started_before_now]

# Visualise the data
feedDataInRange.head()

In [None]:
# Total up Feed event durations per day across the selected timespan

# Work on a copy so the in-range feed data keeps its original structure
feedDataNew = feedDataInRange.copy()

# Append a seconds field to the 'Duration' strings so they parse as timedeltas
# NOTE(review): assumes 'Duration' holds "HH:MM" text; confirm against the export
duration_text = feedDataNew['Duration'] + ':00'
feedDataNew['Duration'] = pd.to_timedelta(duration_text)

# Sum the durations for each calendar day (a Series indexed by date)
durations_by_day = feedDataNew.groupby('Date')['Duration']
daily_totals = durations_by_day.sum()

# Express each daily total as a number of seconds
daily_totals_in_seconds = daily_totals.dt.total_seconds()

daily_totals.head()

In [None]:
# Define time conversion helper function for plot generation

def format_hhmm(x, pos=None):
    """Format a duration given in seconds as an ``HH:MM`` tick label.

    The signature matches what ``matplotlib.ticker.FuncFormatter`` calls:
    the tick value followed by the tick position.

    Args:
        x: Duration in seconds (int or float). Fractions of a minute are
            truncated, not rounded.
        pos: Tick position supplied by the formatter; unused, so it is
            optional for direct calls.

    Returns:
        The duration as zero-padded ``HH:MM``, prefixed with ``-`` when the
        value is at least one minute below zero.
    """
    # Work on the magnitude so negative values do not wrap around
    # (floor division would otherwise turn -60 s into "-1:59").
    total_minutes = int(abs(x) // 60)
    hours, minutes = divmod(total_minutes, 60)

    # Suppress the sign for sub-minute negatives to avoid printing "-00:00"
    sign = '-' if x < 0 and total_minutes else ''
    return f'{sign}{hours:02d}:{minutes:02d}'

In [None]:
# Draw the daily feed totals as a bar chart
chart_title = 'Total Feed Duration Over the Last %d Days' % total_days_to_view
totalsPlot = daily_totals_in_seconds.plot(
    kind='bar',
    figsize=(10, 6),
    title=chart_title,
)

# Show the y-axis ticks (seconds) as HH:MM
hhmm_formatter = ticker.FuncFormatter(format_hhmm)
totalsPlot.yaxis.set_major_formatter(hhmm_formatter)

totalsPlot.set_xlabel('Date')
totalsPlot.set_ylabel('Total Duration (Hours)')
plt.tight_layout()
plt.show()

## Daily Duration for Each Breast

In [None]:
# Work out how long each breast was fed from, per day, across the selected timespan

# Keep only feeds whose start location is the breast (drops bottle feeds)
started_at_breast = feedDataNew['Start Location'] == 'Breast'
feedDataBreastDuration = feedDataNew[started_at_breast]

# (source column, timedelta column, seconds column) for each side
# NOTE(review): assumes 'End Condition' carries the left-side time and
# 'Start Condition' the right-side time, as "HH:MM" plus a one-character
# suffix; confirm against the export format
breast_sides = (
    ('End Condition', 'left_breast_duration', 'left_breast_duration_s'),
    ('Start Condition', 'right_breast_duration', 'right_breast_duration_s'),
)

for source_column, duration_column, seconds_column in breast_sides:
    # Drop the trailing character; missing values count as zero time
    duration_text = feedDataBreastDuration[source_column].str[:-1].fillna('00:00')

    # Convert to timedelta64 for mathematics, then to seconds for plotting
    feedDataBreastDuration[duration_column] = pd.to_timedelta(duration_text + ':00')
    feedDataBreastDuration[seconds_column] = feedDataBreastDuration[duration_column].dt.total_seconds()

# Sum both sides for each calendar day (a DataFrame indexed by date)
seconds_columns = ['left_breast_duration_s', 'right_breast_duration_s']
feedDataBreastDurationByDate = feedDataBreastDuration.groupby('Date')[seconds_columns].sum()
feedDataBreastDurationByDate.head()

In [None]:
# Draw the per-breast daily totals as a grouped bar chart
chart_title = 'Per Breast Duration Over Last %d Days' % total_days_to_view
breastDurationsSubplot = feedDataBreastDurationByDate.plot(
    kind='bar',
    figsize=(10, 6),
    title=chart_title,
)

# Show the y-axis ticks (seconds) as HH:MM
hhmm_formatter = ticker.FuncFormatter(format_hhmm)
breastDurationsSubplot.yaxis.set_major_formatter(hhmm_formatter)

breastDurationsSubplot.set_xlabel('Date')
breastDurationsSubplot.set_ylabel('Total Duration (Hours)')
plt.tight_layout()

# Replace the raw column names in the legend with the side names
breastDurationsSubplot.legend(['Left', 'Right'])
plt.show()

In [None]:
# Plot each breast's share of the total feed time as a pie chart

# Total seconds per side across the whole window (a two-element Series)
pieData = feedDataBreastDurationByDate[['left_breast_duration_s', 'right_breast_duration_s']].sum()

# Label the wedges 'Left'/'Right' (same order as the columns selected above)
# instead of the raw column names. No y= argument is needed: pieData is a
# Series, so its values are the wedge sizes.
breastDurationsPiePlot = pieData.plot(
    kind='pie',
    labels=['Left', 'Right'],
    figsize=(10, 6),
    autopct='%.2f%%',
    title='Breast Share %% Over Last %d Days' % total_days_to_view,
)
plt.tight_layout()
plt.show()

# Diaper Stats

In [None]:
# Curate diaper data

# Explicit copies keep the column assignments below warning-free without
# relying on a pandas option being set by another cell
diaperData = data[data['Type'] == 'Diaper'].copy()

# Ensure 'Start' is a datetime object
diaperData['Start'] = pd.to_datetime(diaperData['Start'])

# Strip timestamp from Start to create Date
diaperData['Date'] = diaperData['Start'].dt.date
diaperDataInRange = diaperData[(diaperData['Start'] >= prior_duration) & (diaperData['Start'] <= today)].copy()

# NOTE(review): the column meanings below follow the names this notebook
# assigns to them; confirm against the export format

# Map Duration to Feces Color for clarity
diaperDataInRange['Feces Color'] = diaperDataInRange['Duration']
diaperDataInRange = diaperDataInRange.drop('Duration', axis=1)

# Map Start Condition to Feces Consistency for clarity
diaperDataInRange['Feces Consistency'] = diaperDataInRange['Start Condition']
diaperDataInRange = diaperDataInRange.drop('Start Condition', axis=1)

# Map End Condition to separate excrement columns for clarity
# (lower-cased first so the patterns match regardless of case)
end_condition = diaperDataInRange['End Condition'].str.lower()
diaperDataInRange['Urine Amount'] = end_condition.str.extract(r'pee:(\w+)', expand=False).fillna('')
diaperDataInRange['Feces Amount'] = end_condition.str.extract(r'poo:(\w+)', expand=False).fillna('')

# na=False treats a missing End Condition as "did not happen" and keeps these
# columns strictly boolean; filling with '' would mix strings into the flags
# and break the per-day sum that follows
diaperDataInRange['Did Urinate'] = end_condition.str.contains(r'both|pee', na=False)
diaperDataInRange['Did Defecate'] = end_condition.str.contains(r'both|poo', na=False)
diaperDataInRange = diaperDataInRange.drop('End Condition', axis=1)

# Remove irrelevant columns
diaperColumnsToDrop = ['End', 'Start Location']
diaperDataInRange = diaperDataInRange.drop(diaperColumnsToDrop, axis=1)

diaperDataInRange.head()

In [None]:
# Add up the 'Did Urinate' / 'Did Defecate' flags for each calendar day
# (the result is a DataFrame with date as index)
excrement_flags = ['Did Urinate', 'Did Defecate']
diaperDataInRange = diaperDataInRange.groupby('Date')[excrement_flags].sum()
diaperDataInRange

In [None]:
# Plot the daily urine and feces counts as a grouped bar chart

# Colours are applied through the plot call's color argument, in column
# order ('Did Urinate', 'Did Defecate')
bar_colors = ['yellow', 'brown']
diaperTypeCountSubplot = diaperDataInRange.plot(kind='bar', figsize=(10, 6), title='Excrement Type Over %d Days' % total_days_to_view, color=bar_colors)
plt.xlabel('Date')
plt.ylabel('Excrement Count')

plt.tight_layout()
plt.show()