In [None]:
"""
Can you calculate:

- For what fraction of the total time people spend urinating and defecating do these events overlap?
- What fraction of total output mass takes place during this overlap?

0318
- Can you please calculate O / (U + D + O), where U and D are urination/defecation WITHOUT the overlap?
Calculate this for both weight and time.

0319
Given the table you've already made of output masses for urine/stool, can you please calculate
the mean and standard deviation of: 
    - Urine mass
    - Rate at which urine mass is expelled
    - Stool mass
    - Rate at which stool mass is expelled
"""

# 0314 Implementation

In [None]:
from src.data import load_annotation, load_weight_sensor
from src.data.load_annotation import Annotated_Event
from scipy import signal
import numpy as np
import pandas as pd
from typing import List, Optional
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Load all annotations once. The helper functions below read this module-level
# object: it is indexed by user_id (`annotations[user_id]`) and each entry is
# iterated for events exposing `.event`, `.start` and `.stop`.
annotations = load_annotation.get_annotation()

In [None]:
def get_overlap_from_two_lists_of_annotation(
    defecate_annotations: List["Annotated_Event"],
    urinate_annotations: List["Annotated_Event"]
) -> List[List[float]]:
    """
    Get the overlap timestamps from the annotations of
    urination and defecation.

    Every defecation event is compared with every urination event. The
    overlap of a pair is the intersection of the two intervals:
    [max(start), min(stop)].

    Args:
        defecate_annotations: events exposing `.start` and `.stop`.
        urinate_annotations: events exposing `.start` and `.stop`.

    Returns:
        A list of [start, stop] pairs, one per overlapping
        (defecation, urination) pair, in iteration order. Pairs that only
        touch at a single instant (zero-length intersection) are not
        reported.
    """
    overlaps = []
    for d in defecate_annotations:
        for u in urinate_annotations:
            # Interval intersection. Unlike a pair of strict start
            # comparisons, this also catches events that begin at exactly
            # the same timestamp.
            overlap_start = max(d.start, u.start)
            overlap_stop = min(d.stop, u.stop)
            if overlap_start < overlap_stop:
                overlaps.append([overlap_start, overlap_stop])
    return overlaps

def get_total_duration(start_stop_list: List[List[float]]) -> float:
    """
    Add up the length (stop - start) of every [start, stop] pair.

    Returns 0 for an empty list.
    """
    durations = (interval[1] - interval[0] for interval in start_stop_list)
    return sum(durations)

def get_start_stop_for_user_id(user_id: int, event: str) -> List[List[float]]:
    """
    Collect the [start, stop] pair of every annotation of `user_id` whose
    event label equals `event`.

    Reads the module-level `annotations` object.
    """
    start_stop_pairs = []
    for annotation in annotations[user_id]:
        if annotation.event == event:
            start_stop_pairs.append([annotation.start, annotation.stop])
    return start_stop_pairs

def get_overlap_for_user_id(user_id: int) -> List[List[float]]:
    """
    Get the urination/defecation overlaps for user `user_id`.

    Splits the user's annotations (read from the module-level `annotations`
    object) into "Defecation" and "Urination" events and delegates to
    `get_overlap_from_two_lists_of_annotation`.

    Returns:
        The list of [start, stop] overlap pairs produced by that helper
        (a plain Python list, not a NumPy array).
    """
    annotations_i = annotations[user_id]
    d_list = [a for a in annotations_i if a.event == "Defecation"]
    u_list = [a for a in annotations_i if a.event == "Urination"]
    return get_overlap_from_two_lists_of_annotation(d_list, u_list)

In [None]:
def get_diff_end_minus_start(arr: np.array) -> float:
    """
    Return how far the last value of `arr` lies below its first value.

    Despite the name, the result is `first - last`, and only when the first
    value is strictly greater than the last one; in every other case
    (including an empty `arr`) the result is 0.
    """
    if len(arr) == 0:
        return 0
    first, last = arr[0], arr[-1]
    return (first - last) if first > last else 0


def apply_median_filter(sz: pd.Series, **kwargs) -> pd.Series:
    """
    Median-filter `sz` with `scipy.signal.medfilt` and return the result as a
    new Series that carries the index of `sz`.

    The kernel size is read from the required keyword `window_size`.
    """
    window = kwargs['window_size']
    return pd.Series(signal.medfilt(sz, window), index=sz.index)

# Registry of weight-difference strategies, keyed by the name callers pass as
# `diff_method_name`.
DIFF_METHODS = {
    'EndMinusStart': get_diff_end_minus_start
}

# Registry of smoothing strategies, keyed by the name callers pass as
# `smooth_method_name`.
SMOOTH_METHODS = {
    'MedianFilter': apply_median_filter
}

In [None]:
class WeightChangeCalculator:
    """
    Compute weight drops in one user's total-weight signal over annotated
    time windows.

    The cleaned total-weight series is loaded once per instance. Smoothing
    and differencing strategies are looked up by name in the module-level
    SMOOTH_METHODS and DIFF_METHODS registries.
    """

    # Default smoothing window passed to the smoother as `window_size`.
    DEFAULT_WINDOW_SIZE = 11

    def __init__(self, user_id):
        self.user_id = user_id
        self.total_weight_clean = load_weight_sensor.get_total_weight_clean(
            user_id)

    def get_total_weight_smoothed(
        self,
        smooth_method_name: str,
        window_size: int = DEFAULT_WINDOW_SIZE
    ) -> pd.Series:
        """
        Smooth the total weight data with the assigned method.

        Args:
            smooth_method_name: key into SMOOTH_METHODS; an unknown name
                raises KeyError.
            window_size: forwarded to the smoother as `window_size`. For
                'MedianFilter' this becomes the `scipy.signal.medfilt`
                kernel size, which SciPy requires to be odd.

        Returns:
            Whatever the selected smoother returns for the cleaned series.
        """
        return SMOOTH_METHODS[smooth_method_name](
            self.total_weight_clean,
            window_size=window_size
        )

    def get_weight_change(
        self,
        start_stop_list: List[List[float]],
        smooth_method_name: Optional[str] = 'MedianFilter',
        diff_method_name: Optional[str] = 'EndMinusStart',
        window_size: int = DEFAULT_WINDOW_SIZE
    ) -> float:
        """
        Get the weight change during a list of [start, stop]

        The signal is smoothed once, then each window is cut out by index
        value (both ends inclusive) and reduced with the chosen diff method.
        The per-window results are summed.

        Args:
            start_stop_list: windows as [start, stop] pairs, expressed in the
                same units as the index of the weight series.
            smooth_method_name: key into SMOOTH_METHODS.
            diff_method_name: key into DIFF_METHODS.
            window_size: smoothing window, see `get_total_weight_smoothed`.

        Returns:
            Sum of the per-window differences; 0 for an empty list.
        """
        total_weight_smooth = self.get_total_weight_smoothed(
            smooth_method_name, window_size)

        res = 0
        for start_stop in start_stop_list:
            start, stop = start_stop
            # Boolean mask rather than a label slice, so the selection does
            # not depend on the index being sorted.
            total_weight_within = total_weight_smooth[
                (total_weight_smooth.index >= start) &
                (total_weight_smooth.index <= stop)
            ]

            res += DIFF_METHODS[diff_method_name](total_weight_within.values)

        return res

In [None]:
# get_overlap_for_user_id(1806)

# Build one row per user: the raw [start, stop] lists, their total durations
# and the summed weight drops for defecation, urination and their overlap.
records = []
indexes = []

for user_id in tqdm(annotations.keys()):
    # NOTE(review): user IDs below 1000 are skipped; the reason is not visible
    # in this notebook -- confirm what that ID range represents.
    if user_id < 1000:
        continue
    
    # Loads this user's cleaned weight signal (done in the constructor).
    weight_change_calculator = WeightChangeCalculator(user_id)
    
    d = get_start_stop_for_user_id(user_id, event='Defecation')
    u = get_start_stop_for_user_id(user_id, event='Urination')
    o = get_overlap_for_user_id(user_id)
    
    d_duration = get_total_duration(d)
    u_duration = get_total_duration(u)
    o_duration = get_total_duration(o)
    
    # Each call smooths the full signal again before measuring the windows.
    d_weight = weight_change_calculator.get_weight_change(d)
    u_weight = weight_change_calculator.get_weight_change(u)
    o_weight = weight_change_calculator.get_weight_change(o)
    
    # Row order must match the column list used to build records_df.
    records.append([
        d, u, o,
        d_duration, u_duration, o_duration,
        d_weight, u_weight, o_weight
    ])
    
    indexes.append(user_id)

In [None]:
# One row per processed user, indexed by user_id. Column order mirrors the
# order in which values were appended to each record.
records_df = pd.DataFrame(
    records,
    columns=[
        'Defecation', 'Urination', 'Overlap',
        'D-duration (s)', 'U-duration (s)', 'O-duration (s)',
        'D-weight (kg)', 'U-weight (kg)', 'O-weight (kg)'
    ],
    index = indexes
)

# Displays the whole frame as the cell output.
records_df

In [None]:
# Persist the per-user table. The 'Defecation', 'Urination' and 'Overlap'
# columns hold Python lists, so they are written to CSV in their text form.
records_df.to_csv('../data/processed/0315-investigating-overlap.csv')

# 0318 Evaluation

In [None]:
# Reload the saved table; the first CSV column (the user_id index written by
# to_csv) becomes the index again.
res_df = pd.read_csv('../data/processed/0315-investigating-overlap.csv', index_col=0)

In [None]:
# 2x2 contingency table of event presence per user:
# rows = defecation present / absent, columns = urination present / absent.
contingency_tbl = np.zeros((2,2))

# An event type counts as present when its total annotated duration is non-zero.
if_urination = res_df['U-duration (s)'] != 0
if_defecation = res_df['D-duration (s)'] != 0

contingency_tbl[0, 0] = (if_defecation & if_urination).sum()
contingency_tbl[0, 1] = (if_defecation & (~if_urination)).sum()
contingency_tbl[1, 0] = ((~if_defecation) & if_urination).sum()
# Count the "neither" cell from the data instead of assuming it is empty, so
# the table stays correct if a user has no annotated event of either type.
contingency_tbl[1, 1] = ((~if_defecation) & (~if_urination)).sum()

## Contingency table: 58 cases have both urination and defecation

In [None]:
# Show the counts as a labelled table: rows are defecation yes/no (D-Y, D-N),
# columns are urination yes/no (U-Y, U-N).
pd.DataFrame(
    contingency_tbl.astype(int),
    columns=['U-Y', 'U-N'],
    index = ['D-Y', 'D-N']
)

## 24 out of 58 have overlaps

In [None]:
# Keep only users that have both a defecation and a urination duration.
res_sub_df = res_df[if_defecation & if_urination]

# (number of those users with a non-zero overlap duration, number of users kept)
sum(res_sub_df['O-duration (s)'] > 0), len(res_sub_df)

## Visualize the 24 overlap cases

In [None]:
# time
overlaps_df = res_sub_df[res_sub_df['O-duration (s)'] > 0]
time_overlap = overlaps_df['O-duration (s)'] / (
    overlaps_df['U-duration (s)'] + overlaps_df['D-duration (s)'] - overlaps_df['O-duration (s)'])
# weight
weight_overlap = overlaps_df['O-weight (kg)'] / (
    overlaps_df['U-weight (kg)'] + overlaps_df['D-weight (kg)'] - overlaps_df['O-weight (kg)'])

In [None]:
# Side-by-side boxplots of the per-user overlap ratios: time on the left,
# weight on the right.
plt.figure(figsize=(10, 3))
plt.subplot(121)
sns.boxplot(x = time_overlap)
# The x-axis is fixed to [0, 1]; any value outside that range is not shown.
plt.xlim(0, 1)
plt.title('Time')

plt.subplot(122)
sns.boxplot(x = weight_overlap)
plt.xlim(0, 1)
plt.title('Weight')

plt.tight_layout()

# Write the figure to disk (path is relative to the notebook's working directory).
plt.savefig('../reports/figures/overlap-evaluation.png')
# No-op statement; presumably kept so the cell ends without echoing a value.
pass

In [None]:
# Mean and standard deviation of the time-overlap ratio across users.
# np.std is called with its default ddof=0 (population standard deviation).
np.mean(time_overlap), np.std(time_overlap)

In [None]:
# Mean and standard deviation of the weight-overlap ratio across users.
# np.std is called with its default ddof=0 (population standard deviation).
np.mean(weight_overlap), np.std(weight_overlap)