In [None]:
!pip install exetera

In [None]:
%matplotlib inline
from datetime import datetime, timedelta
import numpy as np
import h5py
import matplotlib.pyplot as plt

from exetera.core.session import Session

# User-supplied parameters: each line below is a placeholder and must be given
# a real value before this cell will run.
# start_date / end_date bound the reporting period; weekly boundaries are
# stepped back from end_date in 7-day increments for as long as they do not
# fall before start_date.
start_date = # datetime(year, month, day), inclusive
end_date = # datetime(year, month, day), exclusive
# Passed to h5py.File(filename, 'r'), so this should be the path to an HDF5 file.
filename = # file name for source dataset

# Build the ascending list of week boundaries used to bucket the data.
# Boundaries are anchored on end_date and spaced seven days apart; only
# boundaries that are not earlier than start_date are kept, so a leading
# partial week (if any) is not covered. end_date itself is always the last entry.
one_week = timedelta(days=7)
# Count of whole weeks that fit between start_date and end_date (never negative).
whole_weeks = max((end_date - start_date) // one_week, 0)
dates = [end_date - offset * one_week for offset in range(whole_weeks, -1, -1)]

In [None]:
# Count 'healthy' and 'unhealthy' assessments for every weekly partition
# delimited by consecutive entries of `dates`.
#
# Produces (and leaves in the notebook namespace for the plotting cell):
#   partitions               - number of weekly partitions (len(dates) - 1)
#   healthy_asmts_by_range   - int32 array, one count per partition
#   unhealthy_asmts_by_range - int32 array, one count per partition
with h5py.File(filename, 'r') as src:
    s = Session()
    a_src = src['assessments']
    # Read both columns fully into memory so the per-week filtering below is
    # done on plain arrays.
    asmt_created_at_ = s.get(a_src['created_at']).data[:]
    asmt_healthy_ = s.get(a_src['health_status']).data[:]

    partitions = len(dates) - 1
    healthy_asmts_by_range = np.zeros(partitions, dtype=np.int32)
    unhealthy_asmts_by_range = np.zeros(partitions, dtype=np.int32)

    for d in range(partitions):
        print("period starting {}".format(dates[d]))
        # NOTE(review): for naive datetimes, .timestamp() interprets the value
        # in the local timezone - confirm this matches how 'created_at' was
        # recorded.
        start_timestamp = dates[d].timestamp()
        end_timestamp = dates[d+1].timestamp()
        # Half-open interval [start, end) so an assessment on a boundary is
        # counted in exactly one partition.
        in_current_week = (start_timestamp <= asmt_created_at_) & (asmt_created_at_ < end_timestamp)

        statuses_in_week = s.apply_filter(in_current_week, asmt_healthy_)

        status_values, status_counts = np.unique(statuses_in_week, return_counts=True)
        result_counts = dict(zip(status_values, status_counts))
        # Status codes 0 and 1 are counted as 'healthy' and code 2 as
        # 'unhealthy' (presumably 0 = not set, 1 = healthy, 2 = not healthy -
        # TODO confirm against the dataset schema).
        # .get(code, 0) is required because np.unique only reports codes that
        # actually occur in this week; direct indexing raised KeyError whenever
        # a code was absent. It also covers the empty-week case.
        healthy_asmts_by_range[d] = result_counts.get(0, 0) + result_counts.get(1, 0)
        unhealthy_asmts_by_range[d] = result_counts.get(2, 0)


In [None]:
# Plot the weekly assessment counts (top) and the fraction of assessments that
# were 'unhealthy' (bottom). Relies on `partitions`, `dates`,
# `healthy_asmts_by_range` and `unhealthy_asmts_by_range` from the cells above.

# One bar per weekly partition, positioned at 0, 1, 2, ...
x_positions = np.arange(partitions)
# Each partition is labelled with the date on which it starts; the last entry
# of `dates` is only the closing boundary of the final partition.
week_labels = [d.strftime('%Y-%m-%d') for d in dates[:-1]]

fig, ax = plt.subplots(2, 1, figsize=(10, 10))

# Top panel: stacked bars, 'unhealthy' counts drawn on top of 'healthy' counts.
healthy_bars = ax[0].bar(x_positions, healthy_asmts_by_range)
unhealthy_bars = ax[0].bar(x_positions, unhealthy_asmts_by_range, bottom=healthy_asmts_by_range)

ax[0].set_title("Assessment counts by week")
ax[0].set_xticks(x_positions)
ax[0].set_xticklabels(week_labels, rotation=270)
# Tick marks are placed at 0-9 million and labelled in units of one million.
millions = np.arange(10)
ax[0].set_yticks(millions * 1000000)
ax[0].set_yticklabels([str(m) for m in millions])
ax[0].legend((healthy_bars, unhealthy_bars), ("'Healthy'", "'Unhealthy'"))
ax[0].set_xlabel("Week starting")
ax[0].set_ylabel("Million assessments per week")

# Bottom panel: 'unhealthy' share of all assessments per week. Weeks with no
# assessments are reported as 0 instead of triggering a 0/0 division (NaN plus
# a RuntimeWarning).
total_asmts_by_range = unhealthy_asmts_by_range + healthy_asmts_by_range
pos_fraction = np.divide(
    unhealthy_asmts_by_range,
    total_asmts_by_range,
    out=np.zeros(total_asmts_by_range.shape, dtype=np.float64),
    where=total_asmts_by_range > 0,
)
pfbar = ax[1].bar(x_positions, pos_fraction, color="#ff7f0e")

ax[1].set_title("'Unhealthy' assessments as a fraction of assessments by week")
ax[1].set_xticks(x_positions)
ax[1].set_xticklabels(week_labels, rotation=270)
ax[1].legend((pfbar,), ("'Unhealthy' assessment fraction",))
ax[1].set_xlabel("Week starting")
ax[1].set_ylabel("'Unhealthy' assessment fraction")

fig.tight_layout(h_pad=2.5)
plt.show()