# Fast generation of positive and negative test result counts by period

In [None]:
%matplotlib inline
import math

import numpy as np
from numba import njit
import matplotlib.pyplot as plt

from exetera.core.session import Session
from exetera.core.utils import Timer
from exetera.processing.date_time_helpers import\
    get_periods, generate_period_offset_map, get_days, get_period_offsets

### Helper functions

In [None]:
def human_readable_date(date):
    '''
    Format a date as a "YYYY-MM-DD" string.

    :param date: either a timestamp in seconds (int or float) or an object
        that provides ``strftime``, such as a ``datetime``.
    :return: the date formatted with "%Y-%m-%d". Timestamps are converted
        with ``datetime.fromtimestamp``, i.e. interpreted in local time.
    '''
    # Imported locally so that this cell works even if the cell that imports
    # datetime at notebook level has not been run yet.
    from datetime import datetime

    # Integer timestamps have no strftime either, so convert them as well.
    if isinstance(date, (int, float)):
        date = datetime.fromtimestamp(date)
    return date.strftime("%Y-%m-%d")

## Fill in these parameters

In [None]:
from datetime import datetime, timedelta

# Placeholders: each assignment below must be completed before this cell can run.
filename = # filename of the dataset; it is passed to Session.open_dataset in a later cell
start_dt = # the starting datetime; must be a datetime object, since .timestamp() is called on it
end_dt = # the ending datetime; must be a datetime object, since .timestamp() is called on it

## Generate the summaries by seven day period

### Generate the seven day periods corresponding to the start and end dates

In [None]:
# Split the date range into seven-day periods
start_ts, end_ts = start_dt.timestamp(), end_dt.timestamp()

# get_periods is called from end_dt towards start_dt with a step of -1 week;
# reversing the returned list afterwards puts it in the opposite order.
periods = get_periods(end_dt, start_dt, 'week', -1)
periods.reverse()

# Report the first and last entries of the period list
first_period_label = human_readable_date(periods[0])
last_period_label = human_readable_date(periods[-1])
print("Weekly periods from {} to {}".format(first_period_label, last_period_label))

### Create the Session object
Note, you can also use `with Session() as s:` if you don't mind opening the session in each cell

In [None]:
# The session is kept in a variable (rather than a 'with' block) because later
# cells read from it; it is closed explicitly in the final cell.
s = Session()  # Open the ExeTera session
src = s.open_dataset(filename, 'r', 'src')  # Open the dataset in read-only ('r') mode
test_df = src['tests']  # Get the dataframe named 'tests'

### Get the timestamp at which each test was taken

In [None]:
with Timer("Fetching test 'date_taken_specific' values"):  # Record the time usage
    # Read the whole 'date_taken_specific' field of the tests dataframe into memory
    test_dates = test_df['date_taken_specific'].data[:]

### Calculate on what day (relative to the start of the first period) each test was taken
`get_days` also returns a filter indicating whether a given record is within the date range of interest

In [None]:
with Timer("Calculating day offsets for tests"):  # Record the time usage
    # The date range passed to get_days runs from the first to the last entry
    # of periods, expressed as timestamps
    range_start_ts = periods[0].timestamp()
    range_end_ts = periods[-1].timestamp()

    # Convert the field of timestamps into a field of relative elapsed days;
    # the second return value is the filter marking records inside the range
    test_days, inrange = get_days(test_dates,
                                  start_date=range_start_ts,
                                  end_date=range_end_ts)

### Clear the days that fall outside of the specified range

In [None]:
with Timer("Filter out days that fall outside of the specified range"):
    # inrange is the filter returned by get_days; keep only the entries it selects
    test_days = test_days[inrange]

### Map the days to their corresponding periods
We generate the map using `generate_period_offset_map` and then pass it to `get_period_offsets`

In [None]:
with Timer("Convert from days to periods"):
    # Build the map from the period list first, then use it to translate
    # each entry of test_days
    period_offset_map = generate_period_offset_map(periods)
    test_periods = get_period_offsets(period_offset_map, test_days)

### Generate 'positive' and 'negative' test filters
Ignore all other test results

In [None]:
with Timer("Generate positive and negative status arrays"):
    # Apply the in-range filter to the 'result' field once and reuse the
    # filtered field for both comparisons instead of filtering it twice.
    results_inrange = test_df['result'].apply_filter(inrange)

    # Filters created according to the data values defined in the schema:
    # 4 is compared for 'positive' and 3 for 'negative'.
    positive = results_inrange == 4
    negative = results_inrange == 3

### Summarise positive and negative by period

In [None]:
def _counts_by_period(period_indices, n_periods):
    '''
    Count how many entries of period_indices fall on each period index.

    :param period_indices: array of period indices, one per test.
    :param n_periods: length of the returned array.
    :return: int32 array of length n_periods; position k holds the number of
        occurrences of k in period_indices, and indices that never occur
        keep a count of zero.
    '''
    unique_periods, tallies = np.unique(period_indices, return_counts=True)
    totals = np.zeros(n_periods, dtype=np.int32)
    for period, tally in zip(unique_periods, tallies):
        totals[period] = tally  # Assign the count to its period slot
    return totals


with Timer("Summarise positive and negative test counts by period"):
    # Count the number of negative tests in each period
    all_negative_counts = _counts_by_period(test_periods[negative.data[:]], len(periods))

    # Same procedure for the positive tests
    all_positive_counts = _counts_by_period(test_periods[positive.data[:]], len(periods))

## Generate the charts for positive / (positive + negative) test results

In [None]:
# One bar per entry in periods, placed at consecutive multiples of width
width = 1
widths = [width * position for position in range(len(periods))]

# Both charts share the same ticks: one per period except the last
tick_positions = np.arange(len(periods) - 1)
tick_labels = [human_readable_date(period) for period in periods[:-1]]

fig, ax = plt.subplots(2, 1, figsize=(10, 10))
counts_ax, fraction_ax = ax

# Upper chart: positive counts stacked on top of negative counts
negtests = counts_ax.bar(widths, all_negative_counts)
postests = counts_ax.bar(widths, all_positive_counts, bottom=all_negative_counts)

counts_ax.set_title("Negative and positive test counts by week")
counts_ax.set_xticks(tick_positions)
counts_ax.set_xticklabels(tick_labels, rotation=270)
counts_ax.legend((negtests, postests), ("'Negative'", "'Positive'"))
counts_ax.set_xlabel("Week starting")
counts_ax.set_ylabel("Tests per week")

# Lower chart: positive / (positive + negative); a zero total is replaced
# by 1 so that the division is always defined
definite_counts = all_negative_counts + all_positive_counts
all_counts = np.where(definite_counts == 0, 1, definite_counts)

pos_fraction = all_positive_counts / all_counts
pfbar = fraction_ax.bar(widths, pos_fraction, color="#ff7f0e")

fraction_ax.set_title("Positive tests by fraction of all definite results by week")
fraction_ax.set_xticks(tick_positions)
fraction_ax.set_xticklabels(tick_labels, rotation=270)
fraction_ax.legend((pfbar,), ("Positive test fraction",))
fraction_ax.set_xlabel("Week starting")
fraction_ax.set_ylabel("Positive test fraction")

fig.tight_layout(h_pad=2.5)
plt.show()

In [None]:
# Close the session manually; this is not needed if the session was opened using a 'with' statement.
s.close()