<a href="https://colab.research.google.com/github/SahputraS/Outbreak-Simulation-and-Detection-Testing/blob/main/EpiQuark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import string
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import  f1_score, precision_score, recall_score, accuracy_score
import random
import itertools
import matplotlib.pyplot as plt

In [2]:
# Import the epi-quark scoring helpers. If the package is missing (for example
# on a fresh Colab runtime), install it from GitHub and retry the import.
try:
    from epiquark import conf_matrix, score, timeliness
except ImportError:
    import sys
    # Invoke pip through the interpreter that runs this kernel so the package
    # is installed into the environment the notebook is actually using.
    !{sys.executable} -m pip install git+https://github.com/aauss/epi-quark.git
    from epiquark import conf_matrix, score, timeliness

I define the simulation setup as follows:

- Time axis:  
  $$\mathcal{T} = \{1, 2, \dots, 52\}$$

- Spatial units:  
  $$\mathcal{Y} = \{A, B\}$$

- 2D grid (time × space):  
  $$\mathcal{G} = \mathcal{T} \times \mathcal{Y}$$

- Case classes for each cell:  
  $$C : \mathcal{G} \;\to\; \{\text{endemic}, \; \text{outbreak 1}\}$$

- Signal classes for each cell:  
  $$S : \mathcal{G} \;\to\; \{\text{endemic}, \; \text{alarm}, \; \text{non_case}\}$$

## Check Condition 1: There is always at least one sick person (no "non_case" situation)

In [33]:
# Build the Condition 1 cases table: one row per (week, spatial unit, label)
# combination, each with a strictly positive case count, so no cell of the
# time x space grid is empty.
weeks = np.arange(1, 53)            # weeks 1..52
y_vals = ["A", "B"]                 # spatial units
labels = ["outbreak1", "endemic"]   # case classes

rows = list(itertools.product(weeks, y_vals, labels))

cases1 = pd.DataFrame(rows, columns=["week", "y", "data_label"])

# Draw the counts from a seeded Generator so that Restart & Run All reproduces
# the same table. `integers` excludes the upper bound, so counts are in 1..100.
cases_rng = np.random.default_rng(0)
cases1["value"] = cases_rng.integers(1, 101, size=len(cases1))
cases1.head(5)

Unnamed: 0,week,y,data_label,value
0,1,A,outbreak1,75
1,1,A,endemic,40
2,1,B,outbreak1,12
3,1,B,endemic,95
4,2,A,outbreak1,46


In [34]:
# Build the Condition 1 signal table from the cases table: same grid, with the
# case count kept as `count`, the "outbreak1" label renamed to "alarm", and a
# random score stored in `value`.
signal_raw = (
    cases1
    .rename(columns={"value": "count", "data_label": "signal_label"})
    .assign(signal_label=lambda df: df["signal_label"].replace({"outbreak1": "alarm"}))
)

# Randomize the p-value: draw one score per (week, y) cell. The cell's
# "endemic" row receives the score and every other row its complement, so the
# endemic/alarm pair of a cell sums to 1. The Generator is seeded so that the
# draw is reproducible across kernel restarts.
cell_id = signal_raw.groupby(["week", "y"], sort=False).ngroup().to_numpy()
signal_rng = np.random.default_rng(1)
cell_score = signal_rng.random(cell_id.max() + 1)
signal_raw["value"] = np.where(
    signal_raw["signal_label"].eq("endemic"),
    cell_score[cell_id],
    1 - cell_score[cell_id],
)

signal_raw.head(5)

Unnamed: 0,week,y,signal_label,count,value
0,1,A,alarm,75,0.181259
1,1,A,endemic,40,0.818741
2,1,B,alarm,12,0.623874
3,1,B,endemic,95,0.376126
4,2,A,alarm,46,0.560408


In [35]:
# Non-case signal: one "non_case" row for every (week, spatial unit) cell,
# with its score initialised to 0.
weeks = np.arange(1, 53)
y_vals = ["A", "B"]
labels = ["non_case"]
rows = [
    (week, unit, label)
    for week in weeks
    for unit in y_vals
    for label in labels
]

ns = pd.DataFrame(rows, columns=["week", "y", "signal_label"]).assign(value=0)
ns.head(5)

Unnamed: 0,week,y,signal_label,value
0,1,A,non_case,0
1,1,B,non_case,0
2,2,A,non_case,0
3,2,B,non_case,0
4,3,A,non_case,0


In [36]:
# Stack the alarm/endemic rows and the non_case rows into a single signal
# table, ordered endemic -> alarm -> non_case within every (week, y) cell.
order = pd.CategoricalDtype(categories=["endemic", "alarm", "non_case"], ordered=True)

signal1 = (
    pd.concat([signal_raw, ns], ignore_index=True)
    .astype({"week": int, "y": str, "signal_label": order})
    .sort_values(["week", "y", "signal_label"])
    .reset_index(drop=True)
    # Rows coming from `ns` have no `count` column, so they are NaN after the
    # concat; treat them as zero cases.
    .fillna({"count": 0})
)
signal1.head(5)

Unnamed: 0,week,y,signal_label,count,value
0,1,A,endemic,40.0,0.818741
1,1,A,alarm,75.0,0.181259
2,1,A,non_case,0.0,0.0
3,1,B,endemic,95.0,0.376126
4,1,B,alarm,12.0,0.623874


In [38]:
# Score the Condition 1 data with epi-quark and collect precision, recall and
# F1 per class in one table.
# NOTE(review): the two 0.5 arguments are presumably the thresholds applied to
# the case and signal probabilities - confirm against the epi-quark docs.
metrics_epi_quark1 = {
    "precision": score(cases1, signal1, "precision", 0.5, 0.5),
    "recall":    score(cases1, signal1, "recall", 0.5, 0.5),
    "f1":        score(cases1, signal1, "f1", 0.5, 0.5),
}

epi_quark1 = pd.DataFrame(metrics_epi_quark1, index=['outbreak1', 'endemic', 'non_case'])
epi_quark1 = epi_quark1.round(2)
# Evaluate timeliness on this section's own frames and on the lowercase "week"
# column they actually contain, exactly as the Condition 2 and 3 cells do.
# The previous call referenced `cases` / `signal_done1`, which are not defined
# anywhere in this notebook, and a "Week" column that does not exist.
# NOTE(review): if epi-quark still raises here, that failure is specific to
# Condition 1 data and should be caught and displayed explicitly.
print('Timeliness', timeliness(cases1, signal1, "week", 2))
# Keep the metrics table as the last expression so the notebook renders it.
epi_quark1

  .agg({"p(d,s|x)": sum})


ValueError: not enough values to unpack (expected 4, got 1)

## Check Condition 2: At least 1 "non_case" situation
I create a single "non_case" situation in the cell week = 1, y = A.
This is done by setting the cases DataFrame 'value' to 0 in the aforementioned cell, and by setting the signal DataFrame 'value' to 1 for the 'non_case' label and 0 for all other labels in that same cell.

As seen below, the `epi-quark` functions now run, and the metrics for 'non_case' give the expected result of 1.

In [43]:
# Condition 2 cases: start from the Condition 1 cases and empty the cell
# (week = 1, y = "A") by zeroing every case count in it.
cases2 = cases1.copy()
# Select the cell by its coordinates rather than by the row labels 0 and 1, so
# the edit still hits the intended rows if the row order of `cases1` changes.
empty_cell = cases2["week"].eq(1) & cases2["y"].eq("A")
cases2.loc[empty_cell, "value"] = 0
cases2.head(5)

Unnamed: 0,week,y,data_label,value
0,1,A,outbreak1,0
1,1,A,endemic,0
2,1,B,outbreak1,12
3,1,B,endemic,95
4,2,A,outbreak1,46


In [47]:
# Condition 2 signal: in the cell (week = 1, y = "A") the detector reports
# "non_case" with certainty - score 1 for non_case, 0 for endemic and alarm.
signal2 = signal1.copy()
# Address the rows by cell coordinates and label instead of the row labels
# 0, 1 and 2, which only match while `signal1` keeps its current sort order.
in_cell = signal2["week"].eq(1) & signal2["y"].eq("A")
is_non_case = signal2["signal_label"].eq("non_case")
signal2.loc[in_cell & ~is_non_case, ["count", "value"]] = 0
signal2.loc[in_cell & is_non_case, "value"] = 1
signal2.head(5)

Unnamed: 0,week,y,signal_label,count,value
0,1,A,endemic,0.0,0.0
1,1,A,alarm,0.0,0.0
2,1,A,non_case,0.0,1.0
3,1,B,endemic,95.0,0.376126
4,1,B,alarm,12.0,0.623874


In [51]:
# Score the Condition 2 data: one epi-quark `score` call per metric, gathered
# into a per-class table, followed by the timeliness of the signal.
metrics_epi_quark2 = {
    metric: score(cases2, signal2, metric, 0.5, 0.5)
    for metric in ("precision", "recall", "f1")
}

epi_quark2 = pd.DataFrame(
    metrics_epi_quark2,
    index=['outbreak1', 'endemic', 'non_case'],
).round(2)
print('Timeliness', timeliness(cases2, signal2, "week", 2))
epi_quark2

  .agg({"p(d,s|x)": sum})
  .agg({"p(d,s|x)": sum})


Timeliness {'outbreak1': np.float64(1.0)}


  .agg({"p(d,s|x)": sum})


Unnamed: 0,precision,recall,f1
outbreak1,0.38,0.51,0.44
endemic,0.54,0.44,0.49
non_case,1.0,1.0,1.0


## Check Condition 3: Extreme situation where the cases are binary
I want to model a setting where, during an outbreak, the signal DataFrame has p-value = 1 for the ‘alarm’ class and 0 otherwise, while the cases DataFrame has non-zero counts for the ‘outbreak’ class and zero counts for all other classes.

In [80]:
# Draw exactly one class per grid cell and encode the draw as a one-hot row,
# then flatten the rows into a single 0/1 vector.
rng = np.random.default_rng(42)

n_rows = 104
labels = ["outbreak", "endemic", "non_case"]

# pick one random label index per row
choices = rng.integers(0, len(labels), size=n_rows)

# row k of the identity matrix is the one-hot vector for label index k, so
# indexing it with `choices` yields the one-hot matrix directly
onehot = np.eye(len(labels), dtype=int)[choices]
flat = onehot.ravel()
flat

array([1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,

In [90]:
# Lay the one-hot vector `flat` over the full (week, y, label) grid, then
# derive both Condition 3 tables from that single grid.
weeks = np.arange(1, 53)
y_vals = ["A", "B"]
labels = ["outbreak1", "endemic", 'non_case']

rows = list(itertools.product(weeks, y_vals, labels))

# Cases DataFrame (still including the non_case rows at this point)
cases3 = pd.DataFrame(rows, columns=["week", "y", "data_label"]).assign(value=flat)

# Signal DataFrame: same grid and indicator values, with signal-side label
# names and the indicator stored as a float score
signal3 = (
    cases3
    .rename(columns={'data_label': 'signal_label'})
    .assign(
        signal_label=lambda df: df["signal_label"].replace({"outbreak1": "alarm"}),
        value=lambda df: df["value"].astype(float),
    )
)

# The cases table carries no non_case rows; drop them only after the signal
# table has been derived from the full grid
cases3 = cases3.loc[cases3["data_label"].ne("non_case")].reset_index(drop=True)

In [91]:
# Preview the first rows of the Condition 3 cases table.
cases3.head(5)

Unnamed: 0,week,y,data_label,value
0,1,A,outbreak1,1
1,1,A,endemic,0
2,1,B,outbreak1,0
3,1,B,endemic,0
4,2,A,outbreak1,0


In [92]:
# Preview the first six rows of the Condition 3 signal table, i.e. the three
# labels of the first two (week, y) cells.
signal3.head(6)

Unnamed: 0,week,y,signal_label,value
0,1,A,alarm,1.0
1,1,A,endemic,0.0
2,1,A,non_case,0.0
3,1,B,alarm,0.0
4,1,B,endemic,0.0
5,1,B,non_case,1.0


In [94]:
# Score the Condition 3 data: one epi-quark `score` call per metric, gathered
# into a per-class table, followed by the timeliness of the signal.
metrics_epi_quark3 = {
    metric: score(cases3, signal3, metric, 0.5, 0.5)
    for metric in ("precision", "recall", "f1")
}

epi_quark3 = pd.DataFrame(
    metrics_epi_quark3,
    index=['outbreak1', 'endemic', 'non_case'],
).round(2)
print('Timeliness', timeliness(cases3, signal3, "week", 2))
epi_quark3

  .agg({"p(d,s|x)": sum})
  .agg({"p(d,s|x)": sum})
  .agg({"p(d,s|x)": sum})


Timeliness {'outbreak1': np.float64(1.0)}


Unnamed: 0,precision,recall,f1
outbreak1,1.0,1.0,1.0
endemic,1.0,1.0,1.0
non_case,1.0,1.0,1.0
