In [1]:
libraries = c("dplyr","magrittr","tidyr","ggplot2","RColorBrewer","zoo","lubridate","tidyverse",
              "readxl","gridExtra","MASS","ggpubr", "mixdist", "changepoint", "changepoint.np")
for(x in libraries) {library(x, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)}

# Infix string concatenation without a separator: "a" %&% "b" gives "ab".
`%&%` <- function(x, y) {
  paste0(x, y)
}

# Use the black-and-white ggplot2 theme for every plot created afterwards.
theme_set(theme_bw())
# Show the R version string in the cell output (record of the environment).
version$version.string

# Strongly penalise scientific notation so numbers print in fixed notation.
options(scipen=10000)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats[39m 1.0.0     [32m✔[39m [34mstringr[39m 1.5.1
[32m✔[39m [34mpurrr  [39m 1.0.2     [32m✔[39m [34mtibble [39m 3.2.1
[32m✔[39m [34mreadr  [39m 2.1.4     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyr[39m::[32mextract()[39m   masks [34mmagrittr[39m::extract()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m    masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m       masks [34mstats[39m::lag()
[31m✖[39m [34mpurrr[39m::[32mset_names()[39m masks [34mmagrittr[39m::set_names()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
Successfully loaded changepoint package version 2.2.4
 See NEWS for details of changes.



# Settings

In [2]:
## Cut-off date of the analysis (before the Omicron period)
Final_analysis <- as.Date("2021-12-01")

## Number of extra data points added for back-projection
add <- 30

## Window length (in days) of the moving average
MV <- 7

In [3]:
## Incubation period for non-Omicron variants (Grant et al, 2021)
# Convert the mean (`mu`) and standard deviation (`sd`) of a log-normal
# variable, given on the natural scale, into log-scale parameters.
# Returns a list with elements `mulog` and `sdlog`.
convert_lnorm <- function(mu, sd) {
  log_var <- log((sd / mu)^2 + 1)
  list(
    mulog = log(mu) - 0.5 * log_var,
    sdlog = sqrt(log_var)
  )
}

# Log-scale parameters of the incubation period (natural-scale mean 4.3, sd 2.4).
inc_fit <- with(convert_lnorm(4.3, 2.4), list(meanlog = mulog, sdlog = sdlog))

# Probability that the incubation period falls in the interval (t - 1, t].
incubation <- function(t) {
  cdf <- function(q) plnorm(q, inc_fit$meanlog, inc_fit$sdlog)
  cdf(t) - cdf(t - 1)
}

## Incubation period for the Omicron variant (Backer et al, 2022)
inc_fit_omi <- list(shape = 1.50, scale = 3.60)

# Probability that the Weibull incubation period falls in the interval (t - 1, t].
incubation_omi <- function(t) {
  cdf <- function(q) pweibull(q, inc_fit_omi$shape, inc_fit_omi$scale)
  cdf(t) - cdf(t - 1)
}

In [4]:
## Incidence data
master_df <- readRDS("master_df.rds")
min_onset <- as.Date("2020-01-10")

## Rename the Korean column headers and compute the delay (in days) from
## illness onset to publication; cases without an onset date get diff = 0
master_df <- master_df %>%
  rename(sex=성별, region=거주시도, severity='위중증 유무(유:1)') %>%
  mutate(diff = case_when(!is.na(onset) ~ as.numeric(as.Date(published) - as.Date(onset))))
master_df$diff[is.na(master_df$diff)] <- 0

## Treat an onset date as unknown when it is earlier than min_onset, later than
## the latest publication date, or 30+ days before publication; then sort by
## publication date and drop the helper column
master_df <- master_df %>%
  mutate(onset = case_when(!is.na(onset) & onset < min_onset ~ as.Date(NA),
                           !is.na(onset) & onset > max(master_df$published) ~ as.Date(NA),
                           diff >= 30 ~ as.Date(NA),
                           TRUE ~ onset)) %>%
  arrange(published) %>%
  dplyr::select(-diff)

In [5]:
## Daily publication calendar: one row per day with index t = 0, 1, ...
## NOTE(review): because of the "- 1" the calendar ends one day before the
## latest publication date — confirm that leaving out the final day is intended.
time.diff <- as.numeric(max(master_df$published) - 1 - min(master_df$published))
ttime <- data.frame(t = 0:time.diff)
ttime$published <- min(master_df$published) + ttime$t

In [6]:
## Time delay data (pre-computed objects)
delay_stepp     <- readRDS("delay_stepp.rds")
temp_stepp      <- readRDS("temp_stepp.rds")
delay_figure    <- readRDS("delay_figure.rds")
delay_stepp_org <- readRDS("delay_stepp_org.rds")
fit_changepoint <- readRDS("fit_changepoint.rds")

## Show the day following each of the first three fitted change points
temp_stepp$date[cpts(fit_changepoint)[1:3]] + 1

# Proportion of variants: non-Omicron & Omicron

In [7]:
## Variant proportions (pre-computed)
prop_variant <- readRDS("prop.rds")

In [8]:
## One row per date with the Omicron share and its complement
prop_variant_agg <- prop_variant %>%
  distinct() %>%
  spread(group, prop) %>%
  mutate(Non_Omicron = 1 - Omicron) %>%
  dplyr::select(year, week, Date, Omicron, Non_Omicron)

min_backproj <- as.Date("2020-01-01")

## Daily calendar from min_backproj through 2022-07-31
time_diff <- as.numeric(as.Date("2022-08-01") - 1 - min_backproj)
ttime_variant <- data.frame(Date = min_backproj + 0:time_diff)

## Put the proportions on the daily calendar, carry the last observed value
## forward, and set whatever is still missing to 0
prop_variant_final <- merge(prop_variant_agg %>% dplyr::select(-c("year", "week")),
                            ttime_variant, by = c("Date"), all.y = TRUE) %>%
  fill(Omicron, Non_Omicron)
prop_variant_final[is.na(prop_variant_final)] <- 0

## assumptions based on the designated periods:
## before 2021-01-02 every case is taken to be non-Omicron
prop_variant_assumed <- prop_variant_final %>%
  mutate(Omicron = case_when(Date < as.Date("2021-01-02") ~ 0, TRUE ~ Omicron),
         Non_Omicron = case_when(Date < as.Date("2021-01-02") ~ 1, TRUE ~ Non_Omicron))

# Backprojections

In [9]:
## Smoothing parameter k passed to the back-projection control list
smooth_k <- 4
## First date of every back-projected series
min_backproj <- as.Date("2020-01-01")

### (1) Cases published before May 2020 (based on the estimated change points)

In [10]:
min_backproj <- as.Date("2020-01-01")

## Daily onset calendar from min_backproj through 2022-06-30
time.diff_onset <- as.numeric(as.Date("2022-07-01") - 1 - min_backproj)
ttime_onset <- data.frame(t = 0:time.diff_onset)
ttime_onset$onset <- min_backproj + ttime_onset$t

## Cases published before 2020-05-01 with a known illness onset, counted per
## onset day; calendar days without such cases get n = 0
onset_Korea <- master_df %>% filter(published < as.Date("2020-05-01"), !is.na(onset))
master_onset <- onset_Korea %>% group_by(onset) %>% count() %>% as.data.frame()
master_onset <- merge(ttime_onset, master_onset, by = "onset", all.x = TRUE)
master_onset[is.na(master_onset)] <- 0
master_onset <- master_onset %>% dplyr::select(onset, n)

## Cases of the same period with an unknown illness onset, counted per
## publication day on the ttime calendar (the original note says asymptomatic
## cases are excluded; no such filter is applied here — presumably done upstream)
unknown_Korea <- master_df %>% filter(published < as.Date("2020-05-01"), is.na(onset))
master_unknown <- unknown_Korea %>% group_by(published) %>% count() %>% as.data.frame()
master_unknown <- merge(ttime, master_unknown, by = "published", all.x = TRUE)
master_unknown[is.na(master_unknown)] <- 0
master_unknown <- master_unknown %>% dplyr::select(published, n)

## Prepend zero-count days so the series starts at min_backproj.
## Built with Date arithmetic: as.Date(<numeric>) without `origin` fails on
## R < 4.3, and seq_len() also copes with the case where no days are needed.
n_lead <- max(0, as.numeric(min(master_unknown$published) - min_backproj))
temp_start <- data.frame(published = min_backproj + seq_len(n_lead) - 1, n = rep(0, n_lead))
master_unknown <- rbind(temp_start, master_unknown)

### Backprojection from the published date to symptom onset

In [11]:
library(surveillance)

## Pad 10 zero-count days after the last publication date for the stability of
## the back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(published = max(master_unknown$published) + 1:10, n = 0)
master_unknown <- rbind(master_unknown, temp_lastdays)
master_unknown$time_onset <- seq_len(nrow(master_unknown))


## Time delay from illness onset to publication (Weibull, first row of
## delay_stepp), discretised per day: P(d - 1 < delay <= d) for d = 1..K.
## The PMF handed to backprojNP covers delays 0..21 days, with zero mass at 0.
K <- nrow(master_unknown)
report_probability <- pweibull(1:K, shape = delay_stepp$shape[1], scale = delay_stepp$scale[1]) -
  pweibull(0:(K - 1), shape = delay_stepp$shape[1], scale = delay_stepp$scale[1])
report_pmf <- c(0, report_probability[1:21])


## Back-projection of domestic cases (smooth_k keeps k in line with later periods)
sts <- new("sts", epoch = master_unknown$time_onset, observed = master_unknown$n)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = report_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
master_unknown$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the observed number of cases. When nothing was
## back-projected the series is all zero (avoids 0/0 = NaN).
master_unknown$total_backproj[master_unknown$total_backproj <= 0.01] <- 0

dt.backproj_onset <- master_unknown %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(n)
         },
         time_onset = 0:(nrow(master_unknown) - 1)) %>%
  dplyr::select(time_onset, published, total_normal) %>%
  rename(t = time_onset, onset = published)

## Merge the back-projected data with cases whose date of illness onset was
## available. Dates missing from master_onset count as n = 0; otherwise
## NA + total_normal would be NA and the back-projected cases would be zeroed.
df_onset <- merge(master_onset, dt.backproj_onset, by = c("onset"), all.y = TRUE) %>%
  mutate(n = replace(n, is.na(n), 0), total = total_normal + n) %>%
  dplyr::select(onset, total)
df_onset[is.na(df_onset)] <- 0

ERROR: Error in library(surveillance): there is no package called ‘surveillance’


### Backprojection from the illness onset to time of infection

In [None]:
## Pad 10 zero-count days after the last onset date for the stability of the
## back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(onset = max(df_onset$onset) + 1:10, total = 0)
df_onset <- rbind(df_onset, temp_lastdays)
df_onset$time_onset <- seq_len(nrow(df_onset))


## Incubation period (log-normal, inc_fit), discretised per day:
## P(d - 1 < T <= d) for d = 1..K. The PMF handed to backprojNP covers
## 0..21 days, with zero mass at day 0.
K <- nrow(df_onset)
incubation_probability <- plnorm(1:K, inc_fit$meanlog, inc_fit$sdlog) -
  plnorm(0:(K - 1), inc_fit$meanlog, inc_fit$sdlog)
inc_pmf <- c(0, incubation_probability[1:21])


## Back-projection of domestic cases (smooth_k keeps k in line with later periods)
sts <- new("sts", epoch = df_onset$time_onset, observed = df_onset$total)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = inc_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
df_onset$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the input total. When nothing was back-projected the
## series is all zero (avoids 0/0 = NaN).
df_onset$total_backproj[df_onset$total_backproj <= 0.01] <- 0

dt.backproj_before <- df_onset %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(total)
         },
         time_onset = 0:(nrow(df_onset) - 1)) %>%
  dplyr::select(time_onset, onset, total_normal) %>%
  rename(t = time_onset, total = total_normal)

### (2) Cases published from May 1, 2020 to May 31, 2021

In [None]:
## Cases published from 2020-05-01 to 2021-05-31 with a known illness onset,
## counted per onset day; calendar days without such cases get n = 0
onset_Korea <- master_df %>%
  filter(published >= as.Date("2020-05-01") & published < as.Date("2021-06-01"),
         !is.na(onset))
master_onset <- onset_Korea %>% group_by(onset) %>% count() %>% as.data.frame()
master_onset <- merge(ttime_onset, master_onset, by = "onset", all.x = TRUE)
master_onset[is.na(master_onset)] <- 0
master_onset <- master_onset %>% dplyr::select(onset, n)

## Cases of the same period with an unknown illness onset, counted per
## publication day on the ttime calendar (the original note says asymptomatic
## cases are excluded; no such filter is applied here — presumably done upstream)
unknown_Korea <- master_df %>%
  filter(published >= as.Date("2020-05-01") & published < as.Date("2021-06-01"),
         is.na(onset))
master_unknown <- unknown_Korea %>% group_by(published) %>% count() %>% as.data.frame()
master_unknown <- merge(ttime, master_unknown, by = "published", all.x = TRUE)
master_unknown[is.na(master_unknown)] <- 0
master_unknown <- master_unknown %>% dplyr::select(published, n)

## Prepend zero-count days so the series starts at min_backproj.
## Built with Date arithmetic: as.Date(<numeric>) without `origin` fails on
## R < 4.3, and seq_len() also copes with the case where no days are needed.
n_lead <- max(0, as.numeric(min(master_unknown$published) - min_backproj))
temp_start <- data.frame(published = min_backproj + seq_len(n_lead) - 1, n = rep(0, n_lead))
master_unknown <- rbind(temp_start, master_unknown)

### Backprojection from the published date to symptom onset

In [None]:
## Pad 10 zero-count days after the last publication date for the stability of
## the back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(published = max(master_unknown$published) + 1:10, n = 0)
master_unknown <- rbind(master_unknown, temp_lastdays)
master_unknown$time_onset <- seq_len(nrow(master_unknown))


## Time delay from illness onset to publication (Weibull, second row of
## delay_stepp), discretised per day: P(d - 1 < delay <= d) for d = 1..K.
## The PMF handed to backprojNP covers delays 0..21 days, with zero mass at 0.
K <- nrow(master_unknown)
report_probability <- pweibull(1:K, shape = delay_stepp$shape[2], scale = delay_stepp$scale[2]) -
  pweibull(0:(K - 1), shape = delay_stepp$shape[2], scale = delay_stepp$scale[2])
report_pmf <- c(0, report_probability[1:21])


## Back-projection of domestic cases (smooth_k keeps k in line with later periods)
sts <- new("sts", epoch = master_unknown$time_onset, observed = master_unknown$n)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = report_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
master_unknown$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the observed number of cases. When nothing was
## back-projected the series is all zero (avoids 0/0 = NaN).
master_unknown$total_backproj[master_unknown$total_backproj <= 0.01] <- 0

dt.backproj_onset <- master_unknown %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(n)
         },
         time_onset = 0:(nrow(master_unknown) - 1)) %>%
  dplyr::select(time_onset, published, total_normal) %>%
  rename(t = time_onset, onset = published)

## Merge the back-projected data with cases whose date of illness onset was
## available. Dates missing from master_onset count as n = 0; otherwise
## NA + total_normal would be NA and the back-projected cases would be zeroed.
df_onset <- merge(master_onset, dt.backproj_onset, by = c("onset"), all.y = TRUE) %>%
  mutate(n = replace(n, is.na(n), 0), total = total_normal + n) %>%
  dplyr::select(onset, total)
df_onset[is.na(df_onset)] <- 0

### Backprojection from the illness onset to time of infection

In [None]:
## Pad 10 zero-count days after the last onset date for the stability of the
## back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(onset = max(df_onset$onset) + 1:10, total = 0)
df_onset <- rbind(df_onset, temp_lastdays)
df_onset$time_onset <- seq_len(nrow(df_onset))


## Incubation period (log-normal, inc_fit), discretised per day:
## P(d - 1 < T <= d) for d = 1..K. The PMF handed to backprojNP covers
## 0..21 days, with zero mass at day 0.
K <- nrow(df_onset)
incubation_probability <- plnorm(1:K, inc_fit$meanlog, inc_fit$sdlog) -
  plnorm(0:(K - 1), inc_fit$meanlog, inc_fit$sdlog)
inc_pmf <- c(0, incubation_probability[1:21])


## Back-projection of domestic cases (smooth_k keeps k in line with later periods)
sts <- new("sts", epoch = df_onset$time_onset, observed = df_onset$total)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = inc_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
df_onset$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the input total. When nothing was back-projected the
## series is all zero (avoids 0/0 = NaN).
df_onset$total_backproj[df_onset$total_backproj <= 0.01] <- 0

dt.backproj_after <- df_onset %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(total)
         },
         time_onset = 0:(nrow(df_onset) - 1)) %>%
  dplyr::select(time_onset, onset, total_normal) %>%
  rename(t = time_onset, total = total_normal)

### (3) Cases reported from June 1, 2021 to February 28, 2022 (based on the estimated change points)

In [None]:
## Cases published from 2021-06-01 to 2022-02-28 with a reported symptom onset,
## counted per onset day; calendar days without such cases get n = 0
onset_Korea <- master_df %>%
  filter(published >= as.Date("2021-06-01") & published < as.Date("2022-03-01")) %>%
  filter(!is.na(onset))
master_onset <- as.data.frame(count(group_by(onset_Korea, onset)))
master_onset <- merge(ttime_onset, master_onset, by = "onset", all.x = TRUE)
master_onset[is.na(master_onset)] <- 0
master_onset <- dplyr::select(master_onset, onset, n)

## Split the daily counts into Omicron / non-Omicron using the variant
## proportions assumed for each onset date
master_onset_all <- merge(master_onset, rename(prop_variant_assumed, onset = Date),
                          by = c("onset"), all.x = TRUE) %>%
  mutate(n_Omi = n * Omicron, n_non_Omi = n * Non_Omicron) %>%
  dplyr::select(onset, n_Omi, n_non_Omi)

In [None]:
## Cases published from 2021-06-01 to 2022-02-28 with an unknown symptom onset,
## counted per publication day on the ttime calendar (the original note says
## asymptomatic cases are excluded; no such filter is applied here —
## presumably done upstream)
unknown_Korea <- master_df %>%
  filter(published >= as.Date("2021-06-01") & published < as.Date("2022-03-01"),
         is.na(onset))
master_unknown <- unknown_Korea %>% group_by(published) %>% count() %>% as.data.frame()
master_unknown <- merge(ttime, master_unknown, by = "published", all.x = TRUE)
master_unknown[is.na(master_unknown)] <- 0
master_unknown <- master_unknown %>% dplyr::select(published, n)

## Prepend zero-count days so the series starts at min_backproj.
## Built with Date arithmetic: as.Date(<numeric>) without `origin` fails on
## R < 4.3, and seq_len() also copes with the case where no days are needed.
n_lead <- max(0, as.numeric(min(master_unknown$published) - min_backproj))
temp_start <- data.frame(published = min_backproj + seq_len(n_lead) - 1, n = rep(0, n_lead))
master_unknown <- rbind(temp_start, master_unknown)

## Split the daily counts into Omicron / non-Omicron using the variant
## proportions assumed for each publication date. Days with n = 0 stay 0 even
## when the date has no entry in the proportion table (n * NA would otherwise
## put NA into the series that is back-projected).
master_unknown_all <- merge(master_unknown, prop_variant_assumed %>% rename(published = Date),
                            by = c("published"), all.x = TRUE) %>%
  filter(published >= min_backproj) %>%
  mutate(n_Omi = ifelse(n == 0, 0, n * Omicron),
         n_non_Omi = ifelse(n == 0, 0, n * Non_Omicron)) %>%
  dplyr::select(published, n_Omi, n_non_Omi)

### (3-1) Non-Omicron cases

In [None]:
## Work on the non-Omicron share: expose it under the generic column name n
master_onset <- master_onset_all %>% dplyr::select(onset, n = n_non_Omi)
master_unknown <- master_unknown_all %>% dplyr::select(published, n = n_non_Omi)

### Backprojection from the published date to symptom onset

In [None]:
## Pad 10 zero-count days after the last publication date for the stability of
## the back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(published = max(master_unknown$published) + 1:10, n = 0)
master_unknown <- rbind(master_unknown, temp_lastdays)
master_unknown$time_onset <- seq_len(nrow(master_unknown))


## Time delay from illness onset to publication (Weibull, third row of
## delay_stepp), discretised per day: P(d - 1 < delay <= d) for d = 1..K.
## The PMF handed to backprojNP covers delays 0..21 days, with zero mass at 0.
K <- nrow(master_unknown)
report_probability <- pweibull(1:K, shape = delay_stepp$shape[3], scale = delay_stepp$scale[3]) -
  pweibull(0:(K - 1), shape = delay_stepp$shape[3], scale = delay_stepp$scale[3])
report_pmf <- c(0, report_probability[1:21])


## Back-projection of domestic cases
sts <- new("sts", epoch = master_unknown$time_onset, observed = master_unknown$n)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = report_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
master_unknown$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the observed number of cases. When nothing was
## back-projected the series is all zero (avoids 0/0 = NaN).
master_unknown$total_backproj[master_unknown$total_backproj <= 0.01] <- 0

dt.backproj_onset <- master_unknown %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(n)
         },
         time_onset = 0:(nrow(master_unknown) - 1)) %>%
  dplyr::select(time_onset, published, total_normal) %>%
  rename(t = time_onset, onset = published)

## Merge the back-projected data with cases whose date of illness onset was
## available. Dates missing from master_onset count as n = 0; otherwise
## NA + total_normal would be NA and the back-projected cases would be zeroed.
df_onset <- merge(master_onset, dt.backproj_onset, by = c("onset"), all.y = TRUE) %>%
  mutate(n = replace(n, is.na(n), 0), total = total_normal + n) %>%
  dplyr::select(onset, total)
df_onset[is.na(df_onset)] <- 0

### Backprojection from the illness onset to time of infection

In [None]:
## Pad 10 zero-count days after the last onset date for the stability of the
## back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(onset = max(df_onset$onset) + 1:10, total = 0)
df_onset <- rbind(df_onset, temp_lastdays)
df_onset$time_onset <- seq_len(nrow(df_onset))


## Incubation period (log-normal, inc_fit), discretised per day:
## P(d - 1 < T <= d) for d = 1..K. The PMF handed to backprojNP covers
## 0..21 days, with zero mass at day 0.
K <- nrow(df_onset)
incubation_probability <- plnorm(1:K, inc_fit$meanlog, inc_fit$sdlog) -
  plnorm(0:(K - 1), inc_fit$meanlog, inc_fit$sdlog)
inc_pmf <- c(0, incubation_probability[1:21])


## Back-projection of domestic cases
sts <- new("sts", epoch = df_onset$time_onset, observed = df_onset$total)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = inc_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
df_onset$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the input total. When nothing was back-projected the
## series is all zero (avoids 0/0 = NaN).
df_onset$total_backproj[df_onset$total_backproj <= 0.01] <- 0

dt.backproj_after2_non_Omi <- df_onset %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(total)
         },
         time_onset = 0:(nrow(df_onset) - 1)) %>%
  dplyr::select(time_onset, onset, total_normal) %>%
  rename(t = time_onset, total = total_normal)

### (3-2) Omicron cases

In [None]:
## Work on the Omicron share: expose it under the generic column name n
master_onset <- master_onset_all %>% dplyr::select(onset, n = n_Omi)
master_unknown <- master_unknown_all %>% dplyr::select(published, n = n_Omi)

### Backprojection from the published date to symptom onset

In [None]:
## Pad 10 zero-count days after the last publication date for the stability of
## the back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(published = max(master_unknown$published) + 1:10, n = 0)
master_unknown <- rbind(master_unknown, temp_lastdays)
master_unknown$time_onset <- seq_len(nrow(master_unknown))


## Time delay from illness onset to publication (Weibull, third row of
## delay_stepp), discretised per day: P(d - 1 < delay <= d) for d = 1..K.
## The PMF handed to backprojNP covers delays 0..21 days, with zero mass at 0.
K <- nrow(master_unknown)
report_probability <- pweibull(1:K, shape = delay_stepp$shape[3], scale = delay_stepp$scale[3]) -
  pweibull(0:(K - 1), shape = delay_stepp$shape[3], scale = delay_stepp$scale[3])
report_pmf <- c(0, report_probability[1:21])


## Back-projection of domestic cases
sts <- new("sts", epoch = master_unknown$time_onset, observed = master_unknown$n)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = report_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
master_unknown$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the observed number of cases. When nothing was
## back-projected the series is all zero (avoids 0/0 = NaN).
master_unknown$total_backproj[master_unknown$total_backproj <= 0.01] <- 0

dt.backproj_onset <- master_unknown %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(n)
         },
         time_onset = 0:(nrow(master_unknown) - 1)) %>%
  dplyr::select(time_onset, published, total_normal) %>%
  rename(t = time_onset, onset = published)

## Merge the back-projected data with cases whose date of illness onset was
## available. Dates missing from master_onset count as n = 0; otherwise
## NA + total_normal would be NA and the back-projected cases would be zeroed.
df_onset <- merge(master_onset, dt.backproj_onset, by = c("onset"), all.y = TRUE) %>%
  mutate(n = replace(n, is.na(n), 0), total = total_normal + n) %>%
  dplyr::select(onset, total)
df_onset[is.na(df_onset)] <- 0

### Backprojection from the illness onset to time of infection

In [None]:
## Pad 10 zero-count days after the last onset date for the stability of the
## back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(onset = max(df_onset$onset) + 1:10, total = 0)
df_onset <- rbind(df_onset, temp_lastdays)
df_onset$time_onset <- seq_len(nrow(df_onset))


## Incubation period (Weibull, inc_fit_omi), discretised per day:
## P(d - 1 < T <= d) for d = 1..K. The PMF handed to backprojNP covers
## 0..21 days, with zero mass at day 0.
K <- nrow(df_onset)
incubation_probability <- pweibull(1:K, inc_fit_omi$shape, inc_fit_omi$scale) -
  pweibull(0:(K - 1), inc_fit_omi$shape, inc_fit_omi$scale)
inc_pmf <- c(0, incubation_probability[1:21])


## Back-projection of domestic cases
sts <- new("sts", epoch = df_onset$time_onset, observed = df_onset$total)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = inc_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
df_onset$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the input total. When nothing was back-projected the
## series is all zero (avoids 0/0 = NaN).
df_onset$total_backproj[df_onset$total_backproj <= 0.01] <- 0

dt.backproj_after2_Omi <- df_onset %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(total)
         },
         time_onset = 0:(nrow(df_onset) - 1)) %>%
  dplyr::select(time_onset, onset, total_normal) %>%
  rename(t = time_onset, total = total_normal)

### (4) Cases reported from March 1, 2022 (based on the estimated change points)

In [None]:
## Cases published on or after 2022-03-01 with a known illness onset, counted
## per onset day; calendar days without such cases get n = 0
onset_Korea <- master_df %>% filter(published >= as.Date("2022-03-01"), !is.na(onset))
master_onset <- onset_Korea %>% group_by(onset) %>% count() %>% as.data.frame()
master_onset <- merge(ttime_onset, master_onset, by = "onset", all.x = TRUE)
master_onset[is.na(master_onset)] <- 0
master_onset <- master_onset %>% dplyr::select(onset, n)

## Cases of the same period with an unknown illness onset, counted per
## publication day on the ttime calendar (the original note says asymptomatic
## cases are excluded; no such filter is applied here — presumably done upstream)
unknown_Korea <- master_df %>% filter(published >= as.Date("2022-03-01"), is.na(onset))
master_unknown <- unknown_Korea %>% group_by(published) %>% count() %>% as.data.frame()
master_unknown <- merge(ttime, master_unknown, by = "published", all.x = TRUE)
master_unknown[is.na(master_unknown)] <- 0
master_unknown <- master_unknown %>% dplyr::select(published, n)

## Prepend zero-count days so the series starts at min_backproj.
## Built with Date arithmetic: as.Date(<numeric>) without `origin` fails on
## R < 4.3, and seq_len() also copes with the case where no days are needed.
n_lead <- max(0, as.numeric(min(master_unknown$published) - min_backproj))
temp_start <- data.frame(published = min_backproj + seq_len(n_lead) - 1, n = rep(0, n_lead))
master_unknown <- rbind(temp_start, master_unknown)

### Backprojection from the published date to symptom onset

In [None]:
## Pad 10 zero-count days after the last publication date for the stability of
## the back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(published = max(master_unknown$published) + 1:10, n = 0)
master_unknown <- rbind(master_unknown, temp_lastdays)
master_unknown$time_onset <- seq_len(nrow(master_unknown))


## Time delay from illness onset to publication (Weibull, fourth row of
## delay_stepp), discretised per day: P(d - 1 < delay <= d) for d = 1..K.
## The PMF handed to backprojNP covers delays 0..21 days, with zero mass at 0.
K <- nrow(master_unknown)
report_probability <- pweibull(1:K, shape = delay_stepp$shape[4], scale = delay_stepp$scale[4]) -
  pweibull(0:(K - 1), shape = delay_stepp$shape[4], scale = delay_stepp$scale[4])
report_pmf <- c(0, report_probability[1:21])


## Back-projection of domestic cases
sts <- new("sts", epoch = master_unknown$time_onset, observed = master_unknown$n)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = report_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
master_unknown$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the observed number of cases. When nothing was
## back-projected the series is all zero (avoids 0/0 = NaN).
master_unknown$total_backproj[master_unknown$total_backproj <= 0.01] <- 0

dt.backproj_onset <- master_unknown %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(n)
         },
         time_onset = 0:(nrow(master_unknown) - 1)) %>%
  dplyr::select(time_onset, published, total_normal) %>%
  rename(t = time_onset, onset = published)

## Merge the back-projected data with cases whose date of illness onset was
## available. Dates missing from master_onset count as n = 0; otherwise
## NA + total_normal would be NA and the back-projected cases would be zeroed.
df_onset <- merge(master_onset, dt.backproj_onset, by = c("onset"), all.y = TRUE) %>%
  mutate(n = replace(n, is.na(n), 0), total = total_normal + n) %>%
  dplyr::select(onset, total)
df_onset[is.na(df_onset)] <- 0

### Backprojection from the illness onset to time of infection

In [None]:
## Pad 10 zero-count days after the last onset date for the stability of the
## back-projection procedure (Date arithmetic instead of as.Date(<numeric>),
## which needs an explicit `origin` on R < 4.3)
temp_lastdays <- data.frame(onset = max(df_onset$onset) + 1:10, total = 0)
df_onset <- rbind(df_onset, temp_lastdays)
df_onset$time_onset <- seq_len(nrow(df_onset))


## Incubation period (Weibull, inc_fit_omi), discretised per day:
## P(d - 1 < T <= d) for d = 1..K. The PMF handed to backprojNP covers
## 0..21 days, with zero mass at day 0.
K <- nrow(df_onset)
incubation_probability <- pweibull(1:K, inc_fit_omi$shape, inc_fit_omi$scale) -
  pweibull(0:(K - 1), inc_fit_omi$shape, inc_fit_omi$scale)
inc_pmf <- c(0, incubation_probability[1:21])


## Back-projection of domestic cases
sts <- new("sts", epoch = df_onset$time_onset, observed = df_onset$total)
bpnp.control <- list(k = smooth_k, eps = rep(1e-4, 2), iter.max = rep(1000, 2),
                     Tmark = nrow(sts), B = -1, alpha = 0.01, verbose = FALSE, lambda0 = NULL,
                     eq3a.method = c("R", "C"))
sts_bp <- backprojNP(sts, incu.pmf = inc_pmf,
                     control = modifyList(bpnp.control, list(eq3a.method = "C")))
df_onset$total_backproj <- upperbound(sts_bp)


## Normalising the back-projected cases: drop negligible values, then rescale
## so the series sums to the input total. When nothing was back-projected the
## series is all zero (avoids 0/0 = NaN).
df_onset$total_backproj[df_onset$total_backproj <= 0.01] <- 0

dt.backproj_after3 <- df_onset %>%
  mutate(total_normal = if (isTRUE(sum(total_backproj) == 0)) {
           0 * total_backproj
         } else {
           total_backproj / sum(total_backproj) * sum(total)
         },
         time_onset = 0:(nrow(df_onset) - 1)) %>%
  dplyr::select(time_onset, onset, total_normal) %>%
  rename(t = time_onset, total = total_normal)

## Merging all the back-projected incidences

In [None]:
## The series are combined with an outer merge on the date, so a date missing
## from one series has NA in that series' total column. Sums use na.rm = TRUE
## so such a date keeps the cases of the other series instead of turning NA.

## For the non-Omicron cases: periods (1), (2) and the non-Omicron part of (3)
df_list <- list(dt.backproj_before, dt.backproj_after, dt.backproj_after2_non_Omi)
dt.backproj_non_Omi <- Reduce(function(x, y) merge(x, y, by = c("onset"), all = TRUE), df_list) %>%
  rowwise() %>%
  mutate(total_agg = sum(c_across(cols = starts_with('total')), na.rm = TRUE)) %>%
  ungroup() %>%
  dplyr::select(onset, t, total_agg) %>%
  rename(total = total_agg)

## For the Omicron cases: the Omicron part of (3) and period (4)
dt.backproj_Omi <- merge(dt.backproj_after2_Omi, dt.backproj_after3, by = c("onset"), all = TRUE) %>%
  rowwise() %>%
  mutate(total_agg = sum(c_across(cols = starts_with('total')), na.rm = TRUE)) %>%
  ungroup() %>%
  dplyr::select(onset, t.x, total_agg) %>%
  rename(t = t.x, total = total_agg)

## For all cases: every series; columns are renamed up front so the merges
## produce no .x/.y suffixes and `t` comes from the first series
df_list <- list(dt.backproj_before, dt.backproj_after %>% rename(total_after = total, t_after = t),
                dt.backproj_after2_non_Omi %>% rename(total_after2_non_Omi = total, t_after2_non_Omi = t),
                dt.backproj_after2_Omi %>% rename(total_after2_Omi = total, t_after2_Omi = t),
                dt.backproj_after3 %>% rename(total_after3 = total, t_after3 = t))
dt.backproj_all <- Reduce(function(x, y) merge(x, y, by = c("onset"), all = TRUE), df_list) %>%
  rowwise() %>%
  mutate(total_agg = sum(c_across(cols = starts_with('total')), na.rm = TRUE)) %>%
  ungroup() %>%
  dplyr::select(onset, t, total_agg) %>%
  rename(total = total_agg)

## NOTE(review): the files below hold the per-period series of (3) and (4),
## not the aggregated dt.backproj_non_Omi / dt.backproj_Omi / dt.backproj_all
## built above — confirm this is what downstream code expects.
saveRDS(dt.backproj_after2_non_Omi, "backproj_cases_non_Omi.rds")
saveRDS(dt.backproj_after2_Omi, "backproj_cases_Omi.rds")
saveRDS(dt.backproj_after3, "backproj_cases_last.rds")

In [None]:
## 7-days moving average (window MV, right-aligned). Rows without a complete
## window are dropped, and the smoothed series is rescaled so that it sums to
## the raw total of the retained rows.
dt.backproj_non_Omi_MA <- dt.backproj_non_Omi %>%
  mutate(MA_total = zoo::rollmean(total, k = MV, align = "right", fill = NA)) %>%
  na.omit() %>%
  mutate(total_normal = MA_total / sum(MA_total) * sum(total)) %>%
  dplyr::select(onset, t, total_normal) %>%
  rename(total = total_normal)

dt.backproj_Omi_MA <- dt.backproj_Omi %>%
  mutate(MA_total = zoo::rollmean(total, k = MV, align = "right", fill = NA)) %>%
  na.omit() %>%
  mutate(total_normal = MA_total / sum(MA_total) * sum(total)) %>%
  dplyr::select(onset, t, total_normal) %>%
  rename(total = total_normal)

## Restart the time index at 0 after the leading rows were dropped
dt.backproj_non_Omi_MA <- dt.backproj_non_Omi_MA %>%
  mutate(t = 0:(nrow(dt.backproj_non_Omi_MA) - 1))
dt.backproj_Omi_MA <- dt.backproj_Omi_MA %>%
  mutate(t = 0:(nrow(dt.backproj_Omi_MA) - 1))

saveRDS(dt.backproj_non_Omi_MA, "backproj_cases_non_Omi_MA.rds")
saveRDS(dt.backproj_Omi_MA, "backproj_cases_Omi_MA.rds")