In [1]:
# Switch into the R source directory. Every relative path used afterwards
# ("./Sampler_EU.R", "../../data/RealDataSplit/...") is resolved from here,
# so this must run exactly once per session before the other cells.
setwd("../src/R/")

In [2]:
# Load the sampler code from the current directory.
# NOTE(review): presumably this defines simulate_EU_Thai_HIV(), which the
# simulation cells call -- confirm against Sampler_EU.R.
source("./Sampler_EU.R")

In [3]:
# Reference data set; it is loaded here but not used by the cells shown below.
ie_ref_data <- read.csv("../../data/RealDataSplit/ie.rnd20.EU.csv")
# Sampling-time table: its `Freq` and `X` columns are handed to
# simulate_EU_Thai_HIV() as `sample_times_data` and `sample_times_times`.
sample_years <- read.csv("../../data/RealDataSplit/year.eu.csv")

In [4]:
# Grid of ct_p values every simulation batch sweeps over: 0, 0.1, ..., 1.
ct_p_values <- (0:10)/10
# Default batch settings; the calibration cells further down reassign
# nsamples_per and pop_sizes before running.
nsamples_per <- 2000  # Number of samples per p value
pop_sizes <- c(300, 400, 500, 700, 1000)
# R0_values <- c(2, 3, 4, 5)
# The result data frames are created inside each simulation cell, not here.

In [5]:
library(foreach)

In [6]:
# Start a pool of worker processes and register it as the backend that
# foreach's %dopar% dispatches to.
library(foreach)
# adjust number of cores based on your system
my.cluster <- parallel::makeCluster(spec = 26, type = "FORK")
doParallel::registerDoParallel(my.cluster)

In [17]:
# Calibration batch "10a".
# For every (pop_size, R0) combination, and for every value in ct_p_values,
# run nsamples_per simulations and record res$sackin_index ("si") and
# res$EIr ("ei"). Each combination writes two parquet files (one per metric);
# a metadata parquet listing all combinations is written up front.
nsamples_per <- 200  # Small batch to calibrate
state_counter <- "10a"  # derived from 10c
pop_sizes <- c(500, 550, 600, 650, 700)
# pop_sizes <- c(700, 800, 900, 1000)
R0_values <- c(2, 2.5, 3, 4, 5, 6)
# Fail loudly instead of silently producing empty result files.
stopifnot(length(nsamples_per) == 1, nsamples_per >= 1)

# Parameter grid flattened into two parallel vectors: pop_sizes cycles
# fastest, R0_values slowest.
r0_iter <- rep(R0_values, each = length(pop_sizes))
ps_iter <- rep(pop_sizes, times = length(R0_values))

# This probably doesn't need to be a parquet, but we'll take it anyways
metadata_table <- data.frame(pop_size = ps_iter, R0 = r0_iter)
metadata_path <- paste0("../../data/RealDataSplit/metadata.small", state_counter, ".rnd20.EU.simulated.parquet")
arrow::write_parquet(metadata_table, metadata_path)
print(metadata_path)


# One parallel task per (ps, r0) pair. Each task's value (collected into
# `result`) is the value of its final arrow::write_parquet() call.
result <- foreach (ps = ps_iter, r0 = r0_iter) %dopar% {
    # NOTE(review): every task is seeded identically, so all (ps, r0)
    # combinations start from the same RNG state -- confirm this is intended.
    set.seed(1947)
    # One data-frame chunk per ct_p value; bound together once after the loop
    # instead of calling rbind() on a growing data frame.
    si_chunks <- vector("list", length(ct_p_values))
    ei_chunks <- vector("list", length(ct_p_values))
    for (k in seq_along(ct_p_values)) {
        ct_p <- ct_p_values[[k]]
        # Preallocated per-draw storage; flattened with unlist() afterwards so
        # the element type is whatever the simulator returns.
        si_draws <- vector("list", nsamples_per)
        ei_draws <- vector("list", nsamples_per)
        for (i in seq_len(nsamples_per)) {
            res <- simulate_EU_Thai_HIV(
                sample_size = 20, ct_p = ct_p, R0 = r0,
                sample_times_data = sample_years$Freq,
                sample_times_times = sample_years$X, pop_size = ps,
                l1 = 2, l2 = 4, R0_init = 3, stretch = 18 * 12,
                integer_sampling = TRUE
            )
            # `x[i] <- list(v)` stores v even when it is NULL, whereas
            # `x[[i]] <- NULL` would delete the slot.
            si_draws[i] <- list(res$sackin_index)
            # Report draws with a Sackin index of 88 or less.
            # NOTE(review): the meaning of the 88 threshold is not visible here.
            if (res$sackin_index <= 88) {
                print(c(i, res$sackin_index))
            }
            ei_draws[i] <- list(res$EIr)
            # Tree capture is disabled; the old code parsed
            # res$newick_tree_string with ape::read.tree(text = ...).
        }
        si_values <- unlist(si_draws)
        ei_values <- unlist(ei_draws)
        si_chunks[[k]] <- data.frame(ct_p = rep(ct_p, nsamples_per), si = si_values)
        ei_chunks[[k]] <- data.frame(ct_p = rep(ct_p, nsamples_per), ei = ei_values)
        # Progress marker plus min / mean / max / variance of this ct_p's
        # Sackin values.
        print("X")
        print(c(min(si_values), mean(si_values), max(si_values), var(si_values)))
        flush.console()
    }
    # Leading empty data frame keeps the result a data.frame even if
    # ct_p_values is empty.
    si_df <- do.call(rbind, c(list(data.frame()), si_chunks))
    ei_df <- do.call(rbind, c(list(data.frame()), ei_chunks))

    # One output file per metric and per (population size, R0) combination.
    arrow::write_parquet(si_df, paste0("../../data/RealDataSplit/sackin.small", state_counter, ".rnd20.EU.simulated.", ps, ".", r0, ".parquet"))
    arrow::write_parquet(ei_df, paste0("../../data/RealDataSplit/ei.small", state_counter, ".rnd20.EU.simulated.", ps, ".", r0, ".parquet"))
}


[1] "../../data/RealDataSplit/metadata.small10a.rnd20.EU.simulated.parquet"


In [7]:
# Calibration batch "15a".
# For every (pop_size, R0) combination, and for every value in ct_p_values,
# run nsamples_per simulations and record res$sackin_index ("si") and
# res$EIr ("ei"). Each combination writes two parquet files (one per metric);
# a metadata parquet listing all combinations is written up front.
nsamples_per <- 200  # Small batch to calibrate
state_counter <- "15a"  # derived from 10a
pop_sizes <- c(600, 700, 800, 900, 1000)
# pop_sizes <- c(700, 800, 900, 1000)
R0_values <- c(2, 3, 4, 5, 6)
# Fail loudly instead of silently producing empty result files.
stopifnot(length(nsamples_per) == 1, nsamples_per >= 1)

# Parameter grid flattened into two parallel vectors: pop_sizes cycles
# fastest, R0_values slowest.
r0_iter <- rep(R0_values, each = length(pop_sizes))
ps_iter <- rep(pop_sizes, times = length(R0_values))

# This probably doesn't need to be a parquet, but we'll take it anyways
metadata_table <- data.frame(pop_size = ps_iter, R0 = r0_iter)
metadata_path <- paste0("../../data/RealDataSplit/metadata.small", state_counter, ".rnd20.EU.simulated.parquet")
arrow::write_parquet(metadata_table, metadata_path)
print(metadata_path)


# One parallel task per (ps, r0) pair. Each task's value (collected into
# `result`) is the value of its final arrow::write_parquet() call.
result <- foreach (ps = ps_iter, r0 = r0_iter) %dopar% {
    # NOTE(review): every task is seeded identically, so all (ps, r0)
    # combinations start from the same RNG state -- confirm this is intended.
    set.seed(1948)
    # One data-frame chunk per ct_p value; bound together once after the loop
    # instead of calling rbind() on a growing data frame.
    si_chunks <- vector("list", length(ct_p_values))
    ei_chunks <- vector("list", length(ct_p_values))
    for (k in seq_along(ct_p_values)) {
        ct_p <- ct_p_values[[k]]
        # Preallocated per-draw storage; flattened with unlist() afterwards so
        # the element type is whatever the simulator returns.
        si_draws <- vector("list", nsamples_per)
        ei_draws <- vector("list", nsamples_per)
        for (i in seq_len(nsamples_per)) {
            res <- simulate_EU_Thai_HIV(
                sample_size = 20, ct_p = ct_p, R0 = r0,
                sample_times_data = sample_years$Freq,
                sample_times_times = sample_years$X, pop_size = ps,
                l1 = 2, l2 = 4, R0_init = 5, stretch = 18 * 12,
                integer_sampling = TRUE
            )
            # `x[i] <- list(v)` stores v even when it is NULL, whereas
            # `x[[i]] <- NULL` would delete the slot.
            si_draws[i] <- list(res$sackin_index)
            # Report draws with a Sackin index of 88 or less.
            # NOTE(review): the meaning of the 88 threshold is not visible here.
            if (res$sackin_index <= 88) {
                print(c(i, res$sackin_index))
            }
            ei_draws[i] <- list(res$EIr)
            # Tree capture is disabled; the old code parsed
            # res$newick_tree_string with ape::read.tree(text = ...).
        }
        si_values <- unlist(si_draws)
        ei_values <- unlist(ei_draws)
        si_chunks[[k]] <- data.frame(ct_p = rep(ct_p, nsamples_per), si = si_values)
        ei_chunks[[k]] <- data.frame(ct_p = rep(ct_p, nsamples_per), ei = ei_values)
        # Progress marker plus min / mean / max / variance of this ct_p's
        # Sackin values.
        print("X")
        print(c(min(si_values), mean(si_values), max(si_values), var(si_values)))
        flush.console()
    }
    # Leading empty data frame keeps the result a data.frame even if
    # ct_p_values is empty.
    si_df <- do.call(rbind, c(list(data.frame()), si_chunks))
    ei_df <- do.call(rbind, c(list(data.frame()), ei_chunks))

    # One output file per metric and per (population size, R0) combination.
    arrow::write_parquet(si_df, paste0("../../data/RealDataSplit/sackin.small", state_counter, ".rnd20.EU.simulated.", ps, ".", r0, ".parquet"))
    arrow::write_parquet(ei_df, paste0("../../data/RealDataSplit/ei.small", state_counter, ".rnd20.EU.simulated.", ps, ".", r0, ".parquet"))
}


[1] "../../data/RealDataSplit/metadata.small15a.rnd20.EU.simulated.parquet"
