In [1]:
import json
from copy import deepcopy
from pathlib import Path
from shutil import rmtree

import fire
import numpy as np
import pandas as pd

In [2]:
class Experiment:
    def __init__(
        self,
        n_iterations: int,
        rng: np.random.Generator,
        default_params: Path,
        experiment_params: Path,
        n_samples: int,
    ):
        self.n_iterations = n_iterations
        self.rng = rng
        self.default_params = self.load_json(default_params)
        self.experiment_params = self.load_json(experiment_params)
        self.n_samples = n_samples

        # self.covariate_values = self.draw_covariate_values()

    def load_json(self, path: Path):
        with open(path) as f:
            return json.load(f)

    def make_scenario_df(self):

        starting_dose_dist = self.rng.triangular(
            left=self.experiment_params['covariate_distributions']['triangular']['starting_dose']['left'],
            right=self.experiment_params['covariate_distributions']['triangular']['starting_dose']['right'],
            mode=self.experiment_params['covariate_distributions']['triangular']['starting_dose']['mode'],
            size=self.n_samples
        ) 

        dose_increase_dist = self.rng.triangular(
            left=self.experiment_params['covariate_distributions']['triangular']['dose_increase']['left'],
            right=self.experiment_params['covariate_distributions']['triangular']['dose_increase']['right'],
            mode=self.experiment_params['covariate_distributions']['triangular']['dose_increase']['mode'],
            size=self.n_samples
        ) 

        behavioral_variability_dist = self.rng.triangular(
            left=self.experiment_params['covariate_distributions']['triangular']['behavioral_variability']['left'],
            right=self.experiment_params['covariate_distributions']['triangular']['behavioral_variability']['right'],
            mode=self.experiment_params['covariate_distributions']['triangular']['behavioral_variability']['mode'],
            size=self.n_samples
        ) 

        availability_dist = self.rng.triangular(
            left=self.experiment_params['covariate_distributions']['triangular']['availability']['left'],
            right=self.experiment_params['covariate_distributions']['triangular']['availability']['right'],
            mode=self.experiment_params['covariate_distributions']['triangular']['availability']['mode'],
            size=self.n_samples
        ) 

        risk_params = self.experiment_params["covariate_distributions"][
            "multivariate_normal"
        ]["risk"]

        risk_array = self.rng.multivariate_normal(
            mean=np.array(risk_params["mean"]),
            cov=np.array(risk_params["cov"]),
            size=self.n_samples
        )

        risk_array = np.where(risk_array < 0, 0, risk_array)
        risk_array = np.where(risk_array > 1, 1, risk_array)

        covariate_df = pd.DataFrame(
            {
                'starting_dose':starting_dose_dist,
                'dose_increase':dose_increase_dist,
                'behavioral_variability':behavioral_variability_dist,
                'availability':availability_dist,
                'internal_risk':risk_array[:, 1],
                "external_risk":risk_array[:, 0],
            }
        )

        covariate_df['seed'] = [self.rng.integers(1, 2 ** 31 - 1) for _ in range(self.n_samples)]
        covariate_df['variable_group'] = [f"var_grp_{x}" for x in range(self.n_samples)]

        #covariate_df.to_csv(Path("experiment/param_df.csv"), index=False)

        return covariate_df

    def make_scenario_dirs(self):
        """
        Make dir for each scenario following

        - counterfeit_prob_x
            - dose_variability_x OR fentanyl_prob_X

        Each folder gets its own scenario default params. Then transform params only updates covariates.

        A final function should loop thru N iterations and transform params in each scenario N times.
        """

        # starting_dose_dist = self.rng.triangular(
        #     left=self.experiment_params['covariate_distributions']['triangular']['starting_dose']['left'],
        #     right=self.experiment_params['covariate_distributions']['triangular']['starting_dose']['right'],
        #     mode=self.experiment_params['covariate_distributions']['triangular']['starting_dose']['mode'],
        #     size=self.n_iterations
        # ) 

        # dose_increase_dist = self.rng.triangular(
        #     left=self.experiment_params['covariate_distributions']['triangular']['dose_increase']['left'],
        #     right=self.experiment_params['covariate_distributions']['triangular']['dose_increase']['right'],
        #     mode=self.experiment_params['covariate_distributions']['triangular']['dose_increase']['mode'],
        #     size=self.n_iterations
        # ) 

        # behavioral_variability_dist = self.rng.triangular(
        #     left=self.experiment_params['covariate_distributions']['triangular']['behavioral_variability']['left'],
        #     right=self.experiment_params['covariate_distributions']['triangular']['behavioral_variability']['right'],
        #     mode=self.experiment_params['covariate_distributions']['triangular']['behavioral_variability']['mode'],
        #     size=self.n_iterations
        # ) 

        # availability_dist = self.rng.triangular(
        #     left=self.experiment_params['covariate_distributions']['triangular']['availability']['left'],
        #     right=self.experiment_params['covariate_distributions']['triangular']['availability']['right'],
        #     mode=self.experiment_params['covariate_distributions']['triangular']['availability']['mode'],
        #     size=self.n_iterations
        # ) 

        # risk_params = self.experiment_params["covariate_distributions"][
        #     "multivariate_normal"
        # ]["risk"]

        # risk_array = self.rng.multivariate_normal(
        #     mean=np.array(risk_params["mean"]),
        #     cov=np.array(risk_params["cov"]),
        #     size=self.n_iterations
        # )

        # risk_array = np.where(risk_array < 0, 0, risk_array)
        # risk_array = np.where(risk_array > 1, 1, risk_array)

        for cprob in self.experiment_params["IV_levels"]["counterfeit_prob"]:
            for dvar in self.experiment_params["IV_levels"]["dose_variability"]:
                for fprob in self.experiment_params["IV_levels"]["fentanyl_prob"]:
                    # for user_type, user_params in self.experiment_params[
                    #     "user_types"
                    # ].items():

                    scenario_dir = Path(
                        f"experiment/scenarios/counterfeit_prob_{cprob}/dose_var_{dvar}_fent_prob_{fprob}"
                    )
                    scenario_dir.mkdir(parents=True, exist_ok=True)

                    scenario_params = deepcopy(self.default_params)

                    scenario_params["counterfeit_prob"] = cprob
                    scenario_params["dose_variability"] = dvar
                    scenario_params["fentanyl_prob"] = fprob

                        # scenario_params["starting_dose"] = float(np.random.choice(starting_dose_dist, 1))
                        # scenario_params["dose_increase"] = float(np.random.choice(dose_increase_dist, 1))
                        # scenario_params["behavioral_variability"] = float(np.random.choice(behavioral_variability_dist, 1))
                        # scenario_params["availability"] = float(np.random.choice(availability_dist, 1))

                        # scenario_params["internal_risk"] = risk_array[np.random.randint(risk_array.shape[0], size=1), :][0][1]
                        # scenario_params["external_risk"] = risk_array[np.random.randint(risk_array.shape[0], size=1), :][0][0]

                        # with open(scenario_dir.joinpath("params.json"), "w") as f:
                        #     json.dump(scenario_params, f)

                    print(scenario_params)

    def draw_covariate_values(self):
        covariate_values = {}

        for cov_name, cov_params in self.experiment_params["covariate_distributions"][
            "triangular"
        ].items():
            covariate_values[cov_name] = self.rng.triangular(
                left=cov_params["left"],
                mode=cov_params["mode"],
                right=cov_params["right"],
                size=self.n_iterations,
            )

        risk_params = self.experiment_params["covariate_distributions"][
            "multivariate_normal"
        ]["risk"]
        risk_array = self.rng.multivariate_normal(
            mean=np.array(risk_params["mean"]),
            cov=np.array(risk_params["cov"]),
            size=self.n_iterations,
        )
        risk_array = np.where(risk_array < 0, 0, risk_array)
        risk_array = np.where(risk_array > 1, 1, risk_array)

        covariate_values["external_risk"] = risk_array[:, 0]
        covariate_values["internal_risk"] = risk_array[:, 1]

        covariate_df = pd.DataFrame(covariate_values)
        return covariate_df

    def draw_seeds(self):
        seeds = [self.rng.integers(1, 2 ** 31 - 1) for _ in range(self.n_iterations)]
        seeds_file = Path("experiment/scenarios/seeds.txt")
        with open(seeds_file, mode="w") as f:
            for seed in seeds:
                f.write(str(seed))
                f.write("\n")

    def prepare(self):
        self.make_scenario_df()
        self.make_scenario_dirs()
        #self.draw_seeds()


def main(seed: int = 1, n_iterations: int = 100):
    rng = np.random.default_rng(seed)
    if Path("experiment/scenarios").exists():
        rmtree(Path("experiment/scenarios"))
    experiment = Experiment(
        n_iterations=n_iterations,
        rng=rng,
        default_params=Path("experiment/default_params.json"),
        experiment_params=Path("experiment/experiment_params.json"),
    )
    experiment.prepare()

In [3]:
experiment = Experiment(
    n_iterations=5,
    rng=np.random.default_rng(123),
    default_params=Path("experiment/default_params.json"),
    experiment_params=Path("experiment/experiment_params.json"),
    n_samples=100
)

FileNotFoundError: [Errno 2] No such file or directory: 'experiment/default_params.json'

In [4]:
os.getcwd()

'/Users/aberghammer/Documents/Projects/Virtual Opioid User/Repo/virtual-opioid-user/experiment/notebooks'

In [107]:
x = experiment.make_scenario_df()
x.head()

Unnamed: 0,starting_dose,dose_increase,behavioral_variability,availability,internal_risk,external_risk,seed
0,89.339377,24.652517,0.197852,0.955053,0.67157,0.423865,1611795120
1,91.100063,25.900527,0.047737,0.698784,0.713714,0.393092,1249344921
2,87.574839,14.161011,0.153908,0.686946,0.826808,0.579836,1238986391
3,64.09985,19.263722,0.226128,0.650962,0.493992,0.380326,924245456
4,66.899371,17.675115,0.03493,0.764242,0.659755,0.786893,1315343739


In [121]:
y = [dict(row) for id, row in x.iterrows()]
y

[{'starting_dose': 89.33937670521757,
  'dose_increase': 24.652517236062415,
  'behavioral_variability': 0.1978515008719174,
  'availability': 0.9550529135252117,
  'internal_risk': 0.6715696157013039,
  'external_risk': 0.4238648136928028,
  'seed': 1611795120.0},
 {'starting_dose': 91.1000626057982,
  'dose_increase': 25.900527438133356,
  'behavioral_variability': 0.04773733297495203,
  'availability': 0.6987835567881395,
  'internal_risk': 0.713713786062867,
  'external_risk': 0.39309211634885455,
  'seed': 1249344921.0},
 {'starting_dose': 87.57483891402563,
  'dose_increase': 14.161010624233327,
  'behavioral_variability': 0.1539084625507313,
  'availability': 0.6869460602663722,
  'internal_risk': 0.8268079265680734,
  'external_risk': 0.5798359162968068,
  'seed': 1238986391.0},
 {'starting_dose': 64.09984968540289,
  'dose_increase': 19.263721701622558,
  'behavioral_variability': 0.22612806262419674,
  'availability': 0.6509619281203339,
  'internal_risk': 0.4939915775100312,

In [None]:
class BatchSimulation:
    def __init__(self, params: Path, distribution_params: Path):
        self.params = load_json(params)
        self.dist_params = pd.read_csv(distribution_params)

    def simulate(self, parallel: bool = True):
        """
        Runs the specified number of simulations using the specified parameters.
        Generates the seeds for each simulation using the specified random number
        generator, allowing for reproducibility.
        """

        def simulate_one_person(self, param_dict: dict):
            person = Person(
                rng=Random(param_dict['seed']),
                starting_dose=param_dict["starting_dose"], #Could potentially sample from distributions here... or within the prepare.py file
                dose_increase=param_dict["dose_increase"],
                external_risk=param_dict["external_risk"],
                internal_risk=param_dict["internal_risk"],
                behavioral_variability=param_dict["behavioral_variability"],
            )
            simulation = Simulation(
                person=person,
                rng=Random(param_dict["seed"]),
                dose_variability=self.params["dose_variability"], 
                availability=param_dict["availability"],
                fentanyl_prob=self.params["fentanyl_prob"],
                counterfeit_prob=self.params["counterfeit_prob"],
            )
            simulation.simulate()
            return simulation

        if parallel is False:
            self.simulations = [simulate_one_person(self, dict(row)) for id, row in self.dist_params.iterrows()]

        if parallel is True:
            num_cores = multiprocessing.cpu_count()
            self.simulations = Parallel(n_jobs=num_cores)(
                delayed(simulate_one_person)(self, dict(row)) for id, row in self.dist_params.iterrows()
            )