In [None]:
import copy
import os
import sys

import numpy as np
import pandas as pd
import torch

# Keep float32 as the default floating-point dtype for newly created tensors.
# `torch.set_default_tensor_type` is deprecated (PyTorch >= 2.1); `set_default_dtype`
# is the documented replacement for the dtype part of that call.
torch.set_default_dtype(torch.float32)

# Add the parent directory of the notebook's working directory to the Python path
# (presumably where the project modules imported below live -- verify against the repo layout).
notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)
if parent_dir not in sys.path:  # keep the cell idempotent: no duplicate entries on re-run
    sys.path.append(parent_dir)

# NOTE(review): the star imports hide where names such as `generate_cand2` come from;
# consider replacing them with explicit imports once the set of used names is known.
from rct_data_generator import *
from outcome_models import *
from plotting_functions import *
from mcmc_bayes_update import *
from eig_comp_utils import *
from research_exp_utils import *
import uci_dataset as dataset

In [None]:
# --- Experiment configuration -------------------------------------------------
# Number of candidate sites to simulate and the bounds passed to the site generator
# for each candidate's sample size.
number_of_candidate_sites = 4
min_sample_size_cand, max_sample_size_cand = 150, 250
host_sample_size = 200


def outcome_function(X, T, eps):
    """Simulated outcome as a function of covariates ``X``, treatment ``T`` and noise ``eps``.

    ``X`` is indexed by column name ('Sex', 'Weight.viscera', 'Weight.whole', 'Height',
    'Weight.shucked', 'Weight.shell'); these are presumably columns of the abalone data
    loaded below -- TODO confirm. The treatment enters through ``4 * T`` and through
    interactions with 'Weight.shucked' and 'Weight.shell'.
    """
    return (
        1 + 1 * X['Sex'] - 1 * X['Weight.viscera'] + np.log(X['Weight.whole'])
        - X['Height'] + 4 * T + 2 * X['Weight.shucked'] * T
        + 24 * X['Weight.shell'] * T
        + 0 * X['Weight.shucked'] * T  # zero-weight interaction term, kept as in the original formula
        + eps
    )


std_true_y = 1
power_x = power_x_t = 1

# Bundle every setting into one dictionary so it can be handed to helper functions.
exp_parameters = dict(
    number_of_candidate_sites=number_of_candidate_sites,
    min_sample_size_cand=min_sample_size_cand,
    max_sample_size_cand=max_sample_size_cand,
    host_sample_size=host_sample_size,
    outcome_function=outcome_function,
    std_true_y=std_true_y,
    power_x=power_x,
    power_x_t=power_x_t,
)

initial_data = dataset.load_abalone()

In [None]:
def _feature_at(X, position):
    """Return the feature at integer ``position`` from a DataFrame, a row Series or an array-like."""
    if isinstance(X, pd.DataFrame):
        return X.iloc[:, position]
    if isinstance(X, pd.Series):
        return X.iloc[position]
    return np.asarray(X)[..., position]


def _make_site_score(coefs, transforms):
    """Build ``score(X, T, eps) = sum_j coefs[j] * transforms[j](feature j of X)``.

    The coefficients and transforms are bound at creation time, so the returned
    function does not depend on variables that change in a surrounding loop.
    """
    def score(X, T, eps):
        # T and eps are accepted only to keep the (X, T, eps) call signature; they are unused.
        terms = [coef * transform(_feature_at(X, j))
                 for j, (coef, transform) in enumerate(zip(coefs, transforms))]
        # axis=0 sums across terms: a scalar for a single row, one value per row for a
        # DataFrame, and 0.0 when no feature was drawn.
        return np.sum(terms, axis=0)
    return score


def generating_random_sites_from(data, exp_parameters):
    """Generate candidate sites from ``data`` using randomly drawn assignment functions.

    For every candidate site a random number of features (possibly zero), random
    coefficients in [-10, 10) and random transforms (log, sqrt, identity, square) are
    drawn and combined into an assignment function
    ``p_assigned_to_site(X, T, eps)``. That function, a site sample size drawn from
    ``[min_sample_size_cand, max_sample_size_cand)`` and the remaining experiment
    settings are passed to ``generate_cand2`` (defined outside this notebook).

    Changes compared with the previous implementation:
      * each coefficient is now multiplied by the transform *applied to a feature*;
        previously it was multiplied by the function object itself, which raises
        ``TypeError`` as soon as the assignment function is called;
      * the transform index is drawn from all entries of ``function_indices``;
        previously the last entry (``X**2``) could never be selected.

    Parameters
    ----------
    data : pandas.DataFrame or 2-D array-like
        Source population; ``np.shape(data)`` must have at least two entries.
    exp_parameters : dict
        Must contain 'number_of_candidate_sites', 'min_sample_size_cand',
        'max_sample_size_cand', 'outcome_function', 'std_true_y', 'power_x'
        and 'power_x_t'.

    Returns
    -------
    dict
        Maps the site index ``0..number_of_candidate_sites-1`` to whatever
        ``generate_cand2`` returns for that site.

    Notes
    -----
    NOTE(review): term ``j`` uses the feature in column position ``j`` of ``X``; the
    original code never said which features were meant -- confirm this mapping.
    NOTE(review): the assignment function is an unbounded linear score, and log/sqrt
    yield NaN or -inf for non-positive inputs -- confirm what ``generate_cand2`` expects.
    """
    sites = {}

    population_size, num_features = np.shape(data)[0], np.shape(data)[1]
    function_indices = {0: np.log, 1: np.sqrt, 2: lambda X: X, 3: lambda X: X**2}
    # One random binary treatment indicator per row of `data`, shared by all sites.
    T = np.random.randint(2, size=population_size)

    number_of_candidate_sites = exp_parameters['number_of_candidate_sites']
    min_sample_size_cand = exp_parameters['min_sample_size_cand']
    max_sample_size_cand = exp_parameters['max_sample_size_cand']
    outcome_function = exp_parameters['outcome_function']
    std_true_y = exp_parameters['std_true_y']
    power_x = exp_parameters['power_x']
    power_x_t = exp_parameters['power_x_t']

    for i in range(number_of_candidate_sites):
        # randint excludes the upper bound: between 0 and num_features - 1 features are used.
        num_features_for_subsampling = np.random.randint(0, num_features)
        random_coefs = [np.random.uniform(-10, 10) for _ in range(num_features_for_subsampling)]
        random_fct_idx = [np.random.randint(0, len(function_indices))
                          for _ in range(num_features_for_subsampling)]
        p_assigned_to_site = _make_site_score(
            random_coefs, [function_indices[idx] for idx in random_fct_idx])
        # Upper bound is exclusive, as before.
        sample_size = np.random.randint(min_sample_size_cand, max_sample_size_cand)
        sites[i] = generate_cand2(data, T, p_assigned_to_site, sample_size,
                                  power_x, power_x_t, outcome_function, std_true_y)

    return sites



In [None]:
# Build the candidate sites from the loaded data using the settings in `exp_parameters`.
sites = generating_random_sites_from(
    data=initial_data,
    exp_parameters=exp_parameters,
)