# Set up

In [1]:
import numpy as np
import pandas as pd
from typing import List, Tuple

In [2]:
# Shared generator used by every sampling call in this notebook; the seed is
# fixed so that running the cells in order from a fresh kernel repeats the draws.
RNG = np.random.default_rng(827)

# Lower / upper truncation bound applied to each sampled parameter.
LOW = 0.1
HIGH = 5.0

# Value given to the parameters of every region that is not being sampled.
BASELINE = 1.0

# Candidate means for the sampled parameter pair (one dataset per centre).
CENTRES = [1.0, 1.1, 1.5, 2.0, 3.0]

# Number of rows generated for each sampled region.
REPLICATES_PER_REGION = 20

In [3]:
def sample_trunc_bvn(mean: np.ndarray, cov: np.ndarray, low: float, high: float) -> np.ndarray:
    """Draw one bivariate-normal sample with both coordinates in [low, high].

    Uses plain rejection sampling against the module-level ``RNG``: candidates
    are drawn from ``N(mean, cov)`` until the first two coordinates both fall
    inside the closed interval. There is no cap on the number of attempts.

    Args:
        mean: Mean vector passed to ``multivariate_normal``.
        cov: Covariance matrix passed to ``multivariate_normal``.
        low: Inclusive lower bound for each coordinate.
        high: Inclusive upper bound for each coordinate.

    Returns:
        The first accepted draw.
    """
    candidate = RNG.multivariate_normal(mean, cov)
    while not (low <= candidate[0] <= high and low <= candidate[1] <= high):
        candidate = RNG.multivariate_normal(mean, cov)
    return candidate

def build_datasets_for_centre(m: float, sd2: float = 0.1, rho: float = 0.3,
                              n_regions: int = 5) -> pd.DataFrame:
    """Build one-region-at-a-time parameter sets centred on ``m``.

    Each region is selected in turn and ``REPLICATES_PER_REGION`` rows are
    generated for it. In every row the selected region's ``(a, b)`` pair is a
    draw from ``sample_trunc_bvn`` with mean ``(m, m)``, common variance
    ``sd2`` and correlation ``rho``, bounded by ``LOW`` and ``HIGH``; the
    parameters of all other regions are set to ``BASELINE``.

    Args:
        m: Mean of both sampled parameters.
        sd2: Variance of each sampled parameter.
        rho: Correlation between the two sampled parameters.
        n_regions: Number of regions; the default of 5 reproduces the
            previously hard-coded layout.

    Returns:
        DataFrame with ``n_regions * REPLICATES_PER_REGION`` rows and columns
        ``a1, b1, ..., a<n_regions>, b<n_regions>``.
    """
    sd = np.sqrt(sd2)
    # Deliberately sd**2 rather than sd2: keeps the covariance bit-identical to
    # what this function has always passed to the sampler.
    var = sd**2
    cov = np.array([[var, rho * var],
                    [rho * var, var]])
    # The mean does not change between draws, so build it once.
    mean = np.array([m, m])

    rows = []
    for region in range(n_regions):
        for _ in range(REPLICATES_PER_REGION):
            a_r, b_r = sample_trunc_bvn(mean, cov, LOW, HIGH)
            # Start from an all-baseline row, then overwrite the selected pair.
            params = [BASELINE] * (2 * n_regions)
            params[2 * region:2 * region + 2] = [a_r, b_r]
            rows.append(params)

    columns = [f"{ab}{r}" for r in range(1, n_regions + 1) for ab in ("a", "b")]
    return pd.DataFrame(rows, columns=columns)




In [16]:
# Index into CENTRES; 4 selects the last entry (3.0).
m=4
# One row per (region, replicate); columns are a1, b1, ..., a5, b5.
df = build_datasets_for_centre(CENTRES[m], sd2=0.1, rho=0.3)

In [None]:
# Show the (rows, columns) size of the generated table.
df.shape

In [None]:
# Display the generated parameter table.
df

In [17]:
# Expand the generated columns into the 34-column layout that is written out.
# np.asarray accepts both a DataFrame and an ndarray, so this cell can be
# re-run after `df` has already been rebound to an array (df.to_numpy() would
# raise AttributeError on the second run).
df = np.asarray(df)
m, n = df.shape

# Source column index -> destination column indices that receive a copy of it.
# Together the destination lists cover each of the indices 0..33 exactly once.
mapping = {
    0: [4, 16],
    1: [5, 17],
    2: [10, 22],
    3: [11, 23],
    4: [8, 6, 20, 18],
    5: [9, 7, 21, 19],
    6: [2, 0, 14, 12],
    7: [3, 1, 15, 13],
    8: [24, 26, 28, 30, 32],
    9: [25, 27, 29, 31, 33]
}

new_data = np.zeros((m, 34))

for orig_col, new_cols in mapping.items():
    # The (m, 1) source column broadcasts across all of its destination columns.
    new_data[:, new_cols] = df[:, [orig_col]]

In [18]:
# Write the expanded matrix as text with four decimal places per value.
# The LocalDisease/ directory must already exist; savetxt does not create it.
np.savetxt("LocalDisease/X_3.txt", new_data, fmt='%0.4f')

In [3]:
# Collapse a 34-column, space-separated file back to one column per key of
# `mapping`: each recovered column is the row-wise mean of the file columns
# listed for it.
mapping = {
    0: [4, 16],
    1: [5, 17],
    2: [10, 22],
    3: [11, 23],
    4: [8, 6, 20, 18],
    5: [9, 7, 21, 19],
    6: [2, 0, 14, 12],
    7: [3, 1, 15, 13],
    8: [24, 26, 28, 30, 32],
    9: [25, 27, 29, 31, 33]
}

X_train = pd.read_csv('X_1_1.txt', header=None, sep=' ').to_numpy()
m = len(X_train)

X_all_recovered = np.zeros((m, len(mapping)))
for orig_col, new_cols in mapping.items():
    X_all_recovered[:, orig_col] = np.mean(X_train[:, new_cols], axis=1)

# Rebind X_train to the recovered matrix, rounded to four decimals.
X_train = np.round(X_all_recovered, 4)

In [4]:
# Save X_train (as left by the preceding cell) as comma-separated text with
# four decimal places per value.
np.savetxt("X_1_1.csv", X_train, delimiter=",", fmt="%.4f")

# Check

In [2]:
# Load the Y matrices as plain arrays; the CSV files carry no header row.
# (Presumably PCA-projected outputs, judging by the file names.)
Y_train_pca = pd.read_csv('Y_train_std_pca.csv', header=None, sep=',').to_numpy()
Y_test_pca = pd.read_csv('Y_test_std_pca.csv', header=None, sep=',').to_numpy()
Y_data_1_1_pca = pd.read_csv('Y_data_1_1_pca.csv', header=None, sep=',').to_numpy()
Y_edge_std_pca = pd.read_csv('Y_edge_std_pca.csv', header=None, sep=',').to_numpy()

In [5]:
# Per-column range spanned by Y_edge_std_pca, ignoring NaNs.
col_min = np.nanmin(Y_edge_std_pca, axis=0)
col_max = np.nanmax(Y_edge_std_pca, axis=0)

# For each row of Y_data_1_1_pca, count how many columns fall inside that range.
np.logical_and(Y_data_1_1_pca >= col_min, Y_data_1_1_pca <= col_max).sum(axis=1)

array([21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21])

In [4]:
# Load the X matrices as plain arrays; the CSV files carry no header row.
X_train = pd.read_csv('../Data/X_train.csv', header=None, sep=',').to_numpy()
X_test = pd.read_csv('../Data/X_test.csv', header=None, sep=',').to_numpy()
X_edge = pd.read_csv('../Data/X_edge.csv', header=None, sep=',').to_numpy()

In [None]:
# Stack the training rows on top of the edge rows.
# NOTE(review): X_test is loaded in the previous cell but is not part of
# X_all — confirm this is intentional.
X_all = np.vstack([X_train, X_edge])