# Set up

In [1]:
import numpy as np
import pandas as pd
from typing import List, Tuple

In [2]:
# Seeded generator shared by the sampling code in this notebook.
RNG = np.random.default_rng(seed=1030)

# Lower / upper truncation bounds handed to sample_trunc_bvn.
LOW = 0.1
HIGH = 5.0

# Value assigned (plus optional jitter) to parameters of non-active regions.
BASELINE = 2.5

# Candidate centre values; NOTE(review): not referenced in the visible cells,
# presumably passed as `m` to build_datasets_for_centre -- confirm.
CENTRES = [1.0, 1.1, 1.5, 2.0, 3.0]

# Number of rows generated for each region.
REPLICATES_PER_REGION = 10

In [None]:
def sample_trunc_bvn(mean: np.ndarray, cov: np.ndarray, low: float, high: float) -> np.ndarray:
    """Draw one bivariate-normal sample whose two components lie in [low, high].

    Uses rejection sampling on the module-level ``RNG``: candidates are drawn
    from ``multivariate_normal(mean, cov)`` until both coordinates fall inside
    the closed interval. The loop has no iteration cap, so it only terminates
    if the box has non-zero probability under the given distribution.
    """
    accepted = None
    while accepted is None:
        candidate = RNG.multivariate_normal(mean, cov)
        first, second = candidate[0], candidate[1]
        if low <= first <= high and low <= second <= high:
            accepted = candidate
    return accepted

# def build_datasets_for_centre(
#     m: float,
#     sd2: float = 0.1,
#     rho: float = 0.3,
#     baseline_jitter: float = 1e-2,
#     jitter_mode: str = "random"
# ) -> pd.DataFrame:
#     sd = np.sqrt(sd2)
#     cov = np.array([[sd**2, rho*sd**2],
#                     [rho*sd**2, sd**2]])
#     rows = []
#     for region in range(1, 6):
#         for _ in range(REPLICATES_PER_REGION):
#             mean = np.array([m, m])
#             a_r, b_r = sample_trunc_bvn(mean, cov, LOW, HIGH)

#             params = []
#             for r in range(1, 6):
#                 if r == region:
#                     # the currently "activated" region: use the pair that was actually sampled
#                     params.extend([a_r, b_r])
#                 else:
#                     # other regions: nominally BASELINE, with a very small perturbation added here
#                     if jitter_mode == "random":
#                         # small random jitter, zero mean, amplitude controlled by baseline_jitter
#                         eps_a = np.random.uniform(-baseline_jitter, baseline_jitter)
#                         eps_b = np.random.uniform(-baseline_jitter, baseline_jitter)
#                         params.extend([BASELINE + eps_a, BASELINE + eps_b])
#                     else:
#                         # small fixed offset, so the value is not exactly BASELINE
#                         params.extend([BASELINE + baseline_jitter,
#                                        BASELINE + baseline_jitter])

#             rows.append(params)

#     df = pd.DataFrame(
#         rows,
#         columns=[f"{ab}{r}" for r in range(1, 6) for ab in ("a", "b")]
#     )
#     return df


def build_datasets_for_centre(
    m: float,
    sd2: float = 0.1,
    rho: float = 0.3,
    baseline_jitter: float = 1e-2,
    jitter_mode: str = "random",
    n_active_regions: int = 1,   # how many regions are activated in each row
) -> pd.DataFrame:
    """Build a table of per-region ``(a, b)`` parameter pairs centred on ``m``.

    For each of the 5 regions, ``REPLICATES_PER_REGION`` rows are generated.
    In every row the current region is always active; when
    ``n_active_regions > 1`` the remaining active regions are drawn without
    replacement from the other four. Each active region receives its own
    pair from ``sample_trunc_bvn`` (mean ``(m, m)``, truncated to
    ``[LOW, HIGH]``); every other region receives ``BASELINE`` plus jitter.

    All random draws go through the module-level seeded ``RNG`` so that the
    result is reproducible; the legacy global ``np.random`` state is not used.

    Parameters
    ----------
    m : float
        Common mean of both components of the active-region distribution.
    sd2 : float
        Variance of each component.
    rho : float
        Correlation between the two components.
    baseline_jitter : float
        Size of the perturbation applied to ``BASELINE`` for inactive regions.
    jitter_mode : str
        ``"random"`` draws each perturbation uniformly from
        ``[-|baseline_jitter|, |baseline_jitter|]``; any other value adds the
        fixed offset ``baseline_jitter``.
    n_active_regions : int
        Number of active regions per row, between 1 and 5.

    Returns
    -------
    pd.DataFrame
        ``5 * REPLICATES_PER_REGION`` rows with columns
        ``a1, b1, a2, b2, ..., a5, b5``.

    Raises
    ------
    ValueError
        If ``n_active_regions`` is outside ``1..5``.
    """
    if not (1 <= n_active_regions <= 5):
        raise ValueError("n_active_regions must be between 1 and 5.")

    # Loop invariants: the distribution is identical for every sampled pair.
    mean = np.array([m, m])
    covariance = rho * sd2
    cov = np.array([[sd2,        covariance],
                    [covariance, sd2       ]])

    # Generator.uniform requires low <= high, so use the magnitude; the
    # resulting interval is the same one a negative jitter described before.
    jitter_width = abs(baseline_jitter)

    rows = []
    all_regions = list(range(1, 6))

    for region in all_regions:
        others = [r for r in all_regions if r != region]

        for _ in range(REPLICATES_PER_REGION):
            # === choose the set of active regions for this row ===
            # The current region is always included; the rest are filled
            # in at random from the other regions.
            active_regions = [region]
            if n_active_regions > 1:
                extra = RNG.choice(
                    others,
                    size=n_active_regions - 1,
                    replace=False,
                )
                active_regions.extend(extra.tolist())

            # === sample one (a_r, b_r) pair per active region ===
            # Each value returned by sample_trunc_bvn has shape (2,).
            samples = {
                r_act: sample_trunc_bvn(mean, cov, LOW, HIGH)
                for r_act in active_regions
            }

            # === assemble the 10 parameters a1, b1, ..., a5, b5 ===
            params = []
            for r in all_regions:
                if r in samples:
                    a_r, b_r = samples[r]
                    params.extend([a_r, b_r])
                elif jitter_mode == "random":
                    # Inactive region: baseline plus a small random jitter.
                    eps_a, eps_b = RNG.uniform(-jitter_width, jitter_width, size=2)
                    params.extend([BASELINE + eps_a, BASELINE + eps_b])
                else:
                    # Inactive region: baseline plus a fixed offset.
                    params.extend([BASELINE + baseline_jitter,
                                   BASELINE + baseline_jitter])

            rows.append(params)

    df = pd.DataFrame(
        rows,
        columns=[f"{ab}{r}" for r in range(1, 6) for ab in ("a", "b")]
    )
    return df


In [4]:

# Generate one dataset centred on 1.2 with the baseline jitter switched off.
df = build_datasets_for_centre(
    1.2,
    sd2=0.05,
    rho=0.3,
    baseline_jitter=0,
)

In [5]:
df.shape

(50, 10)

In [11]:
# Replace df with a single-row frame: ten columns col_0..col_9, every entry 5.
df = pd.DataFrame([[5] * 10], columns=[f'col_{i}' for i in range(10)])

In [13]:
# Expand the 10 source columns into a 34-column matrix: each source column
# is copied into every target column listed for it in `mapping`.
df = df.to_numpy()
m, n = df.shape

# source column index -> target column indices in the expanded matrix
mapping = {
    0: [4, 16],
    1: [5, 17],
    2: [10, 22],
    3: [11, 23],
    4: [8, 6, 20, 18],
    5: [9, 7, 21, 19],
    6: [2, 0, 14, 12],
    7: [3, 1, 15, 13],
    8: [24, 26, 28, 30, 32],
    9: [25, 27, 29, 31, 33],
}

new_data = np.zeros((m, 34))
for orig_col, new_cols in mapping.items():
    # The (m, 1) source slice broadcasts across all of its target columns.
    new_data[:, new_cols] = df[:, [orig_col]]

In [14]:
# np.savetxt("LocalDisease/X_3.txt", new_data, fmt='%0.4f')
# Write the expanded matrix as text with four decimals per value.
np.savetxt(fname="X_5_max.txt", X=new_data, fmt='%0.4f')

In [None]:
# Collapse an expanded, space-delimited matrix back to 10 columns by
# averaging, for each source column, the target columns it was copied to.
X_train = pd.read_csv('X_1_1.txt', header=None, sep=' ').to_numpy()

m = len(X_train)

# source column index -> target column indices in the expanded matrix
mapping = {
    0: [4, 16],
    1: [5, 17],
    2: [10, 22],
    3: [11, 23],
    4: [8, 6, 20, 18],
    5: [9, 7, 21, 19],
    6: [2, 0, 14, 12],
    7: [3, 1, 15, 13],
    8: [24, 26, 28, 30, 32],
    9: [25, 27, 29, 31, 33],
}

X_all_recovered = np.zeros((m, len(mapping)))
for orig_col, new_cols in mapping.items():
    X_all_recovered[:, orig_col] = np.mean(X_train[:, new_cols], axis=1)

# Round to the same 4-decimal precision used when the files are written.
X_train = X_all_recovered.round(4)

In [None]:
# Save the recovered 10-column matrix as comma-separated text, 4 decimals.
np.savetxt(fname="X_1_1.csv", X=X_train, fmt="%.4f", delimiter=",")

# Check

In [2]:
def _read_headerless_csv(path):
    """Read a comma-separated file without a header row into a NumPy array."""
    return pd.read_csv(path, header=None, delimiter=',').values


Y_train_pca = _read_headerless_csv('Y_train_std_pca.csv')
Y_test_pca = _read_headerless_csv('Y_test_std_pca.csv')

Y_data_1_1_pca = _read_headerless_csv('Y_data_1_1_pca.csv')
Y_data_1_5_pca = _read_headerless_csv('Y_data_1_5_pca.csv')
Y_data_2_pca = _read_headerless_csv('Y_data_2_pca.csv')

Y_edge_std_pca = _read_headerless_csv('Y_edge_std_pca.csv')

Y_data_1_2_jitter_B_2_5_pca = _read_headerless_csv('Y_data_1_2_jitter_B_2_5_pca.csv')

In [3]:
# Per-column minimum and maximum of the training matrix, ignoring NaNs.
col_max = np.nanmax(Y_train_pca, axis=0)
col_min = np.nanmin(Y_train_pca, axis=0)

# For every row of Y_data_1_1_pca, count how many columns fall inside the
# closed [col_min, col_max] range observed in the training matrix.
np.logical_and(col_min <= Y_data_1_1_pca, Y_data_1_1_pca <= col_max).sum(axis=1)

array([21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
       21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21])

In [None]:
# Load the three headerless, comma-separated input matrices as NumPy arrays.
X_train, X_test, X_edge = (
    pd.read_csv(path, header=None, delimiter=',').values
    for path in ('../Data/X_train.csv', '../Data/X_test.csv', '../Data/X_edge.csv')
)

In [None]:
# Stack the training rows on top of the edge rows (X_test is not included).
X_all = np.concatenate((X_train, X_edge), axis=0)