# Merged train+val and merged info and y

In [None]:
import pandas as pd
import os
import re

# === 自然排序函式 ===
def chr_sort_key(chr_name):
    m = re.match(r"chr(\d+)", chr_name)
    if m:
        return int(m.group(1))
    elif chr_name == "chrX":
        return 23
    elif chr_name == "chrY":
        return 24
    else:
        return 100  # 其他 contigs (random, Un, etc.)

# === 路徑設定 ===
base = r"C:\Users\wani\Desktop\Courses\ML for genomics\ML4G_Project_1_Data\CAGE-train\CAGE-train"
out_folder = r"C:\Users\wani\Desktop\Courses\ML for genomics\preprocessed_data\CAGE-merged"
os.makedirs(out_folder, exist_ok=True)

# === 處理 X1, X2 ===
for cell in ["X1", "X2"]:
    print(f"\n🔹 Processing {cell} ...")

    # 讀取 info
    info_train = pd.read_csv(os.path.join(base, f"{cell}_train_info.tsv"), sep="\t")
    info_val = pd.read_csv(os.path.join(base, f"{cell}_val_info.tsv"), sep="\t")
    info_merged = pd.concat([info_train, info_val], ignore_index=True)

    # 讀取 y
    y_train = pd.read_csv(os.path.join(base, f"{cell}_train_y.tsv"), sep="\t")
    y_val = pd.read_csv(os.path.join(base, f"{cell}_val_y.tsv"), sep="\t")
    y_merged = pd.concat([y_train, y_val], ignore_index=True)

    if y_merged.shape[1] == 2:
        y_merged.columns = ["gene_name", "gex"]
    else:
        y_merged.columns = ["gex"]
        y_merged.insert(0, "gene_name", info_merged["gene_name"])

    merged = pd.merge(info_merged, y_merged, on="gene_name", how="inner")

    # 儲存結果
    merged_path = os.path.join(out_folder, f"{cell}_merged.tsv")
    merged.to_csv(merged_path, sep="\t", index=False)
    print(f"✅ Saved merged file: {merged_path} ({len(merged)} genes)")

    # 印出排序後的 chr 名稱
    if "chr" in merged.columns:
        unique_chrs = sorted(merged["chr"].unique(), key=chr_sort_key)
        print(f"🧬 {cell} unique chromosomes ({len(unique_chrs)}):")
        print(", ".join(unique_chrs))
    else:
        print(f"⚠️ Column 'chr' not found in merged file for {cell}!")

print("\n🎯 All cell lines merged successfully!")


# Add Marks z-score stat

In [7]:
import os
import json
import numpy as np
import pandas as pd
import pyBigWig
from tqdm import tqdm

# === 基本設定 ===
base = "/Users/deweywang/Desktop/GitHub/Gene-expression-prediction/ML4G_Project_1_Data"
merged_base = "/Users/deweywang/Desktop/GitHub/Gene-expression-prediction/preprocessed_data/CAGE-merged"
stats_path = "/Users/deweywang/Desktop/GitHub/Gene-expression-prediction/preprocessed_data/global_norm_stats.json"

marks = ["DNase", "H3K27ac", "H3K4me3", "H3K27me3", "H3K36me3", "H3K4me1", "H3K9me3"]
tss_window = 1000  # +/- 1kb 區域作為 promoter 區域
cells = {
    "X1": os.path.join(merged_base, "X1_merged.tsv"),
    "X2": os.path.join(merged_base, "X2_merged.tsv"),
    "X3": os.path.join(base, "CAGE-train/CAGE-train/X3_test_info.tsv"),
}

# === 載入 global normalization 統計 ===
with open(stats_path, "r") as f:
    stats = json.load(f)

# === 輔助函式 ===
import numpy as np
from scipy.stats import kurtosis, skew
from numpy.fft import fft


import numpy as np
from scipy.stats import kurtosis, skew


def region_zsignal(bw, chrom, start, end, global_mean, global_std, mark_name=None, cell_name=None):
    """
    取出 bigWig 區域的多層級 z-score 特徵：
    1️⃣ Mean/Std/Min/Max/Diff
    2️⃣ Signal Gradient / Derivative Features
    3️⃣ Shape-based Descriptors (Kurtosis, Skewness)
    4️⃣ Entropy / Complexity
    5️⃣ Spatial Autocorrelation / Smoothness
    ⚙️ 所有 NaN / inf / 無效區域均會設為 0.0，保證輸出穩定。
    """

    feature_keys = [
        "z_mean", "z_std", "z_min", "z_max", "z_diff",
        "gradient_mean", "slope", "kurtosis", "skewness",
        "entropy", "autocorr", "laplacian"
    ]

    chroms = bw.chroms()

    # === 1️⃣ 染色體不存在 ===
    if chrom not in chroms:
        print(f"⚠️ [region_zsignal] Missing chromosome {chrom} in {mark_name or '?'} ({cell_name or '?'}) → fill 0.0")
        return {k: 0.0 for k in feature_keys}

    chrom_length = chroms[chrom]
    start = max(0, int(start))
    end = min(int(end), chrom_length)

    # === 2️⃣ 無效區域 ===
    if end <= start:
        print(f"⚠️ [region_zsignal] Invalid region {chrom}:{start}-{end} (end <= start) in {mark_name or '?'} → fill 0.0")
        return {k: 0.0 for k in feature_keys}

    # === 3️⃣ 取值 ===
    vals = np.array(bw.values(chrom, start, end, numpy=True))
    if vals is None or len(vals) == 0:
        print(f"⚠️ [region_zsignal] Empty values for {chrom}:{start}-{end} in {mark_name or '?'} → fill 0.0")
        return {k: 0.0 for k in feature_keys}

    vals = vals[~np.isnan(vals)]
    if len(vals) == 0:
        print(f"⚠️ [region_zsignal] All NaN values for {chrom}:{start}-{end} in {mark_name or '?'} → fill 0.0")
        return {k: 0.0 for k in feature_keys}

    # === 4️⃣ 計算統計量 ===
    local_mean = np.mean(vals)
    local_std  = np.std(vals)
    local_min  = np.min(vals)
    local_max  = np.max(vals)
    local_diff = local_max - local_min

    # === Z-score normalization ===
    z_mean = (local_mean - global_mean) / (global_std + 1e-8)
    z_std  = local_std / (global_std + 1e-8)
    z_min  = (local_min - global_mean) / (global_std + 1e-8)
    z_max  = (local_max - global_mean) / (global_std + 1e-8)
    z_diff = z_max - z_min

    # === 5️⃣ 處理 inf/nan 結果 ===
    for name in ["z_mean", "z_std", "z_min", "z_max", "z_diff"]:
        val = locals()[name]
        if not np.isfinite(val):
            print(f"⚠️ [region_zsignal] {name} not finite ({val}) for {chrom}:{start}-{end} in {mark_name or '?'} → set to 0")
            locals()[name] = 0.0

    # === 6️⃣ Signal Gradient / Slope ===
    if len(vals) > 1:
        diffs = np.diff(vals)
        gradient_mean = np.mean(np.abs(diffs)) if len(diffs) > 0 else 0.0
        x = np.arange(len(vals))
        try:
            slope = np.polyfit(x, vals, 1)[0]
        except Exception:
            slope = 0.0
    else:
        gradient_mean, slope = 0.0, 0.0

    # === 7️⃣ Shape-based Descriptors ===
    sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
    asymmetry = skew(vals) if len(vals) > 3 else 0.0

    # === 8️⃣ Entropy / Complexity ===
    p = np.abs(vals)
    if p.sum() == 0:
        local_entropy = 0.0
    else:
        p = p / (p.sum() + 1e-8)
        local_entropy = -np.sum(p * np.log2(p + 1e-8))

    # === 9️⃣ Spatial Autocorrelation / Smoothness ===
    if len(vals) > 2:
        try:
            autocorr = np.corrcoef(vals[:-1], vals[1:])[0, 1]
        except Exception:
            autocorr = 0.0
    else:
        autocorr = 0.0

    if len(vals) > 3:
        laplacian = np.mean(np.abs(vals[:-2] - 2 * vals[1:-1] + vals[2:]))
    else:
        laplacian = 0.0

    # === 10️⃣ 確保所有結果有限 ===
    result = {
        "z_mean": z_mean,
        "z_std": z_std,
        "z_min": z_min,
        "z_max": z_max,
        "z_diff": z_diff,
        "gradient_mean": gradient_mean,
        "slope": slope,
        "kurtosis": sharpness,
        "skewness": asymmetry,
        "entropy": local_entropy,
        "autocorr": autocorr,
        "laplacian": laplacian,
    }

    for k, v in result.items():
        if not np.isfinite(v):
            result[k] = 0.0

    return result


def get_tss_region(row, window=1000):
    """根據 strand (+/-) 給出 promoter (TSS ± window) 區域"""
    if row["strand"] == "+":
        start = max(0, row["TSS_start"] - window)
        end = row["TSS_end"]
    else:  # strand == "-"
        start = row["TSS_start"]
        end = row["TSS_end"] + window
    return start, end


# === 主迴圈：對每個 cell line 做 ===
# === 主迴圈：對每個 cell line 做 ===
for cell, merged_path in cells.items():
    if not os.path.exists(merged_path):
        print(f"⚠️ Missing file for {cell}: {merged_path}")
        continue

    print(f"\n🧬 Processing cell line: {cell}")
    genes = pd.read_csv(merged_path, sep="\t")

    # 避免 test 沒有 gex 欄位出錯
    if "gex" not in genes.columns:
        genes["gex"] = np.nan

    # === 對每個 mark 做特徵提取 ===
    for mark in marks:
        bw_path = os.path.join(base, f"{mark}-bigwig", f"{cell}.bw")
        if not os.path.exists(bw_path):
            bw_path = os.path.join(base, f"{mark}-bigwig", f"{cell}.bigwig")
        if not os.path.exists(bw_path):
            print(f"⚠️ Missing {mark} ({cell})")
            continue

        print(f"\n📂 Reading {mark} ({cell}) ...")
        bw = pyBigWig.open(bw_path)
        chroms = bw.chroms()

        key = f"{mark}_{cell}"
        if key not in stats:
            print(f"⚠️ No global stat found for {key}")
            continue

        global_mean = stats[key]["mean"]
        global_std = stats[key]["std"]

        # === 建立空字典儲存所有 feature ===
        gene_features = {f"{mark}_gene_{k}": [] for k in [
            "z_mean", "z_std", "z_min", "z_max", "z_diff",
            "gradient_mean", "slope", "kurtosis", "skewness",
            "entropy", "autocorr", "laplacian"
        ]}
        tss_features = {f"{mark}_tss_{k}": [] for k in [
            "z_mean", "z_std", "z_min", "z_max", "z_diff",
            "gradient_mean", "slope", "kurtosis", "skewness",
            "entropy", "autocorr", "laplacian"
        ]}

        # === 主迴圈：對每個基因計算 ===
        for _, row in tqdm(genes.iterrows(), total=len(genes)):
            chrom = row["chr"]
            if chrom not in chroms:
                for d in (gene_features, tss_features):
                    for k in d.keys():
                        d[k].append(np.nan)
                continue

            # === Gene body ===
            g_stats = region_zsignal(
                bw, chrom, row["gene_start"], row["gene_end"],
                global_mean, global_std,
                mark_name=mark, cell_name=cell
            )
            for k, v in g_stats.items():
                gene_features[f"{mark}_gene_{k}"].append(v)

            # === TSS / promoter ===
            tss_start, tss_end = get_tss_region(row, window=tss_window)
            t_stats = region_zsignal(
                bw, chrom, tss_start, tss_end,
                global_mean, global_std,
                mark_name=mark, cell_name=cell
            )
            for k, v in t_stats.items():
                tss_features[f"{mark}_tss_{k}"].append(v)

        bw.close()

        # === 合併進 DataFrame ===
        for feat_dict in (gene_features, tss_features):
            for col, vals in feat_dict.items():
                genes[col] = vals

    # === 輸出結果 ===
    out_path = os.path.join(merged_base, f"{cell}_zscore_dynamics.tsv")
    genes.to_csv(out_path, sep="\t", index=False)
    print(f"\n✅ Saved dynamic z-score features for {cell} → {out_path}")



🧬 Processing cell line: X1

📂 Reading DNase (X1) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 16284/16284 [01:49<00:00, 149.07it/s]



📂 Reading H3K27ac (X1) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 33%|███▎      | 5455/16284 [00:39<01:16, 140.79it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K27ac → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K27ac → fill 0.0


  c /= stddev[None, :]
 51%|█████     | 8236/16284 [01:00<00:50, 158.07it/s]

⚠️ [region_zsignal] All NaN values for chr10:133623895-133626795 in H3K27ac → fill 0.0
⚠️ [region_zsignal] All NaN values for chr10:133626745-133627795 in H3K27ac → fill 0.0


100%|██████████| 16284/16284 [01:53<00:00, 143.31it/s]



📂 Reading H3K4me3 (X1) ...


  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 16284/16284 [01:57<00:00, 138.40it/s]



📂 Reading H3K27me3 (X1) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
  c /= stddev[None, :]
 34%|███▎      | 5461/16284 [00:37<01:00, 177.45it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K27me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K27me3 → fill 0.0


 53%|█████▎    | 8694/16284 [00:57<00:39, 192.00it/s]

⚠️ [region_zsignal] All NaN values for chr4:190173774-190185942 in H3K27me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr4:190172773-190173823 in H3K27me3 → fill 0.0


100%|██████████| 16284/16284 [01:37<00:00, 167.29it/s]



📂 Reading H3K36me3 (X1) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 34%|███▎      | 5465/16284 [00:32<00:58, 185.73it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K36me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K36me3 → fill 0.0


 53%|█████▎    | 8691/16284 [00:52<00:38, 196.46it/s]

⚠️ [region_zsignal] All NaN values for chr4:190173774-190185942 in H3K36me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr4:190172773-190173823 in H3K36me3 → fill 0.0


  c /= stddev[None, :]
100%|██████████| 16284/16284 [01:36<00:00, 168.64it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



📂 Reading H3K4me1 (X1) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 53%|█████▎    | 8676/16284 [00:54<00:37, 200.58it/s]

⚠️ [region_zsignal] All NaN values for chr4:190173774-190185942 in H3K4me1 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr4:190172773-190173823 in H3K4me1 → fill 0.0


100%|██████████| 16284/16284 [01:34<00:00, 171.49it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



📂 Reading H3K9me3 (X1) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 34%|███▎      | 5463/16284 [00:34<00:58, 185.87it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K9me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K9me3 → fill 0.0


 53%|█████▎    | 8689/16284 [00:54<00:42, 180.78it/s]

⚠️ [region_zsignal] All NaN values for chr4:190173774-190185942 in H3K9me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr4:190172773-190173823 in H3K9me3 → fill 0.0


100%|██████████| 16284/16284 [01:37<00:00, 167.73it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



✅ Saved dynamic z-score features for X1 → /Users/deweywang/Desktop/GitHub/Gene-expression-prediction/preprocessed_data/CAGE-merged/X1_zscore_dynamics.tsv

🧬 Processing cell line: X2

📂 Reading DNase (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 16284/16284 [01:27<00:00, 185.92it/s]



📂 Reading H3K27ac (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 14%|█▍        | 2275/16284 [00:13<01:23, 167.63it/s]

⚠️ [region_zsignal] All NaN values for chr22:50782264-50783314 in H3K27ac → fill 0.0


 34%|███▎      | 5465/16284 [00:33<00:58, 185.53it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K27ac → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K27ac → fill 0.0


100%|██████████| 16284/16284 [01:33<00:00, 174.08it/s]



📂 Reading H3K4me3 (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
  c /= stddev[None, :]
 34%|███▎      | 5463/16284 [00:35<01:02, 173.48it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K4me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K4me3 → fill 0.0


100%|██████████| 16284/16284 [01:54<00:00, 142.28it/s]



📂 Reading H3K27me3 (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 34%|███▎      | 5462/16284 [00:35<01:05, 164.04it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K27me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K27me3 → fill 0.0


 53%|█████▎    | 8683/16284 [00:57<00:43, 174.24it/s]

⚠️ [region_zsignal] All NaN values for chr4:190173774-190185942 in H3K27me3 → fill 0.0


100%|██████████| 16284/16284 [01:43<00:00, 157.74it/s]



📂 Reading H3K36me3 (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 33%|███▎      | 5455/16284 [00:40<01:14, 144.94it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K36me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K36me3 → fill 0.0


 53%|█████▎    | 8669/16284 [01:03<00:50, 150.11it/s]

⚠️ [region_zsignal] All NaN values for chr4:190173774-190185942 in H3K36me3 → fill 0.0


100%|██████████| 16284/16284 [01:52<00:00, 145.30it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



📂 Reading H3K4me1 (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 34%|███▎      | 5462/16284 [00:38<01:13, 147.43it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K4me1 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K4me1 → fill 0.0


  c /= stddev[None, :]
100%|██████████| 16284/16284 [01:49<00:00, 148.84it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



📂 Reading H3K9me3 (X2) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
 34%|███▎      | 5469/16284 [00:38<01:05, 165.27it/s]

⚠️ [region_zsignal] All NaN values for chr5:181367268-181368262 in H3K9me3 → fill 0.0
⚠️ [region_zsignal] All NaN values for chr5:181366267-181367317 in H3K9me3 → fill 0.0


100%|██████████| 16284/16284 [01:47<00:00, 151.11it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



✅ Saved dynamic z-score features for X2 → /Users/deweywang/Desktop/GitHub/Gene-expression-prediction/preprocessed_data/CAGE-merged/X2_zscore_dynamics.tsv

🧬 Processing cell line: X3

📂 Reading DNase (X3) ...


  c /= stddev[:, None]
100%|██████████| 1984/1984 [00:10<00:00, 193.47it/s]



📂 Reading H3K27ac (X3) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
100%|██████████| 1984/1984 [00:10<00:00, 183.19it/s]



📂 Reading H3K4me3 (X3) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 1984/1984 [00:11<00:00, 178.91it/s]



📂 Reading H3K27me3 (X3) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
100%|██████████| 1984/1984 [00:11<00:00, 168.64it/s]



📂 Reading H3K36me3 (X3) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
100%|██████████| 1984/1984 [00:10<00:00, 182.78it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



📂 Reading H3K4me1 (X3) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
100%|██████████| 1984/1984 [00:10<00:00, 185.08it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



📂 Reading H3K9me3 (X3) ...


  sharpness = kurtosis(vals) if len(vals) > 3 else 0.0
  asymmetry = skew(vals) if len(vals) > 3 else 0.0
  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 1984/1984 [00:09<00:00, 200.77it/s]
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals
  genes[col] = vals



✅ Saved dynamic z-score features for X3 → /Users/deweywang/Desktop/GitHub/Gene-expression-prediction/preprocessed_data/CAGE-merged/X3_zscore_dynamics.tsv
