In [1]:
import sys
from pathlib import Path

# Resolve the project root so that the `scripts` package can be imported.
# The notebook normally runs from notebooks/, which makes the root the parent
# of the working directory. Walking upwards from the working directory also
# covers kernels that were started from the project root or a nested folder.
_cwd = Path.cwd().resolve()
PROJECT_ROOT = next(
    (
        candidate
        for candidate in (_cwd, *_cwd.parents)
        if (candidate / "scripts" / "grid_search").is_dir()
    ),
    _cwd.parent,  # fallback: parent of the working directory, as before
)

# Keep the project root at the front of sys.path, but do not stack a new
# duplicate entry every time this cell is re-run.
_root_entry = str(PROJECT_ROOT)
if not sys.path or sys.path[0] != _root_entry:
    sys.path.insert(0, _root_entry)

# Define data paths (relative to project root)
# NOTE: MUT_PATH is a list, so more than one mutation BED file can be listed.
MUT_PATH = [
    "data/raw/mutations/filtered_mutations.bed",
]
FAI_PATH = "data/raw/reference/GRCh37.fa.fai"  # index of the reference FASTA
FASTA_PATH = "data/raw/reference/GRCh37.fa"  # reference genome sequence
TIMING_BW = "data/raw/timing/repliSeq_SknshWaveSignalRep1.bigWig"  # timing track

# JSON file with the DNase-seq map (presumably cell type -> track; verify
# against the runner).
DNASE_MAP_PATH = "data/raw/DNase-seq/celltype_dnase_map.json"

# No explicit tumour filter is configured.
# NOTE(review): this constant is not passed to the run_grid_experiment call
# in the next cell -- confirm whether it is still needed.
TUMOUR_FILTER = None


In [2]:
from scripts.grid_search.runner import run_grid_experiment

# Smoke test: a deliberately tiny grid (two samples, one bin size per track
# strategy, a single covariate set) to confirm the pipeline runs end to end
# before launching a larger experiment.
simple_check = run_grid_experiment(
    # --- input files ---
    mut_path=MUT_PATH,
    fasta_path=FASTA_PATH,
    fai_path=FAI_PATH,
    timing_bigwig=TIMING_BW,
    dnase_map_path=DNASE_MAP_PATH,
    # --- sampling ---
    base_seed=123,
    per_sample_count=2,
    downsample_counts=None,
    # --- track strategies and their grids (all distances in bp) ---
    track_strategies=["counts_raw", "exp_decay"],
    counts_raw_bins=[1_000_000],
    exp_decay_bins=[1_000_000],
    exp_decay_decay_bp_grid=[200_000],
    exp_decay_max_distance_bp_grid=[1_000_000],
    # --- covariates ---
    covariate_sets=[["gc", "cpg", "timing"]],
    include_trinuc=False,
    # --- output ---
    out_dir="outputs/experiments/simple_check",
    save_per_bin=False,
)

# Preview the first rows of the result table.
simple_check.head()


[00:33:40] INFO Session start
[00:33:40] INFO Inputs
[00:33:40] INFO   mutations_bed:       [PosixPath('/home/lem/projects/mut-epi-origin/data/raw/mutations/filtered_mutations.bed')]
[00:33:40] INFO   fasta:               /home/lem/projects/mut-epi-origin/data/raw/reference/GRCh37.fa
[00:33:40] INFO   fai:                 /home/lem/projects/mut-epi-origin/data/raw/reference/GRCh37.fa.fai
[00:33:40] INFO   dnase_bigwigs:       myel_prog, nkc, kera, smc, eso_epi, neuro_stem, astro_hippo, astro_spinal, mela, fibr
[00:33:40] INFO   timing_bigwig:       /home/lem/projects/mut-epi-origin/data/raw/timing/repliSeq_SknshWaveSignalRep1.bigWig
[00:33:40] INFO   tumour_filter:       AML,LAML,NKTL,BCC,LMS,ESAD,ESCA,NBL,GBM,LGG,SKCM,MELA
[00:33:40] INFO Grid
[00:33:40] INFO   chroms:              22
[00:33:40] INFO   configs:             4
[00:33:40] INFO START Sample selection (per-sample idx=0) ...
[00:35:33] INFO DONE Sample selection (per-sample idx=0) (1m56.7s)
[00:35:33] INFO   sample_slice:  

Unnamed: 0,sample_size_k,repeat,seed_samples,n_selected_samples,sample_slice_start,sample_slice_end,sample_index,sample_id,sample_tag,sample_mode,...,is_correct_rf_resid,is_correct_local_score,downsample_applied,downsample_ratio,rf_top_feature_perm,rf_top_feature_importance_perm,rf_top_is_dnase,exp_decay_bin,exp_decay_decay_bp,exp_decay_max_distance_bp
0,1,0,123,1,0,1,0,filtered_mutations::DO222423,filtered_mutations--DO222423,per_sample,...,True,True,False,1.0,dnase_mela,0.994482,True,,,
1,1,0,123,1,0,1,0,filtered_mutations::DO222423,filtered_mutations--DO222423,per_sample,...,True,True,False,1.0,dnase_mela,0.966554,True,1000000.0,200000.0,1000000.0
2,1,0,123,1,1,2,1,filtered_mutations::DO46065,filtered_mutations--DO46065,per_sample,...,False,False,False,1.0,timing_mean,0.128926,False,,,
3,1,0,123,1,1,2,1,filtered_mutations::DO46065,filtered_mutations--DO46065,per_sample,...,False,False,False,1.0,timing_mean,0.069411,False,1000000.0,200000.0,1000000.0
