## Setup

### Download data from GEO dataset

1. Navigate to the GEO dataset page:
    https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE240429

2. Download and unzip all files (if necessary) *except* the following:
    - barcodes.tsv.gz
    - features.tsv.gz
    - matrix.mtx.gz

In [None]:
import os

# If the current working directory is named 'tutorials', move up one level so
# that the relative paths used in later cells resolve from the parent directory.
if os.path.split(os.getcwd())[1] == 'tutorials':
    os.chdir('..')

## Preprocess data


In [None]:
from src.preprocess.pipeline import TriplexPipeline

### Pre-processing for TRIPLEX

In [None]:
# Settings handed to TriplexPipeline for the full run (run_pipeline()).
visium_config = dict(
    # Basic configuration
    input_dir='./GSE240429',  # Replace with actual path
    output_dir='input/GSE240429',
    mode='train',
    platform='visium',

    # Preprocessing parameters
    slide_ext='.tif',
    save_neighbors=True,
    n_splits=5,
    n_top_hvg=50,
    n_top_heg=1000,

    # Feature extraction parameters
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='both',
    gpus=[0],
)

# Build the pipeline from the configuration above and execute it.
pipeline = TriplexPipeline(visium_config)
pipeline.run_pipeline()


### Basic pre-processing (ST-Net, HisToGene, Hist2ST, BLEEP)

- You can skip this if you've already done the pre-processing for TRIPLEX

In [None]:
# Settings handed to TriplexPipeline for the preprocessing-only run
# (preprocess()); no feature-extraction parameters are set here.
hest_config = dict(
    # Basic configuration
    input_dir='./GSE240429',  # Replace with actual path
    output_dir='input/GSE240429',
    mode='train',
    platform='visium',

    # Preprocessing parameters
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=5,
    n_top_hvg=50,
    n_top_heg=1000,
)

# Build the pipeline from the configuration above and run only preprocessing.
pipeline = TriplexPipeline(hest_config)
pipeline.preprocess()

### Pre-processing for EGN

- You can skip this if you've already done the pre-processing for TRIPLEX

In [None]:
# Settings handed to TriplexPipeline for the EGN run (run_pipeline()), with
# 'global' as the feature type and neighbor saving disabled.
hest_config = dict(
    # Basic configuration
    input_dir='./GSE240429',  # Replace with actual path
    output_dir='input/GSE240429',
    mode='train',
    platform='visium',

    # Preprocessing parameters
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=5,
    n_top_hvg=50,
    n_top_heg=1000,

    # Feature extraction parameters
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='global',
    gpus=[0],
)

# Build the pipeline from the configuration above and execute it.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

- Additional pre-processing for EGN


```bash
# Run the EGN exemplar builder once per data directory, in this order.
# NOTE(review): these paths point at input/ST/<name>, while the cells above
# write to input/GSE240429 -- confirm which directory is intended here.
for ST_NAME in andersson andrew bryan; do
     python src/model/EGN/build_exemplar.py --data_dir input/ST/$ST_NAME
done
```

## Model Training


```bash
# Training settings: the value passed to --gpu, the run mode, and the dataset
# name used as the config directory prefix.
NUM_GPU=2
MODE=cv
DATASET="GSE240429"

# Models to train; each one selects the config "$DATASET/<model>".
MODELS=("TRIPLEX" "StNet" "EGN" "BLEEP")

# Train each model in turn. The runs execute sequentially in the foreground.
# Expansions are quoted so values containing spaces or glob characters are
# passed through as single arguments.
for MODEL in "${MODELS[@]}"; do
     python src/main.py --config_name "$DATASET/$MODEL" --gpu "$NUM_GPU" --mode "$MODE"
done
```

## Model Evaluation



```bash
# Map each model name to the log name of the training run to evaluate.
# Replace the placeholder values with the real log names.
# MODELS may already exist as an indexed array if the training snippet was run
# in the same shell; bash cannot convert it to an associative array in place,
# so clear it first.
unset MODELS
declare -A MODELS=(
     ["TRIPLEX"]="Log name for TRIPLEX"
     ["StNet"]="Log name for StNet"
     ["EGN"]="Log name for EGN"
     ["BLEEP"]="Log name for BLEEP"
)

DATASET="GSE240429"
# Loop through each model (associative-array key order is unspecified).
# For every model, run main.py in eval mode and then agg_results.py with the
# same dataset, model, and timestamp. Expansions are quoted so that log names
# containing spaces reach --timestamp as a single argument.
for MODEL in "${!MODELS[@]}"; do
     TIMESTAMP=${MODELS[$MODEL]}
     python src/main.py --config_name "$DATASET/$MODEL" --gpu 1 --mode eval --timestamp "$TIMESTAMP"
     python src/experiment/agg_results.py --dataset "$DATASET" --model "$MODEL" --timestamp "$TIMESTAMP"
done
```