## Setup

In [None]:
import os

# When the kernel starts inside a directory named `tutorials`, step up one
# level so that the relative paths and `src.` imports used later resolve
# from the parent directory.
if os.path.split(os.getcwd())[-1] == 'tutorials':
    os.chdir('..')

In [None]:
from src.preprocess.pipeline import TriplexPipeline, get_config                        
from src.preprocess.pipeline.utils import get_available_gpus

## Data processing

### Pre-processing for TRIPLEX

In [None]:
import os

from huggingface_hub import login

# Do not paste an access token into the notebook: it would be saved with the
# file and could be committed by accident. Export it as the HF_TOKEN
# environment variable before starting Jupyter instead.
# When HF_TOKEN is unset or empty, `token=None` is passed and `login` prompts
# for the token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
# TRIPLEX pre-processing for the `andersson` dataset.
# `input_dir` and `output_dir` point to the same directory.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/andersson',
    output_dir='./input/ST/andersson',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=True,
    n_splits=8,
    n_top_hvg=50,
    n_top_heg=1000,

    # --- Feature extraction parameters ---
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='both',
    gpus=[0],
)

# Build the pipeline from the configuration and run it end to end.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

In [None]:
# TRIPLEX pre-processing for the `andrew` dataset.
# `input_dir` and `output_dir` point to the same directory.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/andrew',
    output_dir='./input/ST/andrew',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=True,
    n_splits=4,
    n_top_hvg=50,
    n_top_heg=1000,

    # --- Feature extraction parameters ---
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='both',
    gpus=[0],
)

# Build the pipeline from the configuration and run it end to end.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

In [None]:
# TRIPLEX pre-processing for the `bryan` dataset.
# `input_dir` and `output_dir` point to the same directory.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/bryan',
    output_dir='./input/ST/bryan',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=True,
    n_splits=8,
    n_top_hvg=50,
    n_top_heg=1000,

    # --- Feature extraction parameters ---
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='both',
    gpus=[0],
)

# Build the pipeline from the configuration and run it end to end.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

### Basic pre-processing (ST-Net, HisToGene, Hist2ST, BLEEP)

- You can skip this if you've already done the pre-processing for TRIPLEX

In [None]:
# Basic pre-processing for the `andersson` dataset.
# No feature-extraction parameters are supplied; only `preprocess()` is called.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/andersson',
    output_dir='./input/ST/andersson',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=8,
    n_top_hvg=50,
    n_top_heg=1000,
)

pipeline = TriplexPipeline(hest_config)
pipeline.preprocess()

In [None]:
# Basic pre-processing for the `andrew` dataset.
# No feature-extraction parameters are supplied; only `preprocess()` is called.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/andrew',
    output_dir='./input/ST/andrew',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=4,
    n_top_hvg=50,
    n_top_heg=1000,
)

pipeline = TriplexPipeline(hest_config)
pipeline.preprocess()

In [None]:
# Basic pre-processing for the `bryan` dataset.
# No feature-extraction parameters are supplied; only `preprocess()` is called.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/bryan',
    output_dir='./input/ST/bryan',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=8,
    n_top_hvg=50,
    n_top_heg=1000,
)

pipeline = TriplexPipeline(hest_config)
pipeline.preprocess()

### Pre-processing for EGN

- You can skip this if you've already done the pre-processing for TRIPLEX

In [None]:
# EGN pre-processing for the `andersson` dataset.
# Neighbors are not saved and `feature_type` is set to 'global'.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/andersson',
    output_dir='./input/ST/andersson',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=8,
    n_top_hvg=50,
    n_top_heg=1000,

    # --- Feature extraction parameters ---
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='global',
    gpus=[0],
)

# Build the pipeline from the configuration and run it end to end.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

In [None]:
# EGN pre-processing for the `andrew` dataset.
# Neighbors are not saved and `feature_type` is set to 'global'.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/andrew',
    output_dir='./input/ST/andrew',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=4,
    n_top_hvg=50,
    n_top_heg=1000,

    # --- Feature extraction parameters ---
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='global',
    gpus=[0],
)

# Build the pipeline from the configuration and run it end to end.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

In [None]:
# EGN pre-processing for the `bryan` dataset.
# Neighbors are not saved and `feature_type` is set to 'global'.
hest_config = dict(
    # --- Basic configuration ---
    input_dir='./input/ST/bryan',
    output_dir='./input/ST/bryan',
    mode='hest',

    # --- Preprocessing parameters ---
    slide_ext='.tif',
    save_neighbors=False,
    n_splits=8,
    n_top_hvg=50,
    n_top_heg=1000,

    # --- Feature extraction parameters ---
    model_name='uni_v1',  # or cigar
    batch_size=1024,
    num_workers=4,
    feature_type='global',
    gpus=[0],
)

# Build the pipeline from the configuration and run it end to end.
pipeline = TriplexPipeline(hest_config)
pipeline.run_pipeline()

- Additional pre-processing for EGN


```bash
# Run the EGN exemplar script once per dataset directory.
for DATA_DIR in input/ST/andersson input/ST/andrew input/ST/bryan; do
    python src/model/EGN/build_exemplar.py --data_dir "$DATA_DIR"
done
```

## Model training

Define the dataset to use for training:
```bash
# Dataset identifier; the training and evaluation snippets expand it as
# $DATASET in the --config_name and --dataset arguments.
DATASET="ST/andersson"
```

Run the following script to train multiple models using cross-validation:

```bash
NUM_GPU=2
MODE=cv

# Models to train
MODELS=("TRIPLEX" "StNet" "EGN" "BLEEP")

# Train each model in turn; the loop runs the commands sequentially.
# Expansions are quoted so that a value containing spaces or glob characters
# reaches main.py as a single argument.
for MODEL in "${MODELS[@]}"; do
    python src/main.py --config_name "$DATASET/$MODEL" --gpu "$NUM_GPU" --mode "$MODE"
done
```

## Model Evaluation

After training, evaluate each model with the following script:

```bash
# Map each model to the log name of its training run; replace the placeholder
# values. Associative arrays (`declare -A`) require Bash 4 or newer.
declare -A MODELS=(
    ["TRIPLEX"]="Log name for TRIPLEX"
    ["StNet"]="Log name for StNet"
    ["EGN"]="Log name for EGN"
    ["BLEEP"]="Log name for BLEEP"
)

# Evaluate each model, then aggregate its results.
# The log name is quoted everywhere so that a value containing spaces is
# passed as one argument instead of being split into several.
for MODEL in "${!MODELS[@]}"; do
    TIMESTAMP="${MODELS[$MODEL]}"
    python src/main.py --config_name "$DATASET/$MODEL" --gpu 1 --mode eval --timestamp "$TIMESTAMP"
    python src/experiment/agg_results.py --dataset "$DATASET" --model "$MODEL" --timestamp "$TIMESTAMP"
done
```