## Import

In [1]:
import scvelo as scv
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
import torch
from velovi import preprocess_data, VELOVI

In [2]:
import sys

# Parent of the current working directory; the `pipeline` package is imported from there.
# Stored as an absolute path: a relative sys.path entry is resolved against the working
# directory at import time, so '../' would point somewhere else after a later os.chdir().
SANSARA_PATH = os.path.abspath('../')

# Guard the append so re-running this cell does not pile up duplicate sys.path entries.
if SANSARA_PATH not in sys.path:
    sys.path.append(SANSARA_PATH)

from pipeline import SANSARA_pipeline as sansara

In [3]:
# Show the documentation of the pipeline entry point: it takes a SansaraConfig
# and returns the path of the generated splice-aware matrix CSV file.
help(sansara.run_pipeline)

Help on function run_pipeline in module pipeline.SANSARA_pipeline:

run_pipeline(cfg: pipeline.SANSARA_pipeline.SansaraConfig) -> str
    Run the full SANSARA splicing-aware analysis pipeline.
    
    Parameters
    ----------
    cfg : SansaraConfig
        Configuration object specifying input files, barcode mapping,
        preprocessing parameters, and output options.
    
        See `SansaraConfig` for the full list of available fields.
    
    Returns
    -------
    output_path : str
        Path to the generated splice-aware matrix CSV file.
    
    Example
    -------
    >>> cfg = SansaraConfig(
    ...     loom_file="demo.loom",
    ...     working_dir="./"
    ... )
    >>> out_path = run_pipeline(cfg)



In [7]:
# Print the raw class docstring of SansaraConfig, which lists the required and
# optional configuration fields accepted by the pipeline.
print(sansara.SansaraConfig.__doc__)


    Configuration object for running the SANSARA pipeline.

    Required:
    ---------
    loom_file : str
        Path to velocyto loom file.

    working_dir : str
        Directory containing counts.mtx, metadata.csv, gene_names.csv.

    Optional inputs:
    ----------------
    counts_file : str, default="counts.mtx"
        Name of count matrix file.

    metadata_file : str, default="metadata.csv"
        Name of metadata file.

    gene_names_file : str, default="gene_names.csv"
        Name of gene names file.

    barcode_prefix : Optional[str], default=None
        Prefix added to loom barcodes to match metadata.
        If None, prefix is inferred automatically.

    barcode_postfix : Optional[str], default=None
        Postfix added to loom barcodes to match metadata.
        If None, postfix is inferred automatically.

    output_file : str, default="splice_aware_matrix.csv"
        Output CSV file name.

    Analysis parameters:
    -------------------
    min_shared_c

## Configure and run SANSARA

In [8]:
# Record the working directory the first time this cell runs, then always change into
# its `data` subfolder through an absolute path. A bare os.chdir('./data/') is not
# idempotent: running the cell a second time would try to enter ./data/data.
NOTEBOOK_DIR = globals().get('NOTEBOOK_DIR', os.getcwd())
os.chdir(os.path.join(NOTEBOOK_DIR, 'data'))

In [9]:
# Settings for the bundled demo sample, gathered in one place before building the config.
demo_settings = {
    # Required arguments
    "loom_file": "demo.loom",
    "working_dir": "./",
    # Optional arguments
    # n_top_genes is specified to speed up processing of the demo sample;
    # it can be omitted for real data.
    "n_top_genes": 2000,
}

# Build the configuration object and run the full pipeline; `out_path` receives
# the return value of run_pipeline.
cfg = sansara.SansaraConfig(**demo_settings)
out_path = sansara.run_pipeline(cfg)

Starting SANSARA analysis pipeline...

[1/7] Loading count matrix and metadata...
  Loaded data: 200 cells × 17342 genes

[2/7] Loading loom file and processing barcode naming...
Auto-inferred barcode mapping: prefix='D01_rep1_', postfix='-1'
  Loaded loom data: 200 cells × 36601 genes

[3/7] Merging AnnData and loom data...
  Merged data: 200 cells × 17335 genes

[4/7] Preprocessing and gene selection...
Filtered out 13357 genes that are detected 1 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.


  log1p(adata)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
SLURM auto-requeueing enabled. Setting signal handlers.


computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
computing velocities
    finished (0:00:00) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)

[5/7] Training VeloVI model...
Epoch 408/500:  82%|████████▏ | 408/500 [01:00<00:13,  6.76it/s, v_num=1, train_loss=-278] 
Monitored metric elbo_validation did not improve in the last 45 records. Best score: -138.277. Signaling Trainer to stop.
Model training complete

[6/7] Computing RNA velocities...

[7/7] Calculating splice-aware counts matrix...

Analysis complete!
saGEX matrix saved to: ./splice_aware_matrix.csv
  Shape: 200 cells × 440 features
  (220 genes with spliced/unspliced components)

