In [6]:
import sys
import os
import time
import numpy as np
import pandas as pd
import warnings
from tqdm.auto import tqdm
from anndata import AnnData
from pathlib import Path
from multiprocessing import Pool
import cobra

# Make the repository root importable so that `scripts.*` resolves. The root
# is taken to be the parent of the current working directory.
project_root = Path(os.getcwd()).parent
project_root_str = str(project_root)
# Re-running this cell in the same kernel must not pile up duplicate entries.
if project_root_str not in sys.path:
    sys.path.append(project_root_str)

print("✅ Added to sys.path:", project_root)

from scripts.utils_scFBApy import scFBApy, repairNeg

print("✅ Environment initialized.")

✅ Added to sys.path: /home/sadegh/python_projects/HRplus-BC-Multimodal
✅ Environment initialized.


In [7]:
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", message="ChainedAssignmentError")

print("⚙️ All warnings suppressed for cleaner output.")



In [None]:
# Resolve every input/output location relative to the project root, which is
# the parent of the (resolved) current working directory.
base_dir = Path.cwd().resolve().parent

# Inputs
input_file = base_dir.joinpath("dataset", "transcriptomic_hvg.csv")
meta_data = base_dir.joinpath("dataset", "metadata.csv")
model_file = base_dir.joinpath("models", "model.xml")

# Outputs: one CSV per batch goes into output_dir, the merged table into final_output
final_output = base_dir.joinpath("dataset", "fluxomics.csv")
output_dir = base_dir.joinpath("flux_batch")
output_dir.mkdir(exist_ok=True)

print(f"""
📂 PATHS
---------------------------------
Input data:  {input_file}
Metadata:    {meta_data}
Model:       {model_file}
Output dir:  {output_dir}
Final file:  {final_output}
""")


📂 PATHS
---------------------------------
Input data:  /home/sadegh/python_projects/HRplus-BC-Multimodal/dataset/transcriptomic_hvg.csv
Metadata:    /home/sadegh/python_projects/HRplus-BC-Multimodal/dataset/metadata.csv
Model:       /home/sadegh/python_projects/HRplus-BC-Multimodal/models/model.xml
Output dir:  /home/sadegh/python_projects/HRplus-BC-Multimodal/flux_batch
Final file:  /home/sadegh/python_projects/HRplus-BC-Multimodal/dataset/fluxomics.csv



In [None]:
# Read the expression table from disk; the first CSV column becomes the row index.
print("📥 Loading transcriptomics data...")
trans = pd.read_csv(filepath_or_buffer=input_file, index_col=0)
print(f"Expression matrix shape: {trans.shape}")
# Preview the first five rows as the cell output.
trans.head(5)

In [None]:
# Parse the SBML metabolic model and collect the identifiers of its genes.
print("🧬 Loading COBRA metabolic model...")
model = cobra.io.read_sbml_model(model_file)
model_genes = list(map(lambda gene: gene.id, model.genes))
print(f"Model loaded: {len(model_genes)} genes, {len(model.reactions)} reactions.")


In [None]:
# Filter to Model Genes and Build AnnData
# Keep only the expression columns whose name is a gene id of the metabolic
# model, preserving the column order of `trans`.
model_gene_set = set(model_genes)  # O(1) lookups instead of scanning the list per column
overlap_genes = [g for g in trans.columns if g in model_gene_set]
if not overlap_genes:
    # Fail here with a clear message rather than deep inside the flux computation.
    raise ValueError(
        "No expression columns match any gene id of the metabolic model; "
        "check that both use the same gene identifier convention."
    )
expr_filtered = trans[overlap_genes]
adata_expr = AnnData(expr_filtered)

print(f"✅ Overlapping genes with model: {len(overlap_genes)}")
print(f"AnnData object: {adata_expr.shape}")
adata_expr

In [None]:
# Batching configuration for the flux computation
objective = "Biomass"  # objective name handed to scFBApy
batch_size = 100       # number of rows of adata_expr processed per batch

n_cells = adata_expr.shape[0]
# Ceiling division: a final, partially filled batch still counts as a batch.
n_batches = -(-n_cells // batch_size)

print(f"Total cells: {n_cells}")
print(f"Batch size: {batch_size}")
print(f"Total batches: {n_batches}")

In [None]:
# Run scFBApy on each batch
def run_batch(batch_idx):
    """Run scFBApy on one batch of cells and store the fluxes as a CSV file.

    The batch covers rows ``batch_idx * batch_size`` up to (excluding)
    ``min((batch_idx + 1) * batch_size, n_cells)`` of the module-level
    ``adata_expr``. The result is written to
    ``output_dir / f"flux_batch_{batch_idx}.csv"``. If that file already
    exists the batch is skipped, so an interrupted run can be resumed.

    Parameters
    ----------
    batch_idx : int
        Zero-based batch number.

    Returns
    -------
    str
        Status line: skipped, finished, or error. Exceptions raised by the
        flux computation or the CSV write are caught and reported in this
        string instead of being propagated, so one failing batch does not
        abort the whole pool.
    """
    # Imports are repeated locally so the function is self-contained when it
    # runs inside a worker process.
    import pandas as pd
    import numpy as np
    import warnings
    from scripts.utils_scFBApy import scFBApy

    warnings.filterwarnings("ignore", category=FutureWarning)
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    warnings.filterwarnings("ignore", category=UserWarning)
    # A `message=` filter is a regex on the warning text and never matches the
    # pandas chained-assignment warning; filter on its category instead.
    chained_assignment_warning = getattr(pd.errors, "ChainedAssignmentError", None)
    if chained_assignment_warning is not None:
        warnings.filterwarnings("ignore", category=chained_assignment_warning)

    start = batch_idx * batch_size
    end = min((batch_idx + 1) * batch_size, n_cells)
    batch_file = output_dir / f"flux_batch_{batch_idx}.csv"
    # Temporary sibling; its ".tmp" suffix keeps it out of "flux_batch_*.csv" globs.
    tmp_file = batch_file.with_name(batch_file.name + ".tmp")

    if batch_file.exists():
        return f"🟢 Skipped batch {batch_idx+1} (already done)"

    adata_batch = adata_expr[start:end, :].copy()

    try:
        adata_flux_batch = scFBApy(
            model_orig=model,
            adata=adata_batch,
            objective=objective,
            cooperation=True,
            compute_fva=True,
            npop_fva=5,
            eps=0.001,
            type_ras_normalization="max",
            and_expression=np.nanmin,
            or_expression=np.nansum,
            fraction_of_optimum=0,
            processes=1,
            round_c=10
        )

        flux_df = pd.DataFrame(
            adata_flux_batch.X,
            index=adata_flux_batch.obs.index,
            columns=adata_flux_batch.var.index
        )
        # Write to the temporary file and rename afterwards. A crash mid-write
        # then never leaves a truncated CSV that the "already done" check
        # above would accept on the next run.
        flux_df.to_csv(tmp_file)
        tmp_file.replace(batch_file)
        return f"✅ Finished batch {batch_idx+1}/{n_batches}"

    except Exception as e:
        # Drop any partial temporary output so it cannot be mistaken for a result.
        if tmp_file.exists():
            tmp_file.unlink()
        # Use the same 1-based numbering as the other status lines and include
        # the exception type, since the traceback is not propagated.
        return f"⚠️ Error in batch {batch_idx+1}/{n_batches}: {type(e).__name__}: {e}"


In [None]:
# Generate Fluxomics data from Transcriptomics
print("🚀 Starting parallel flux computation...")
start_time = time.time()

# Use at most 8 workers, but never more than the machine has CPUs or than
# there are batches to process; always at least one so Pool() is valid.
n_workers = max(1, min(8, os.cpu_count() or 1, n_batches))

with Pool(processes=n_workers) as pool:
    # imap yields results in submission order, so results[i] belongs to batch i.
    results = list(tqdm(pool.imap(run_batch, range(n_batches)), total=n_batches))

for r in results:
    print(r)

# run_batch reports failures as "⚠️ ..." strings instead of raising, so count
# them explicitly; otherwise a failed batch is easy to miss in the log.
n_failed = sum(1 for r in results if r.startswith("⚠️"))
if n_failed:
    print(f"\n❌ {n_failed}/{n_batches} batches failed; re-run this cell before combining the results.")

print(f"\n⏱️ Total runtime: {(time.time() - start_time)/60:.1f} minutes")

In [None]:
# Combine All Batch Files
print("📦 Combining all batch files...")

# Collect (batch number, path) pairs. The files must be ordered by their
# *numeric* batch index: a plain sorted() on paths is lexicographic and puts
# flux_batch_10.csv before flux_batch_2.csv, which scrambles the row order
# as soon as there are more than 10 batches.
numbered_batches = []
for batch_path in output_dir.glob("flux_batch_*.csv"):
    batch_suffix = batch_path.stem.rsplit("_", 1)[-1]
    if batch_suffix.isdigit():  # ignore files that do not follow flux_batch_<n>.csv
        numbered_batches.append((int(batch_suffix), batch_path))
numbered_batches.sort()

if not numbered_batches:
    raise FileNotFoundError(f"No flux_batch_<n>.csv files found in {output_dir}")

# A gap in the numbering means a batch is missing. Concatenating anyway would
# shift every later row relative to the original cell order.
found_numbers = {number for number, _ in numbered_batches}
missing_numbers = sorted(set(range(max(found_numbers) + 1)) - found_numbers)
if missing_numbers:
    raise ValueError(f"Missing batch files for batch indices: {missing_numbers}")

batch_files = [path for _, path in numbered_batches]
combined_flux = pd.concat([pd.read_csv(f, index_col=0) for f in batch_files])
print(f"Combined flux shape: {combined_flux.shape}")
combined_flux.head()

In [None]:
# Add Metadata and Save Final File
meta_df = pd.read_csv(meta_data, index_col=0)
n_flux_rows = combined_flux.shape[0]

# Metadata is attached purely by row position. If the row counts differ, the
# slicing below silently truncates the metadata, so say so explicitly. A
# plain print is used because UserWarning is filtered in this notebook.
if n_flux_rows != len(meta_df):
    print(
        f"⚠️ Row count mismatch: {n_flux_rows} flux rows vs {len(meta_df)} metadata rows. "
        "Metadata is aligned by position; check for missing or failed batches."
    )

combined_flux.index = meta_df.index.values[:n_flux_rows]
combined_flux["response"] = meta_df["response"].values[:n_flux_rows]

combined_flux.to_csv(final_output, index=True)
print(f"💾 Final fluxomics file saved to: {final_output}")