In [1]:
import os
from scipy import io
import anndata as ad

In [8]:
import gzip
import os
import shutil

import anndata as ad
import numpy as np
import pandas as pd
from scipy import io

def adata_to_seurat_with_embedding(adata_path, output_dir):
    """Export an AnnData ``.h5ad`` file as flat files for loading into Seurat.

    The following files are written below ``output_dir`` (created if needed):

    * ``matrix_files/barcodes.tsv.gz`` -- one observation name per line.
    * ``matrix_files/features.tsv.gz`` -- one line per variable with three
      tab-separated columns: name, name, ``Gene Expression``.
    * ``matrix_files/matrix.mtx.gz`` -- ``X`` transposed (variables x
      observations) in Matrix Market format.
    * ``metadata.csv`` -- the ``obs`` table.
    * ``embedding_umap.csv`` -- ``obsm["X_umap"]`` indexed by observation
      name, only when that embedding is present.

    When ``adata.raw`` is set, the object is replaced by
    ``adata.raw.to_adata()`` before anything is written, so the exported
    matrix and features come from the raw layer.

    Parameters
    ----------
    adata_path : str
        Path to the input ``.h5ad`` file.
    output_dir : str
        Directory that receives the exported files.

    Returns
    -------
    None
        Progress messages are printed; nothing is returned.
    """
    # Load AnnData object; use raw data if available.
    adata = ad.read_h5ad(adata_path)
    if adata.raw is not None:
        # NOTE(review): the metadata/UMAP export below relies on to_adata()
        # carrying obs and obsm over from the parent object -- confirm.
        adata = adata.raw.to_adata()

    # Create output directory and matrix_files subfolder
    matrix_files_dir = os.path.join(output_dir, "matrix_files")
    os.makedirs(matrix_files_dir, exist_ok=True)

    # Write barcodes
    barcodes_path = os.path.join(matrix_files_dir, "barcodes.tsv")
    with open(barcodes_path, 'w') as f:
        for barcode in adata.obs_names:
            f.write(f"{barcode}\n")

    # Write features (id, name, feature type)
    features_path = os.path.join(matrix_files_dir, "features.tsv")
    with open(features_path, 'w') as f:
        for gene in adata.var_names:
            f.write(f"{gene}\t{gene}\tGene Expression\n")

    # Write matrix, transposed so that rows are features and columns are barcodes
    matrix_path = os.path.join(matrix_files_dir, "matrix.mtx")
    io.mmwrite(matrix_path, adata.X.T)

    # Gzip exactly the three files written above. The contents are really
    # compressed (a plain rename to ".gz" leaves invalid gzip files), and
    # unrelated or left-over files in the folder are not touched, so the
    # function can be re-run on the same output directory.
    for plain_path in (barcodes_path, features_path, matrix_path):
        with open(plain_path, 'rb') as src, gzip.open(f"{plain_path}.gz", 'wb') as dst:
            shutil.copyfileobj(src, dst)
        os.remove(plain_path)

    print(f"Matrix files have been gzipped and saved in {matrix_files_dir}")

    # Export metadata
    metadata_path = os.path.join(output_dir, "metadata.csv")
    adata.obs.to_csv(metadata_path)

    # Create UMAP embedding DataFrame
    if "X_umap" in adata.obsm:
        embedding = np.array(adata.obsm["X_umap"])

        # One column per embedding component (umap_1, umap_2, ...), so an
        # embedding with more than two components is exported too.
        column_names = [f"umap_{i + 1}" for i in range(embedding.shape[1])]
        embedding_df = pd.DataFrame(
            embedding,
            index=adata.obs_names.to_list(),
            columns=column_names,
        )

        # Save UMAP embedding to CSV
        embedding_path = os.path.join(output_dir, "embedding_umap.csv")
        embedding_df.to_csv(embedding_path)
        print(f"UMAP embedding saved to {embedding_path}")
    else:
        print("UMAP embedding not found in `adata.obsm`. Skipping embedding export.")

    print(f"Data exported successfully to {output_dir}")

In [9]:
# Input file and output directory. Both are relative paths, so they are
# resolved against the notebook's current working directory.
adata_path = "stephenson_covid19_apnet.h5ad"  # AnnData .h5ad file to export
output_dir = "output_folder"   # folder that receives the exported files

In [10]:
# Run the export with the configured paths; the function prints a message
# after each group of files has been written.
adata_to_seurat_with_embedding(adata_path, output_dir)

Matrix files have been gzipped and saved in output_folder/matrix_files
UMAP embedding saved to output_folder/embedding_umap.csv
Data exported successfully to output_folder


In [12]:
# Resolve the relative output directory to an absolute path and print it,
# so the exported files can be located outside the notebook.
absolute_path = os.path.abspath(output_dir)
print(f"Absolute path of the output directory: {absolute_path}")

Absolute path of the output directory: /Users/georgegavriilidis/pertpy/output_folder
