# TSV/CSV to AnnData

Note 1: The data must be normalized.

Note 2: You must know if the cells are rows or columns.

In [2]:
# Import modules.

import session_info
import scanpy as sc
import pandas as pd
from scipy.sparse import csr_matrix, vstack
import anndata as ad

In [3]:
# Information of the session.

sc.settings.verbosity = 3

session_info.show()

1. Adjust the group size. Each block is processed independently, focusing on data quality.

2. If the data has cells in columns and genes in rows, transpose it before converting to AnnData.


In [4]:
# Load count matrix in chunks
file_path = "/Users/Desktop/File.tsv"

chunksize = 1000 # Choose chunk size.
chunks = pd.read_csv(file_path, sep="\t", index_col=0, chunksize=chunksize) # Use sep="\t" for .tsv or sep="," for .csv

transpose_chunks = False  # Set to True if data is genes x cells.

sparse_chunks = []
all_chunks = []

# Read and (optionally) transpose chunks
for chunk in chunks:
    if transpose_chunks:
        chunk = chunk.T
    all_chunks.append(chunk)
    sparse_chunks.append(csr_matrix(chunk.values, dtype="float32"))

# Combine chunks and extract names
X = pd.concat(all_chunks)
cell_names = X.index.astype(str)
gene_names = X.columns.astype(str)

# Stack all sparse chunks
X = vstack(sparse_chunks, format="csr")



In [6]:
# Create the AnnData object with cells as rows and genes as columns
adata = sc.AnnData(
    X=X,  
    obs=pd.DataFrame(index=cell_names),
    var=pd.DataFrame(index=gene_names)
)

# Save anndata
adata.write ("/Users/Desktop/new_adata.h5ad") #compression = "gzip" (optional) 

  utils.warn_names_duplicates("obs")


In [7]:
# Check adata
adata = sc.read_h5ad("/Users/Desktop/new_adata.h5ad", backed= "r") 
adata

  utils.warn_names_duplicates("obs")


AnnData object with n_obs × n_vars = 538 × 23802 backed at '/Users/rafaelsalgueroraigon/Desktop/new_adata.h5ad'

#### Reference example data:

To, K., Fei, L., Pett, J.P. et al. A multi-omic atlas of human embryonic skeletal development. Nature 635, 657–667 (2024). https://doi.org/10.1038/s41586-024-08189-z

