# Notebook for exporting h5ad files as Matrix Market (.mtx) and .tsv files

**Created by :** Srivalli Kolla

**Created on :** 07 April, 2025

**Modified on :** 07 April, 2025

**University of Würzburg**

# Importing packages

In [40]:
import scanpy as sc
import os
from scipy import io
from scipy.sparse import csr_matrix
import pandas as pd
import numpy as np

# Importing Data

In [41]:
# Read the annotated .h5ad file into an AnnData object.
adata = sc.read_h5ad('./Github/Nuclear_hashing_2025/data/demultiplexed_HTOdemux_raw_annotated_04_04_25.h5ad')
# A bare expression on the last line of a cell displays the object summary.
adata

AnnData object with n_obs × n_vars = 13066 × 28013
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_HTO', 'nFeature_HTO', 'HTO_maxID', 'HTO_secondID', 'HTO_margin', 'HTO_classification', 'HTO_classification.global', 'hash.ID', 'ident', 'Sample', 'Sample-ID', 'Mouse-ID', 'Sex', 'Group', 'Ref hashtag', 'Nuclei Purification Method after Hashing', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'percent_chrY', 'XIST-counts', 'XIST-percentage', 'gender_check_cov', 'S_score', 'G2M_score', 'phase', 'leiden_0.2', 'leiden_0.3', 'cell_type'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells'
    uns: 'Group_colors', 'HTO_classification_colors', 'Sample_colors', 'Sex_colors', 'X_name', 'cell_type_colors', 'dea_ranking', 'dendrogram_leiden_0.2', 'leiden', 'leiden_0.2', 'leiden_0.2_colors', 'leiden_0.3', 'leiden_0.3_colors', 'leiden_colors', 'log1p', 'neig

#### Check whether the data is raw or normalized

In [42]:
def X_is_raw(adata):
    """Return True when every value in ``adata.X`` is a whole number.

    Whole-number entries are used as the indicator of raw counts; any
    fractional, NaN or infinite entry makes the result False.

    The check is done on the individual values rather than on column sums,
    because fractional entries can add up to a whole number (for example
    0.5 + 0.5) and would then be misreported as raw.

    Parameters
    ----------
    adata
        Object exposing the matrix as ``adata.X`` (dense array or SciPy
        sparse matrix).

    Returns
    -------
    bool
        True if all entries are whole numbers (an empty matrix counts as
        True), False otherwise.
    """
    # Imported locally so the function does not depend on the import cell.
    from scipy.sparse import issparse

    X = adata.X
    # For sparse input only the stored entries need checking: the implicit
    # zeros are whole numbers by definition. tocsr() guarantees a flat
    # numeric ``.data`` array whatever the original sparse format was.
    values = X.tocsr().data if issparse(X) else np.asarray(X)

    # Boolean and integer dtypes cannot hold fractional values.
    if values.dtype.kind in "biu":
        return True

    # NaN and inf yield NaN under mod, which compares unequal to 0.
    return bool(np.all(np.mod(values, 1) == 0))

In [43]:
# Report whether adata.X passes the raw-count check (True) or not (False).
print(X_is_raw(adata))

False


In [44]:
# Disabled: would replace X with the 'raw_counts' layer (only applicable if that layer exists).
#adata.X = adata.layers['raw_counts']

In [45]:
# Re-run the check; the reassignment cell before this one is commented out,
# so adata.X has not changed since the previous check.
print(X_is_raw(adata))

False


In [46]:
# Display the AnnData summary again.
adata

AnnData object with n_obs × n_vars = 13066 × 28013
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_HTO', 'nFeature_HTO', 'HTO_maxID', 'HTO_secondID', 'HTO_margin', 'HTO_classification', 'HTO_classification.global', 'hash.ID', 'ident', 'Sample', 'Sample-ID', 'Mouse-ID', 'Sex', 'Group', 'Ref hashtag', 'Nuclei Purification Method after Hashing', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'percent_chrY', 'XIST-counts', 'XIST-percentage', 'gender_check_cov', 'S_score', 'G2M_score', 'phase', 'leiden_0.2', 'leiden_0.3', 'cell_type'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells'
    uns: 'Group_colors', 'HTO_classification_colors', 'Sample_colors', 'Sex_colors', 'X_name', 'cell_type_colors', 'dea_ranking', 'dendrogram_leiden_0.2', 'leiden', 'leiden_0.2', 'leiden_0.2_colors', 'leiden_0.3', 'leiden_0.3_colors', 'leiden_colors', 'log1p', 'neig

In [47]:
# Use the `.raw` slot when it is set, converted to a standalone AnnData;
# otherwise fall back to `adata` itself. The test is an explicit `is not None`
# so the choice does not depend on how the Raw object evaluates as a boolean.
# NOTE(review): the earlier X_is_raw(adata) checks printed False and `raw.X`
# is never checked — confirm it holds untransformed counts before exporting.
raw = adata.raw.to_adata() if adata.raw is not None else adata
# Display the summary of the object that will be exported.
raw

AnnData object with n_obs × n_vars = 13066 × 32285
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_HTO', 'nFeature_HTO', 'HTO_maxID', 'HTO_secondID', 'HTO_margin', 'HTO_classification', 'HTO_classification.global', 'hash.ID', 'ident', 'Sample', 'Sample-ID', 'Mouse-ID', 'Sex', 'Group', 'Ref hashtag', 'Nuclei Purification Method after Hashing', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'percent_chrY', 'XIST-counts', 'XIST-percentage', 'gender_check_cov', 'S_score', 'G2M_score', 'phase', 'leiden_0.2', 'leiden_0.3', 'cell_type'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    uns: 'Group_colors', 'HTO_classification_colors', 'Sample_colors', 'Sex_colors', 'X_name', 'cell_type_colors', 'dea_ranking', 'dendrogram_leiden_0.2', 'leiden', 'leiden_0.2', 'leiden_0.2_colors', 'leiden_0.3', 'leiden_0.3_colors', 'leiden_colors', 'log1p', 'neighbors', 'pc

In [48]:
# Destination folder for the exported files.
output_dir = './Github/Nuclear_hashing_2025/data/demultiplexed_HTODemux_cellbender'
# Create the folder (and any missing parents); no error if it already exists.
os.makedirs(output_dir, exist_ok=True)

# Exporting Data

In [49]:
# Write the expression matrix in Matrix Market format, converted to CSR first.
# NOTE(review): raw.X is written as stored, i.e. obs x vars (13066 x 32285 in
# the summary above). 10x-style readers usually expect genes x cells — confirm
# the orientation the downstream tool expects.
io.mmwrite(f"{output_dir}/matrix.mtx", csr_matrix(raw.X))
# One observation name per line, without index or header.
pd.DataFrame(raw.obs_names).to_csv(f"{output_dir}/barcodes.tsv", index=False, header=False)
# One variable name per line, without index or header.
# NOTE(review): this is a single column; some 10x-style readers expect an
# id and a name column in genes.tsv — verify against the consumer.
pd.DataFrame(raw.var_names).to_csv(f"{output_dir}/genes.tsv", index=False, header=False)