# Convert h5 to h5ad format

This script converts h5 data into h5ad (AnnData) objects. We run it on the PDAC dataset.

Input needed: 
- filtered_feature_bc_matrix.h5
- spatial/

Output:
- annotated_data_matrix.h5ad

The functions used in this script are documented at:
- https://scanpy.readthedocs.io/en/stable/generated/scanpy.read_visium.html
- https://eleozzr.github.io/desc/tutorial.html

# Import packages

pip install scanpy

pip install h5py

In [9]:
import scanpy as sc
import os
import h5py

# PDAC A1

In [95]:
# Folder with the PDAC A1 sample inputs; it is passed to sc.read_visium below.
path_to_ST_files_A1 = "st_pancreas/PDAC_1274746B_A1/"

In [96]:
# Load the A1 sample with scanpy's Visium reader; the result is an AnnData
# object. The folder is expected to hold filtered_feature_bc_matrix.h5 and
# spatial/ (the inputs listed at the top of this notebook).
h5ad_file_A1_visium = sc.read_visium(path_to_ST_files_A1)

In [97]:
# Show the AnnData summary (dimensions and annotation keys) as the cell output.
h5ad_file_A1_visium

AnnData object with n_obs × n_vars = 2368 × 17943
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [98]:
# Make sure the local output folder exists before writing; nothing earlier in
# this notebook creates it, and the write fails if the folder is missing.
os.makedirs('./st_pancreas_h5ad', exist_ok=True)
# Save the PDAC A1 AnnData object in h5ad format.
h5ad_file_A1_visium.write('./st_pancreas_h5ad/PDAC_A1_visium.h5ad')

# PDAC B2

In [91]:
# Folder with the PDAC B2 sample inputs; it is passed to sc.read_visium below.
path_to_ST_files_B2 = "st_pancreas/PDAC_ILS50185PT2_B2/"

In [92]:
# Load the B2 sample with scanpy's Visium reader; the result is an AnnData
# object. The folder is expected to hold filtered_feature_bc_matrix.h5 and
# spatial/ (the inputs listed at the top of this notebook).
h5ad_file_B2_visium = sc.read_visium(path_to_ST_files_B2)

In [85]:
# Show the AnnData summary (dimensions and annotation keys) as the cell output.
h5ad_file_B2_visium

AnnData object with n_obs × n_vars = 3085 × 17943
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [93]:
# Make sure the local output folder exists before writing; the write fails if
# the folder is missing.
os.makedirs('./st_pancreas_h5ad', exist_ok=True)
# Save the PDAC B2 AnnData object in h5ad format.
h5ad_file_B2_visium.write('./st_pancreas_h5ad/PDAC_B2_visium.h5ad')

# PanCN C3

In [87]:
# Folder with the PanCN C3 sample inputs; it is passed to sc.read_visium below.
path_to_ST_files_C3 = "st_pancreas/PanCN_AVD_79MZ_0594_C3/"

In [88]:
# Load the C3 sample with scanpy's Visium reader; the result is an AnnData
# object. The folder is expected to hold filtered_feature_bc_matrix.h5 and
# spatial/ (the inputs listed at the top of this notebook).
h5ad_file_C3_visium = sc.read_visium(path_to_ST_files_C3)

In [89]:
# Show the AnnData summary (dimensions and annotation keys) as the cell output.
h5ad_file_C3_visium

AnnData object with n_obs × n_vars = 2364 × 17943
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [90]:
# Make sure the local output folder exists before writing; the write fails if
# the folder is missing.
os.makedirs('./st_pancreas_h5ad', exist_ok=True)
# Save the PanCN C3 AnnData object in h5ad format.
h5ad_file_C3_visium.write('./st_pancreas_h5ad/PanCN_C3_visium.h5ad')

# PanCN D4

In [79]:
# Folder with the PanCN D4 sample inputs; it is passed to sc.read_visium below.
path_to_ST_files_D4 = "st_pancreas/PanCN_AVD_79MZ_0158_D4/"

In [80]:
# Load the D4 sample with scanpy's Visium reader; the result is an AnnData
# object. The folder is expected to hold filtered_feature_bc_matrix.h5 and
# spatial/ (the inputs listed at the top of this notebook).
h5ad_file_D4_visium = sc.read_visium(path_to_ST_files_D4)

In [81]:
# Show the AnnData summary (dimensions and annotation keys) as the cell output.
h5ad_file_D4_visium

AnnData object with n_obs × n_vars = 2316 × 17943
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [83]:
# Make sure the local output folder exists before writing; the write fails if
# the folder is missing.
os.makedirs('./st_pancreas_h5ad', exist_ok=True)
# Save the PanCN D4 AnnData object in h5ad format.
h5ad_file_D4_visium.write('./st_pancreas_h5ad/PanCN_D4_visium.h5ad')

## Save files to the output directory

In [100]:
# Destination folder for the final h5ad copies. A plain string is used because
# there is nothing to interpolate.
output_files = "/sbgenomics/output-files/st_pancreas_h5ad"
# Create the folder (and any missing parents); do nothing if it already exists.
os.makedirs(output_files, exist_ok=True)

In [101]:
# Write every sample's AnnData object into the output folder as an h5ad file,
# in the same order as the sections above (A1, B2, C3, D4).
samples_to_export = (
    (h5ad_file_A1_visium, 'PDAC_A1_visium.h5ad'),
    (h5ad_file_B2_visium, 'PDAC_B2_visium.h5ad'),
    (h5ad_file_C3_visium, 'PanCN_C3_visium.h5ad'),
    (h5ad_file_D4_visium, 'PanCN_D4_visium.h5ad'),
)
for sample_adata, h5ad_name in samples_to_export:
    sample_adata.write(f'{output_files}/{h5ad_name}')