# Validation of Slurm Job

Auth: Joshua Pickard (jpic@umich.edu)

In [1]:
import os
import scanpy as sc

In [2]:
def read_all_h5ad_files(directory):
    """
    Reads all .h5ad objects found in the specified directory.
    
    Args:
    directory (str): The directory to search for .h5ad files.
    
    Returns:
    dict: A dictionary with filenames as keys and AnnData objects as values.
    """
    h5ad_files = [f for f in os.listdir(directory) if f.endswith('.h5ad')]
    adata_dict = {}
    
    for file in h5ad_files:
        filepath = os.path.join(directory, file)
        print(filepath)
        adata = sc.read_h5ad(filepath)
        print(adata.shape)
        adata_dict[file] = adata
    
    return adata_dict

# Example usage:
directory = "/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k"
adata_dict = read_all_h5ad_files(directory)

# Print the keys (filenames) of the dictionary
print(adata_dict.keys())


/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_19-41-29_job_number_2.h5ad
(20, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_20-07-55_job_number_1.h5ad




(30616, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_19-41-49_job_number_3.h5ad
(20, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_20-06-23_job_number_4.h5ad




(30616, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_19-41-29_job_number_1.h5ad
(20, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_20-06-19_job_number_3.h5ad




(30616, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_20-05-48_job_number_2.h5ad
(30616, 512)
/nfs/turbo/umms-indikar/shared/projects/geneformer/fib15k/2024-07-30_19-41-49_job_number_4.h5ad
(20, 512)
dict_keys(['2024-07-30_19-41-29_job_number_2.h5ad', '2024-07-30_20-07-55_job_number_1.h5ad', '2024-07-30_19-41-49_job_number_3.h5ad', '2024-07-30_20-06-23_job_number_4.h5ad', '2024-07-30_19-41-29_job_number_1.h5ad', '2024-07-30_20-06-19_job_number_3.h5ad', '2024-07-30_20-05-48_job_number_2.h5ad', '2024-07-30_19-41-49_job_number_4.h5ad'])




In [5]:
newest = ['2024-07-30_20-05-48_job_number_2.h5ad',
          '2024-07-30_20-06-19_job_number_3.h5ad',
          '2024-07-30_20-06-23_job_number_4.h5ad',
          '2024-07-30_20-07-55_job_number_1.h5ad'
         ]

In [10]:
for file in newest:
    print(f'shape of {file} = {adata_dict[file].shape}')

shape of 2024-07-30_20-05-48_job_number_2.h5ad = (30616, 512)
shape of 2024-07-30_20-06-19_job_number_3.h5ad = (30616, 512)
shape of 2024-07-30_20-06-23_job_number_4.h5ad = (30616, 512)
shape of 2024-07-30_20-07-55_job_number_1.h5ad = (30616, 512)


In [8]:
adata_dict[file].obs

Unnamed: 0,input_ids,cell_type,dataset,length,ignore,standardized_cell_type,broad_type,__index_level_0__,cell_id,recipe,type
0,[16345 9009 13048 ... 9332 13451 5456],fibroblast,TS_Vasculature,2048,fibroblast,Fibroblast,fibroblast,109770,cell_1,raw,initial
1,[12119 9190 16876 ... 11232 1132 1022],fibroblast,TS_Vasculature,2048,fibroblast,Fibroblast,fibroblast,109771,cell_2,raw,initial
2,[ 3878 9009 4115 ... 4697 10362 12098],fibroblast,TS_Vasculature,2048,fibroblast,Fibroblast,fibroblast,109774,cell_3,raw,initial
3,[ 3878 16916 18367 ... 15470 3946 2153],fibroblast,TS_Vasculature,2048,fibroblast,Fibroblast,fibroblast,109776,cell_4,raw,initial
4,[ 6196 16916 10920 ... 7629 10148 17125],fibroblast,TS_Vasculature,2048,fibroblast,Fibroblast,fibroblast,109777,cell_5,raw,initial
...,...,...,...,...,...,...,...,...,...,...,...
30611,"[14409, 11599, 12698, 5806, 10804, 16916, 4665...",Fibroblasts,TS_Fat,2048,Fibroblasts,Fibroblast,fibroblast,171938,cell_15304,GATA2;GFI1B;FOS;STAT5A;REL,reprogrammed
30612,"[14409, 11599, 12698, 5806, 10804, 16916, 1211...",Fibroblasts,TS_Fat,2048,Fibroblasts,Fibroblast,fibroblast,171942,cell_15305,GATA2;GFI1B;FOS;STAT5A;REL,reprogrammed
30613,"[14409, 11599, 12698, 5806, 10804, 16916, 3878...",Fibroblasts,TS_Fat,2035,Fibroblasts,Fibroblast,fibroblast,171944,cell_15306,GATA2;GFI1B;FOS;STAT5A;REL,reprogrammed
30614,"[14409, 11599, 12698, 5806, 10804, 16916, 2903...",Fibroblasts,TS_Fat,1394,Fibroblasts,Fibroblast,fibroblast,171948,cell_15307,GATA2;GFI1B;FOS;STAT5A;REL,reprogrammed
