In [1]:
import os
import pandas as pd
import scanpy as sc
import anndata
import gc

## Prostate

#### 1. Chen2021

In [2]:

# Base path
base_path = "/home/ubuntu/Downloads/Data_Prostate/Data_Chen2021_Prostate"

# Step 1: Read expression matrix
adata = sc.read_mtx(os.path.join(base_path, "Exp_data_UMIcounts.mtx"))
adata = adata.transpose()  # Transpose to shape: cells × genes

# Step 2: Add gene names
genes = pd.read_csv(os.path.join(base_path, "Genes.txt"), header=None)
adata.var_names = genes[0].values
adata.var_names_make_unique()

# Step 3: Read and merge cell + sample metadata
cells = pd.read_csv(os.path.join(base_path, "Cells.csv"))
samples = pd.read_csv(os.path.join(base_path, "Samples.csv"))

# Merge on the shared 'sample' column
cells_merged = cells.merge(samples, on="sample", how="left")

# Assign merged metadata to AnnData
adata.obs = cells_merged

gc.collect

# Final check
#print(adata)
#print(adata.obs.head())


<function gc.collect(generation=2)>

In [3]:
adata

AnnData object with n_obs × n_vars = 36424 × 25044
    obs: 'cell_name', 'sample', 'cell_type', 'complexity', 'umap1', 'umap2', 'g1s_score', 'g2m_score', 'cell_cycle_phase', 'mp_top_score', 'mp_top', 'mp_assignment', 'technology', 'n_cells', 'patient', 'cancer_type', 'sex', 'age', 'smoking_status', 'PY', 'diagnosis_recurrence', 'disease_extent', 'AJCC_T', 'AJCC_N', 'AJCC_M', 'AJCC_stage', 'sample_primary_met', 'size', 'site', 'histology', 'genetic_hormonal_features', 'grade', 'KI67', 'treated_naive', 'chemotherapy_exposed', 'chemotherapy_response', 'targeted_rx_exposed', 'targeted_rx_response', 'ICB_exposed', 'ICB_response', 'ET_exposed', 'ET_response', 'time_end_of_rx_to_sampling', 'post_sampling_rx_exposed', 'post_sampling_rx_response', 'PFS_DFS', 'OS'

In [4]:
for col in ['umap1', 'umap2', 'g1s_score', 'g2m_score', 'mp_top_score', 'mp_top', 
            'mp_assignment', 'technology_y', 'smoking_status', 'PY', 
            'diagnosis_recurrence', 'AJCC_T', 'AJCC_N', 'AJCC_M', 'AJCC_stage',
            'size',  'histology', 'genetic_hormonal_features', 'grade', 'KI67',
            'chemotherapy_exposed', 'chemotherapy_response', 'targeted_rx_exposed',
            'targeted_rx_response', 'ICB_exposed', 'ICB_response',
            'ET_exposed', 'ET_response', 'time_end_of_rx_to_sampling', 'post_sampling_rx_exposed',
            'post_sampling_rx_response', 'PFS_DFS', 'OS'         
           ]:
    if col in adata.obs.columns:
        del adata.obs[col]

In [9]:
adata.obs

Unnamed: 0,cell_name,sample,cell_type,complexity,cell_cycle_phase,technology,n_cells,patient,cancer_type,sex,age,disease_extent,sample_primary_met,site,treated_naive,study,category,source
0,GAACCTAAGGTCATCT.1,1,Malignant,2643,Not cycling,10x,1554,1,Prostate Cancer,M,81,,primary,,naive,Data_Chen2021_Prostate,Prostate,
1,TTGACTTTCGGACAAG.1,1,Malignant,2543,Not cycling,10x,1554,1,Prostate Cancer,M,81,,primary,,naive,Data_Chen2021_Prostate,Prostate,
2,ATTACTCAGGAGCGAG.1,1,Malignant,3240,Not cycling,10x,1554,1,Prostate Cancer,M,81,,primary,,naive,Data_Chen2021_Prostate,Prostate,
3,GCAGTTAAGCAGGCTA.1,1,Malignant,3307,Not cycling,10x,1554,1,Prostate Cancer,M,81,,primary,,naive,Data_Chen2021_Prostate,Prostate,
4,GTCGGGTTCCTGTACC.1,1,Malignant,3601,Not cycling,10x,1554,1,Prostate Cancer,M,81,,primary,,naive,Data_Chen2021_Prostate,Prostate,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36419,ATAAGAGTCACAGGCC.13,13,Mast,2097,Not cycling,10x,1277,13,Prostate Cancer,M,68,,primary,,naive,Data_Chen2021_Prostate,Prostate,
36420,ACGAGCCTCATGTGGT.13,13,,325,,10x,1277,13,Prostate Cancer,M,68,,primary,,naive,Data_Chen2021_Prostate,Prostate,
36421,ACAGCTAGTAGCGTGA.13,13,Mast,1501,Not cycling,10x,1277,13,Prostate Cancer,M,68,,primary,,naive,Data_Chen2021_Prostate,Prostate,
36422,AAGTCTGTCATAGCAC.13,13,Mast,1054,Not cycling,10x,1277,13,Prostate Cancer,M,68,,primary,,naive,Data_Chen2021_Prostate,Prostate,


In [6]:
adata.obs['study'] = 'Data_Chen2021_Prostate'

In [7]:
adata.obs['category'] = 'Prostate'

In [8]:
adata.obs['source'] = 'NaN'

In [10]:
adata.obs['cell_subtype'] = 'NaN'

In [11]:
output_path = "/home/ubuntu/Downloads/Data_Prostate/Data_Chen2021_Prostate.h5ad"
adata.write(output_path)
print(f"✅ data saved to: {output_path}")

✅ data saved to: /home/ubuntu/Downloads/Data_Prostate/Data_Chen2021_Prostate.h5ad


#### 2.Dong2020

In [13]:

# Base path
base_path = "/home/ubuntu/Downloads/Data_Prostate/Data_Dong2020_Prostate"

# Step 1: Read expression matrix
adata = sc.read_mtx(os.path.join(base_path, "Exp_data_UMIcounts.mtx"))
adata = adata.transpose()  # Transpose to shape: cells × genes

# Step 2: Add gene names
genes = pd.read_csv(os.path.join(base_path, "Genes.txt"), header=None)
adata.var_names = genes[0].values
adata.var_names_make_unique()

# Step 3: Read and merge cell + sample metadata
cells = pd.read_csv(os.path.join(base_path, "Cells.csv"))
samples = pd.read_csv(os.path.join(base_path, "Samples.csv"))

# Merge on the shared 'sample' column
cells_merged = cells.merge(samples, on="sample", how="left")

# Assign merged metadata to AnnData
adata.obs = cells_merged

gc.collect

# Final check
#print(adata)
#print(adata.obs.head())


<function gc.collect(generation=2)>

In [14]:
adata

AnnData object with n_obs × n_vars = 21292 × 15709
    obs: 'cell_name', 'sample', 'cell_type', 'complexity', 'umap1', 'umap2', 'g1s_score', 'g2m_score', 'cell_cycle_phase', 'mp_top_score', 'mp_top', 'mp_assignment', 'technology', 'n_cells', 'patient', 'cancer_type', 'sex', 'age', 'smoking_status', 'PY', 'diagnosis_recurrence', 'disease_extent', 'AJCC_T', 'AJCC_N', 'AJCC_M', 'AJCC_stage', 'sample_primary_met', 'size', 'site', 'histology', 'genetic_hormonal_features', 'grade', 'KI67', 'treated_naive', 'chemotherapy_exposed', 'chemotherapy_response', 'targeted_rx_exposed', 'targeted_rx_response', 'ICB_exposed', 'ICB_response', 'ET_exposed', 'ET_response', 'time_end_of_rx_to_sampling', 'post_sampling_rx_exposed', 'post_sampling_rx_response', 'PFS_DFS', 'OS'

In [15]:
for col in ['umap1', 'umap2', 'g1s_score', 'g2m_score', 'mp_top_score', 'mp_top', 
            'mp_assignment', 'technology_y', 'smoking_status', 'PY', 
            'diagnosis_recurrence', 'AJCC_T', 'AJCC_N', 'AJCC_M', 'AJCC_stage',
            'size',  'histology', 'genetic_hormonal_features', 'grade', 'KI67',
            'chemotherapy_exposed', 'chemotherapy_response', 'targeted_rx_exposed',
            'targeted_rx_response', 'ICB_exposed', 'ICB_response',
            'ET_exposed', 'ET_response', 'time_end_of_rx_to_sampling', 'post_sampling_rx_exposed',
            'post_sampling_rx_response', 'PFS_DFS', 'OS'         
           ]:
    if col in adata.obs.columns:
        del adata.obs[col]

In [22]:
adata.obs

Unnamed: 0,cell_name,sample,cell_type,complexity,cell_cycle_phase,technology,n_cells,patient,cancer_type,sex,age,disease_extent,sample_primary_met,site,treated_naive,study,category,cell_subtype,source
0,AAACCTGAGTGTTTGC.1_1,patient #1,Epithelial,2617,Not cycling,10x,3022,patient #1,Prostate Cancer,M,82,local,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
1,AAACCTGCAGTCAGCC.1_1,patient #1,Malignant,4624,Intermediate,10x,3022,patient #1,Prostate Cancer,M,82,local,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
2,AAACCTGGTACATGTC.1_1,patient #1,Malignant,2531,Not cycling,10x,3022,patient #1,Prostate Cancer,M,82,local,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
3,AAACCTGGTCGGCATC.1_1,patient #1,Epithelial,3196,Not cycling,10x,3022,patient #1,Prostate Cancer,M,82,local,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
4,AAACCTGGTGCAGACA.1_1,patient #1,Epithelial,2851,Not cycling,10x,3022,patient #1,Prostate Cancer,M,82,local,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21287,TTTGTTGGTTAGAAGT.1_6,patient #6,Malignant,3921,G1/S,10x,6603,patient #6,Prostate Cancer,M,70,metastatic,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
21288,TTTGTTGTCAAACCCA.1_6,patient #6,Malignant,1301,Not cycling,10x,6603,patient #6,Prostate Cancer,M,70,metastatic,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
21289,TTTGTTGTCAGACTGT.1_6,patient #6,Malignant,3966,G1/S,10x,6603,patient #6,Prostate Cancer,M,70,metastatic,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,
21290,TTTGTTGTCCTCACGT.1_6,patient #6,Malignant,2579,Not cycling,10x,6603,patient #6,Prostate Cancer,M,70,metastatic,primary,prostate,treated,Data_Dong2020_Prostate,Prostate,,


In [18]:
adata.obs['study'] = 'Data_Dong2020_Prostate'

In [19]:
adata.obs['category'] = 'Prostate'

In [20]:
adata.obs['cell_subtype'] = 'NaN'

In [21]:
adata.obs['source'] = 'NaN'

In [23]:
output_path = "/home/ubuntu/Downloads/Data_Prostate/Data_Dong2020_Prostate.h5ad"
adata.write(output_path)
print(f"✅ data saved to: {output_path}")

✅ data saved to: /home/ubuntu/Downloads/Data_Prostate/Data_Dong2020_Prostate.h5ad


#### 3.He2021

In [24]:

# Base path
base_path = "/home/ubuntu/Downloads/Data_Prostate/Data_He2021_Prostate"

# Step 1: Read expression matrix
adata = sc.read_mtx(os.path.join(base_path, "Exp_data_UMIcounts.mtx"))
adata = adata.transpose()  # Transpose to shape: cells × genes

# Step 2: Add gene names
genes = pd.read_csv(os.path.join(base_path, "Genes.txt"), header=None)
adata.var_names = genes[0].values
adata.var_names_make_unique()

# Step 3: Read and merge cell + sample metadata
cells = pd.read_csv(os.path.join(base_path, "Cells.csv"))
samples = pd.read_csv(os.path.join(base_path, "Samples.csv"))

# Merge on the shared 'sample' column
cells_merged = cells.merge(samples, on="sample", how="left")

# Assign merged metadata to AnnData
adata.obs = cells_merged

gc.collect

# Final check
#print(adata)
#print(adata.obs.head())


<function gc.collect(generation=2)>

In [35]:
adata

AnnData object with n_obs × n_vars = 2170 × 45895
    obs: 'cell_name', 'sample', 'patient', 'cell_type', 'cell_subtype', 'complexity', 'cell_cycle_phase', 'disease', 'source', 'metastasis_x', 'sex', 'cancer_type', 'technology', 'n_cells'

In [29]:
for col in ['umap1', 'umap2', 'g1s_score', 'g2m_score', 'mp_top_score', 'mp_top', 
            'mp_assignment', 'prior_enzalutamide_x', 'prior_abiraterone_x', 
            'prior_taxane_x', 'prior_platinum_x', 'prior_sipuleucel_T_x', 'purity_x', 
            'has_bulk_RNA_x', 'patient_y', 'source_y', 'metastasis_y', 'sex_y',
            'prior_enzalutamide_y', 'prior_abiraterone_y', 'prior_taxane_y', 
            'prior_platinum_y', 'prior_sipuleucel_T_y', 'purity_y', 'has_bulk_RNA_y'      
           ]:
    if col in adata.obs.columns:
        del adata.obs[col]

In [39]:
adata.obs['metastasis_x'].value_counts()

metastasis_x
True    2170
Name: count, dtype: int64

In [31]:
adata.obs = adata.obs.rename(columns={"patient_x": "patient"})

In [32]:
adata.obs = adata.obs.rename(columns={"source_x": "source"})

In [33]:
adata.obs = adata.obs.rename(columns={"sex_x": "sex"})

In [36]:
adata.obs['age'] = 'NaN'

In [43]:
adata.obs['source'] = 'NaN'

In [44]:
adata.obs['site'] = 'NaN'

In [45]:
adata.obs['sample_primary_met'] = 'NaN'

In [46]:
adata.obs['treated_naive'] = 'NaN'

In [47]:
adata.obs['category'] = 'Prostate'

In [48]:
adata.obs['study'] = 'He2021_prostate'

In [40]:
adata.obs['disease_extent'] = 'metastatic'

In [37]:
del adata.obs['cancer_type']

In [41]:
del adata.obs['metastasis_x']

In [38]:
adata.obs = adata.obs.rename(columns={"disease": "cancer_type"})

In [49]:
adata

Unnamed: 0,cell_name,sample,patient,cell_type,cell_subtype,complexity,cell_cycle_phase,cancer_type,source,sex,technology,n_cells,Age,disease_extent,site,sample_primary_met,treated_naive,category,study
0,0,01115149-TC,1115149,Malignant,prostate cancer cell,1890,Not cycling,metastatic prostate carcinoma,,male,SmartSeq2,261,,metastatic,,,,Prostate,He2021
1,1,01115149-TC,1115149,B_cell,plasmablast,1194,Not cycling,metastatic prostate carcinoma,,male,SmartSeq2,261,,metastatic,,,,Prostate,He2021
2,2,01115149-TC,1115149,Malignant,prostate cancer cell,727,,metastatic prostate carcinoma,,male,SmartSeq2,261,,metastatic,,,,Prostate,He2021
3,3,01115149-TC,1115149,Malignant,prostate cancer cell,1815,Not cycling,metastatic prostate carcinoma,,male,SmartSeq2,261,,metastatic,,,,Prostate,He2021
4,4,01115149-TC,1115149,T_cell,CD4+ T cell,2900,Not cycling,metastatic prostate carcinoma,,male,SmartSeq2,261,,metastatic,,,,Prostate,He2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2165,2165,09171144,9171144,Erythrocyte,erythroid cell,2825,G2/M,metastatic prostate carcinoma,,male,SmartSeq2,437,,metastatic,,,,Prostate,He2021
2166,2166,09171144,9171144,Erythrocyte,erythroid cell,1320,Not cycling,metastatic prostate carcinoma,,male,SmartSeq2,437,,metastatic,,,,Prostate,He2021
2167,2167,09171144,9171144,Erythrocyte,erythroid cell,3240,G1/S,metastatic prostate carcinoma,,male,SmartSeq2,437,,metastatic,,,,Prostate,He2021
2168,2168,09171144,9171144,Erythrocyte,erythroid cell,1290,Not cycling,metastatic prostate carcinoma,,male,SmartSeq2,437,,metastatic,,,,Prostate,He2021


In [111]:
output_path = "/home/ubuntu/Downloads/Data_Prostate/Data_He2021_Prostate.h5ad"
adata.write(output_path)
print(f"✅ data saved to: {output_path}")

✅ data saved to: /home/ubuntu/Downloads/Data_Prostate/Data_He2021_Prostate.h5ad


In [109]:
adata = sc.read("/home/ubuntu/Downloads/Data_Prostate/Data_He2021_Prostate.h5ad")

In [110]:
adata.obs = adata.obs.rename(columns={"Age": "age"})

#### 4. Song2022

In [52]:

# Base path
base_path = "/home/ubuntu/Downloads/Data_Prostate/Data_Song2022_Prostate"

# Step 1: Read expression matrix
adata = sc.read_mtx(os.path.join(base_path, "Exp_data_UMIcounts.mtx"))
adata = adata.transpose()  # Transpose to shape: cells × genes

# Step 2: Add gene names
genes = pd.read_csv(os.path.join(base_path, "Genes.txt"), header=None)
adata.var_names = genes[0].values
adata.var_names_make_unique()

# Step 3: Read and merge cell + sample metadata
cells = pd.read_csv(os.path.join(base_path, "Cells.csv"))
samples = pd.read_csv(os.path.join(base_path, "Samples.csv"))

# Merge on the shared 'sample' column
cells_merged = cells.merge(samples, on="sample", how="left")

# Assign merged metadata to AnnData
adata.obs = cells_merged

gc.collect

# Final check
#print(adata)
#print(adata.obs.head())


<function gc.collect(generation=2)>

In [91]:
adata

AnnData object with n_obs × n_vars = 21743 × 21877
    obs: 'cell_name', 'sample', 'patient', 'cell_type', 'cell_subtype', 'complexity', 'cell_cycle_phase', 'source', 'cancer_cell_state', 'site', 'disease_extent', 'cancer_type', 'technology', 'n_cells', 'sex', 'age', 'sample_primary_met', 'treated_naive', 'category', 'study'

In [77]:
adata.obs['metastatic_workup_x'].value_counts()

metastatic_workup_x
CT a/p negative        2495
Bone scan negative     1993
Bone scan equivocal     763
Name: count, dtype: int64

In [65]:
for col in ['umap1', 'umap2', 'g1s_score', 'g2m_score', 'mp_top_score', 'mp_top', 
            'mp_assignment', 'PSA_x', 'ERG_status_x', 'cores_x', 'post_op_gleason_score_x',
            'margins_x', 'LVI_x', 'PNI_x', 'TRUS_x', 'MRI_x', 'post_op_psa_x', 'PSA_follow_up_x',
            'time_to_follow_up_x', 'patient_y', 'source_y', 'PSA_y', 'source_region_y', 'gleason_score_y', 
            'ERG_status_y', 'cores_y', 'post_op_gleason_score_y', 'margins_y', 
            'LVI_y', 'PNI_y', 'TRUS_y', 'MRI_y', 'metastatic_workup_y', 
            'post_op_psa_y', 'PSA_follow_up_y', 'time_to_follow_up_y', 'cancer_type'    
           ]:
    if col in adata.obs.columns:
        del adata.obs[col]

In [67]:
adata.obs = adata.obs.rename(columns={"patient_x": "patient"})

In [70]:
adata.obs = adata.obs.rename(columns={"source_x": "source"})

In [73]:
adata.obs = adata.obs.rename(columns={"source_region_x": "site"})

In [81]:
adata.obs = adata.obs.rename(columns={"disease": "cancer_type"})

In [78]:
adata.obs = adata.obs.rename(columns={"metastatic_workup_x": "disease_extent"})

In [83]:
adata.obs['sex'] = 'NaN'

In [85]:
adata.obs['age'] = 'NaN'

In [86]:
adata.obs['sample_primary_met'] = 'NaN'

In [87]:
adata.obs['treated_naive'] = 'NaN'

In [88]:
adata.obs['category'] = 'Prostate'

In [89]:
adata.obs['study'] = 'Song2022_Prostate'

In [75]:
del adata.obs['gleason_score_x']

In [92]:
del adata.obs['cancer_cell_state']

In [93]:
adata.obs

Unnamed: 0,cell_name,sample,patient,cell_type,cell_subtype,complexity,cell_cycle_phase,source,site,disease_extent,cancer_type,technology,n_cells,sex,age,sample_primary_met,treated_naive,category,study
0,AUG_PB1,AUG_PB1,Patient1,Epithelial,Epithelial,6755,Not cycling,Tumor,left-mid,,Prostate cancer,Seq-Well S3,933,,,,,Prostate,Song2022_Prostate
1,AUG_PB1_1,AUG_PB1,Patient1,Epithelial,Epithelial,7285,Not cycling,Tumor,left-mid,,Prostate cancer,Seq-Well S3,933,,,,,Prostate,Song2022_Prostate
2,AUG_PB1_2,AUG_PB1,Patient1,Epithelial,Epithelial,8204,Not cycling,Tumor,left-mid,,Prostate cancer,Seq-Well S3,933,,,,,Prostate,Song2022_Prostate
3,AUG_PB1_3,AUG_PB1,Patient1,Epithelial,Epithelial,7234,Not cycling,Tumor,left-mid,,Prostate cancer,Seq-Well S3,933,,,,,Prostate,Song2022_Prostate
4,AUG_PB1_4,AUG_PB1,Patient1,Epithelial,Epithelial,6291,Not cycling,Tumor,left-mid,,Prostate cancer,Seq-Well S3,933,,,,,Prostate,Song2022_Prostate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21738,PR5199_1020,PR5199,Patient6,Fibroblast,Fibroblast,453,,Tumor,,,Prostate cancer,Seq-Well S3,1025,,,,,Prostate,Song2022_Prostate
21739,PR5199_1021,PR5199,Patient6,Smooth_muscle,Smooth_muscle,412,,Tumor,,,Prostate cancer,Seq-Well S3,1025,,,,,Prostate,Song2022_Prostate
21740,PR5199_1022,PR5199,Patient6,Smooth_muscle,Smooth_muscle,382,,Tumor,,,Prostate cancer,Seq-Well S3,1025,,,,,Prostate,Song2022_Prostate
21741,PR5199_1023,PR5199,Patient6,Myeloid,Myeloid,423,,Tumor,,,Prostate cancer,Seq-Well S3,1025,,,,,Prostate,Song2022_Prostate


In [94]:
output_path = "/home/ubuntu/Downloads/Data_Prostate/Data_Song2022_Prostate.h5ad"
adata.write(output_path)
print(f"✅ data saved to: {output_path}")

✅ data saved to: /home/ubuntu/Downloads/Data_Prostate/Data_Song2022_Prostate.h5ad


#### Data Merging

In [112]:
import scanpy as sc
import anndata
import os


# Define file paths
files = [
    "/home/ubuntu/Downloads/Data_Prostate/Data_Chen2021_Prostate.h5ad",
    "/home/ubuntu/Downloads/Data_Prostate/Data_Dong2020_Prostate.h5ad",
    "/home/ubuntu/Downloads/Data_Prostate/Data_He2021_Prostate.h5ad",
    "/home/ubuntu/Downloads/Data_Prostate/Data_Song2022_Prostate.h5ad"
]

# Load datasets
adatas = [sc.read(file) for file in files]

# Merge all AnnData objects
adata_merged = anndata.concat(adatas, join="outer", fill_value=0)

# Fix non-string columns (e.g. 'sample') to avoid h5py write errors
for col in adata_merged.obs.columns:
    if adata_merged.obs[col].dtype == 'object':
        adata_merged.obs[col] = adata_merged.obs[col].astype(str)

# Save merged dataset
output_path = "/home/ubuntu/Downloads/Data_Prostate/Prostate_Combined.h5ad"
adata_merged.write(output_path)

print(f"✅ Merged and saved to: {output_path}")


  utils.warn_names_duplicates("obs")


✅ Merged and saved to: /home/ubuntu/Downloads/Data_Prostate/Prostate_Combined.h5ad
