In [1]:
import scanpy as sc
import harmonypy as hm
import pandas as pd
import anndata as ad
import numpy as np

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
#import seaborn as sns

DPI=300
FONTSIZE=20 #42

random_state = 7

sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(scanpy = True, dpi=80, transparent=True, vector_friendly = True, dpi_save=DPI) 
from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42

In [2]:
#Lyko et al.
import os
# Directory containing the 10X .h5 files
input_dir = "/data/BCI-SingleCell/SCC_Atlas/Sam_Nicholls/Unzipped files/GSE218170"

# Get all .h5 files, sorted to preserve file order
file_list = sorted([os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith('.h5ad')])

# List to store all AnnData objects in order
adata_list = []
adata_names = []

# Loop through each .h5 file and read as 10X AnnData
for file in file_list:
    name = os.path.splitext(os.path.basename(file))[0]  # Remove .h5 extension
    print(f"Reading: {name}")

    try:
        adata = sc.read_h5ad(file)
        adata.var_names_make_unique()
    except Exception as e:
        print(f"❌ Error reading {file}: {e}")
        continue

    # Save in list (preserves order)
    adata_names.append(str(name))
    adata_list.append(adata)

    # Optionally assign to global variable by filename (like R's assign)
    globals()[name] = adata

Reading: GSM6735856
Reading: GSM6735857
Reading: GSM6735858
Reading: GSM6735859
Reading: GSM6735860
Reading: GSM6735861
Reading: GSM6735862
Reading: GSM6735863
Reading: GSM6735864
Reading: GSM6735865
Reading: GSM6735866


In [3]:
GSE218170 = sc.concat(adata_list, join="outer", label="batch", keys=adata_names)

  utils.warn_names_duplicates("obs")


In [4]:
GSE218170.obs['Condition'] = GSE218170.obs['batch'].map ({
    'GSM6735856': 'Normal', 'GSM6735857': 'Normal',
    'GSM6735858': 'Normal', 'GSM6735859': 'Tumor',
    'GSM6735860': 'Tumor', 'GSM6735861': 'Tumor',
    'GSM6735862': 'Tumor', 'GSM6735863': 'Tumor',
    'GSM6735864': 'Tumor', 'GSM6735865': 'Tumor',
    'GSM6735866': 'Tumor' })

GSE218170.obs['Patient'] = GSE218170.obs['batch'].map ({
    'GSM6735856': 'P11', 'GSM6735857': 'P12',
    'GSM6735858': 'P13', 'GSM6735859': 'P14',
    'GSM6735860': 'P15', 'GSM6735861': 'P16',
    'GSM6735862': 'P17', 'GSM6735863': 'P18',
    'GSM6735864': 'P19', 'GSM6735865': 'P20',
    'GSM6735866': 'P21' })

In [5]:
GSE218170.obs

Unnamed: 0,orig.ident,nCount_RNA,nFeature_RNA,batch,Condition,Patient
AAACCTGAGAAACCTA,SeuratProject,2162.0,780,GSM6735856,Normal,P11
AAACCTGAGCATCATC,SeuratProject,20485.0,2990,GSM6735856,Normal,P11
AAACCTGAGCGGATCA,SeuratProject,3853.0,1373,GSM6735856,Normal,P11
AAACCTGAGCTCAACT,SeuratProject,20101.0,3951,GSM6735856,Normal,P11
AAACCTGAGCTCCTTC,SeuratProject,1116.0,489,GSM6735856,Normal,P11
...,...,...,...,...,...,...
TTTGTCAGTTTGTTTC,SeuratProject,797.0,435,GSM6735866,Tumor,P21
TTTGTCATCACCAGGC,SeuratProject,1225.0,590,GSM6735866,Tumor,P21
TTTGTCATCCATTCTA,SeuratProject,1677.0,617,GSM6735866,Tumor,P21
TTTGTCATCCCAGGTG,SeuratProject,1965.0,697,GSM6735866,Tumor,P21


In [6]:
GSE218170.write_h5ad('Anndata/GSE218170.h5ad')

115,000 Cells