In [1]:
import anndata as ad
import scanpy as sc
import pandas as pd
import os
import numpy as np
from tqdm import tqdm

In [2]:
import toolbox as tb

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [5]:
def load_mtx(mtx_path: str, barcodes_path: str, features_path: str, sample_name: str | None = None) -> ad.AnnData:
    adata = sc.read_mtx(mtx_path).transpose()
    
    barcodes = pd.read_csv(barcodes_path, header=None, sep='\t', names=['barcodes'])
    features = pd.read_csv(features_path, header=None, sep='\t', names=['gene_ids', 'gene_names'])

    adata.obs_names = sample_name + "_" + barcodes['barcodes'] if sample_name is not None else barcodes['barcodes']
    adata.var_names = features['gene_ids']
    adata.var['gene_names'] = features['gene_names'].values
    
    if sample_name is not None:
        adata.obs['sample'] = sample_name

    return adata

In [6]:
data_path = "/nfs/data/COST_IBD/data/atopic_dermatitis/datasets/Rindler"
sample_names = os.listdir(data_path)

In [7]:
adata_list = []

for sample_name in tqdm(sample_names):
    sample_dir = os.path.join(data_path, sample_name)

    matrix_path = os.path.join(sample_dir, "matrix.mtx.gz")
    barcodes_path = os.path.join(sample_dir, "barcodes.tsv.gz")
    features_path = os.path.join(sample_dir, "features.tsv.gz")

    single_adata: ad.AnnData = load_mtx(matrix_path, barcodes_path, features_path, sample_name)
    
    # The contents of gene_names are worthless
    single_adata.var.drop(columns=["gene_names"], inplace=True)

    # The dataset contains duplicated gene names
    single_adata.var_names_make_unique()

    adata_list.append(single_adata)

100%|██████████| 4/4 [00:07<00:00,  1.88s/it]


In [8]:
adata = ad.concat(adata_list, join="outer")
adata

AnnData object with n_obs × n_vars = 3598 × 33538
    obs: 'sample'

In [9]:
adata.write('/nfs/data/COST_IBD/data/atopic_dermatitis/anndata/rindler.h5ad')

In [10]:
adata

AnnData object with n_obs × n_vars = 3598 × 33538
    obs: 'sample'

In [3]:
adata = sc.read('/nfs/data/COST_IBD/data/atopic_dermatitis/anndata/rindler.h5ad')

In [6]:
adata.obs


Unnamed: 0_level_0,sample
barcodes,Unnamed: 1_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20
AD20_AAACCCATCTCCCTAG-1,AD20
AD20_AAACGAACATGGCGCT-1,AD20
AD20_AAACGAATCACAGTGT-1,AD20
AD20_AAACGCTCAGGATGAC-1,AD20
...,...
AD24_TTTCGATAGCATACTC-1,AD24
AD24_TTTGACTAGCATTTCG-1,AD24
AD24_TTTGACTAGTGAATAC-1,AD24
AD24_TTTGACTAGTTCATCG-1,AD24


In [5]:
adata.obs['patient'] = adata.obs['sample']
adata.obs.head()

Unnamed: 0_level_0,sample,patient
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20
AD20_AAACCCATCTCCCTAG-1,AD20,AD20
AD20_AAACGAACATGGCGCT-1,AD20,AD20
AD20_AAACGAATCACAGTGT-1,AD20,AD20
AD20_AAACGCTCAGGATGAC-1,AD20,AD20


In [4]:
adata.obs['tissue'] = 'Skin Suction Blister'
adata.obs

Unnamed: 0_level_0,sample,patient,tissue,condition,batch,easi_score,severity
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACCCATCTCCCTAG-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGAACATGGCGCT-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGAATCACAGTGT-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGCTCAGGATGAC-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
...,...,...,...,...,...,...,...
AD24_TTTCGATAGCATACTC-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGCATTTCG-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGTGAATAC-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGTTCATCG-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear


In [8]:
adata.obs['condition'] = 'Spontaneously healed atopic dermatitis'
adata.obs

Unnamed: 0_level_0,sample,patient,tissue,condition
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis
AD20_AAACCCATCTCCCTAG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis
AD20_AAACGAACATGGCGCT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis
AD20_AAACGAATCACAGTGT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis
AD20_AAACGCTCAGGATGAC-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis
...,...,...,...,...
AD24_TTTCGATAGCATACTC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis
AD24_TTTGACTAGCATTTCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis
AD24_TTTGACTAGTGAATAC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis
AD24_TTTGACTAGTTCATCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis


In [9]:
adata.obs['batch'] = 'GSE162054'
adata.obs

Unnamed: 0_level_0,sample,patient,tissue,condition,batch
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD20_AAACCCATCTCCCTAG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD20_AAACGAACATGGCGCT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD20_AAACGAATCACAGTGT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD20_AAACGCTCAGGATGAC-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
...,...,...,...,...,...
AD24_TTTCGATAGCATACTC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD24_TTTGACTAGCATTTCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD24_TTTGACTAGTGAATAC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054
AD24_TTTGACTAGTTCATCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054


In [6]:
adata.obs

Unnamed: 0_level_0,sample,patient,tissue,condition,batch,easi_score,severity
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACCCATCTCCCTAG-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGAACATGGCGCT-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGAATCACAGTGT-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGCTCAGGATGAC-1,AD20,AD20,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
...,...,...,...,...,...,...,...
AD24_TTTCGATAGCATACTC-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGCATTTCG-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGTGAATAC-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGTTCATCG-1,AD24,AD24,Skin Suction Blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear


In [7]:
adata.write('/nfs/data/COST_IBD/data/atopic_dermatitis/anndata/rindler.h5ad')

In [4]:
adata = sc.read('/nfs/data/COST_IBD/data/atopic_dermatitis/anndata/rindler.h5ad')

In [4]:
adata.obs['sample'].unique()

['AD20', 'AD25', 'AD21', 'AD24']
Categories (4, object): ['AD20', 'AD21', 'AD24', 'AD25']

In [9]:
easi_dict = {
    'AD20': 0,
    'AD21': 0,
    'AD25': 0,
    'AD24': 0,
}
adata.obs['easi_score'] = adata.obs['sample'].map(easi_dict)
adata.obs

Unnamed: 0_level_0,sample,patient,tissue,condition,batch,easi_score
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD20_AAACCCATCTCCCTAG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD20_AAACGAACATGGCGCT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD20_AAACGAATCACAGTGT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD20_AAACGCTCAGGATGAC-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
...,...,...,...,...,...,...
AD24_TTTCGATAGCATACTC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD24_TTTGACTAGCATTTCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD24_TTTGACTAGTGAATAC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0
AD24_TTTGACTAGTTCATCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0


In [10]:
def assign_severity(easi_score):
    if pd.isna(easi_score):
        return None
    elif easi_score in [0, 1]:
        return 'clear'
    elif easi_score in [1.1, 7]:
        return 'mild'
    elif easi_score in [7.1, 21]:
        return 'moderate'
    elif easi_score in [21.1, 50]:
        return 'severe'
    else:
        return 'unknown'

adata.obs['severity'] = adata.obs['easi_score'].apply(assign_severity).values
adata.obs

Unnamed: 0_level_0,sample,patient,tissue,condition,batch,easi_score,severity
barcodes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AD20_AAACCCAGTGTGTTTG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACCCATCTCCCTAG-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGAACATGGCGCT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGAATCACAGTGT-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD20_AAACGCTCAGGATGAC-1,AD20,AD20,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
...,...,...,...,...,...,...,...
AD24_TTTCGATAGCATACTC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGCATTTCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGTGAATAC-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear
AD24_TTTGACTAGTTCATCG-1,AD24,AD24,Skin Suction blister,Spontaneously healed atopic dermatitis,GSE162054,0,clear


In [2]:
adata = sc.read('/nfs/data/COST_IBD/data/atopic_dermatitis/anndata/filtered/rindler_filtered.h5ad')
adata

AnnData object with n_obs × n_vars = 1532 × 33538
    obs: 'batch', 'cell_type', 'condition', 'sex', 'patient', 'tissue', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_20_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb'
    var: 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'

In [9]:
adata.obs['condition'].unique()

['Spontaneously healed atopic dermatitis']
Categories (1, object): ['Spontaneously healed atopic dermatitis']

In [5]:
adata

AnnData object with n_obs × n_vars = 3598 × 33538
    obs: 'sample', 'patient', 'tissue', 'condition', 'batch', 'easi_score', 'severity'

In [6]:
adata_filtered = sc.read('/nfs/data/COST_IBD/versions/AD/03_00_00/data/rindler.h5ad')
adata_filtered

AnnData object with n_obs × n_vars = 3598 × 33538
    obs: 'sample', 'patient', 'tissue', 'condition', 'batch', 'easi_score', 'severity'