In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

from sciso.plot import *

Set plot style

In [2]:
# Apply the project's plotting style before any figure is drawn.
# NOTE(review): `set_nature_style` is not defined in this notebook, so it
# presumably arrives through `from sciso.plot import *` — confirm.
set_nature_style()

In [None]:
# NOTE(review): bare attribute reference in a cell that was never executed
# (`In [None]`). It calls nothing and only echoes the function object, so it
# looks like leftover scratch work — consider deleting this cell.
sc.read

# QC of natural isotope abundance correction and single-cell analysis

## Set paths

In [3]:
# Input
# Root directory of the 20220322_AB_DKFZHypoxia experiment. The default is the
# original workstation path; set the HYPOXIA_DATA_DIR environment variable to
# run the notebook against a copy of the data stored somewhere else. An unset
# or empty variable falls back to the default.
DEFAULT_DATA_DIR = r"/media/buglakova/embl_data/data/lipid_isotope/20220322_AB_DKFZHypoxia"
data_dir = Path(os.environ.get("HYPOXIA_DATA_DIR") or DEFAULT_DATA_DIR)

# Slides that get combined; every per-slide path below is derived from this list,
# so adding a slide only requires a new entry here.
SLIDE_NAMES = ["slide2", "slide3"]
slide_dirs = [data_dir / slide_name for slide_name in SLIDE_NAMES]

# Per-slide sub-folders (spacem_dirs is not used by the cells visible here).
spacem_dirs = [slide_dir / "spacem_data" for slide_dir in slide_dirs]
anndata_paths = [slide_dir / "anndata" for slide_dir in slide_dirs]

# Per-slide input files: the regular AnnData file and its "_am_isocor" counterpart.
adata_paths = [slide_anndata_dir / "adata_concat.h5ad" for slide_anndata_dir in anndata_paths]
adata_am_paths = [
    slide_anndata_dir / "adata_concat_am_isocor.h5ad" for slide_anndata_dir in anndata_paths
]

# Metadata table shared by all slides.
metadata_path = data_dir / "hypoxia_metadata.csv"

# Output
# Folder that receives the combined AnnData files. It is created up front,
# together with any missing parent folders; an already existing folder is fine.
anndata_path = data_dir.joinpath("anndata")
anndata_path.mkdir(exist_ok=True, parents=True)

## Load AnnData and metadata

In [4]:
# Load the metadata table; it is joined onto the cells through its "batch"
# column in the "Assign conditions based on metadata" section.
metadata = pd.read_csv(metadata_path)

In [5]:
# Preview the first rows to check that the metadata file was parsed as expected.
metadata.head()

Unnamed: 0,datasetId,datasetName,group,submitter,PI,organism,organismPart,condition,slide,well,batch,growthConditions,GFP_ground_truth,ionisationSource,maldiMatrix,analyzer,resPower400,polarity,uploadDateTime,mixture
0,2022-04-11_10h23m28s,2022-04-10_AB_DKFZHypoxia_S3_W1_DANneg_s10a33_...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,Normoxia,3,1,S3W1,Unlabeled,0,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-11 10:23:28,0
1,2022-04-11_10h24m59s,2022-04-10_AB_DKFZHypoxia_S3_W2_DANneg_s10a33_...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,Hypoxia,3,2,S3W2,Unlabeled,1,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-11 10:25:00,0
2,2022-04-11_10h26m29s,2022-04-10_AB_DKFZHypoxia_S3_W3_DANneg_s10a33_...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,Hypoxia,3,3,S3W3,Labeled,1,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-11 10:26:29,0
3,2022-04-11_10h28m11s,2022-04-10_AB_DKFZHypoxia_S3_W4_DANneg_s10a33_...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,Hypoxia + Normoxia,3,4,S3W4,Labeled,2,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-11 10:28:12,1
4,2022-04-11_10h31m36s,2022-04-10_AB_DKFZHypoxia_S3_W5_DANneg_s10a33_...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,Normoxia,3,5,S3W5,Labeled,0,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-11 10:31:37,0


In [6]:
# Read one AnnData object per slide for each of the two file variants.
# The resulting lists keep the order of adata_paths / adata_am_paths.
adatas = list(map(sc.read_h5ad, adata_paths))
adatas_am = list(map(sc.read_h5ad, adata_am_paths))

## Concatenate adatas from different slides

In [7]:
# Both concatenations use exactly the same options, so they are spelled out once.
# NOTE(review): per the anndata documentation, batch_key names the .obs column
# that records which input object a cell came from, and join="inner" keeps only
# the variables shared by all inputs; fill_value appears to matter only for
# join="outer" — confirm against the installed anndata version.
# NOTE(review): "slide_batch" is presumably chosen so the existing "batch"
# column in .obs is left untouched — confirm.
concat_kwargs = {
    "batch_key": "slide_batch",
    "join": "inner",
    "fill_value": 0.0,
}

# Calling the method on the class makes the first list element `self` and
# passes the remaining elements as the objects to append.
adata_concat = sc.AnnData.concatenate(*adatas, **concat_kwargs)
adata_am_concat = sc.AnnData.concatenate(*adatas_am, **concat_kwargs)

  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],


## Assign conditions based on metadata

In [8]:
# Attach the metadata columns to every cell of both objects by matching on "batch".
#
# * reset_index() / set_index("cell_id"): a column-based merge does not keep the
#   left index, so the index is moved into a column first and restored afterwards.
#   NOTE(review): presumably "cell_id" is the name of the .obs index that
#   reset_index() turns into a column — confirm.
# * how="left" keeps every cell, including cells whose batch has no metadata row
#   (their metadata columns are then filled with NaN).
# * validate="many_to_one" makes pandas raise a MergeError if a batch value
#   occurs more than once in `metadata`; without it such a duplicate would
#   silently duplicate the affected cells.
#
# The cell is not idempotent: running it a second time merges the metadata
# columns into .obs again.
for adata in (adata_concat, adata_am_concat):
    adata.obs = (
        adata.obs.reset_index()
        .merge(metadata, on="batch", how="left", validate="many_to_one")
        .set_index("cell_id")
    )

## Write concatenated adata

In [9]:
# Save both combined objects into the output folder, in the same order as before
# (regular object first, "_am" object second).
outputs = (
    (adata_concat, "hypoxia_adata_concat.h5ad"),
    (adata_am_concat, "hypoxia_adata_am_concat.h5ad"),
)
for adata, file_name in outputs:
    output_file = anndata_path / file_name
    # The file names already end in ".h5ad", so with_suffix leaves them unchanged.
    adata.write(output_file.with_suffix(".h5ad"))