In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

from sciso.plot import *

Set plot style

In [2]:
# Apply the figure style. `set_nature_style` is not imported by name, so it
# presumably comes from the `from sciso.plot import *` star import — confirm.
set_nature_style()

# QC of natural isotope abundance correction and single-cell analysis

## Set paths

In [3]:
# Input
# Root directory of the dataset. The default is the original location on the
# external drive; set the ACLYKD_DATA_DIR environment variable to run the
# notebook against a copy stored elsewhere without editing this cell.
data_dir = Path(
    os.environ.get(
        "ACLYKD_DATA_DIR",
        r"/media/buglakova/Extreme SSD/isotope_tracing_backup/data/share/20220411_AB_DKFZACLYac",
    )
)
# One sub-directory per slide; the per-slide lists below all follow this order.
slide_dirs = [data_dir / "slide1",
             data_dir / "slide2",
             data_dir / "slide3"]
# Per-slide "spacem_data" directories (not referenced by the cells shown here).
spacem_dirs = [slide_dir / "spacem_data" for slide_dir in slide_dirs]
# Per-slide "anndata" directories holding the input .h5ad files.
# Note: `anndata_paths` (plural, per-slide inputs) is distinct from
# `anndata_path` (singular, the combined output directory defined below).
anndata_paths = [slide_dir / "anndata" for slide_dir in slide_dirs]
adata_paths = [anndata_path / "adata_concat.h5ad" for anndata_path in anndata_paths]
adata_am_paths = [anndata_path / "adata_concat_am_isocor.h5ad" for anndata_path in anndata_paths]
# Table that is later joined onto the cells via its "batch" column.
metadata_path = data_dir / "aclykd_metadata.csv"

# Output
# Directory for the files written at the end of the notebook; created here so
# the write step cannot fail on a missing folder.
anndata_path = data_dir / "anndata"
anndata_path.mkdir(parents=True, exist_ok=True)

## Load AnnData and metadata

In [4]:
# Load the metadata table from `metadata_path`; it is joined onto the cells
# via its "batch" column further down in the notebook.
metadata = pd.read_csv(metadata_path)

In [5]:
# Preview the first rows to check that the metadata parsed as expected.
metadata.head()

Unnamed: 0,datasetId,datasetName,group,submitter,PI,organism,organismPart,condition,slide,well,batch,growthConditions,GFP_ground_truth,ionisationSource,maldiMatrix,analyzer,resPower400,polarity,uploadDateTime,mixture
0,2022-04-14_10h46m03s,2022-04-13_ME_DKFZACLY_S1_W1_DANneg_s10a33_100...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,ACLYkd Ol.2 + Wildtype,1,1,S1W1,Labeled,2,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-14 10:46:03,1
1,2022-04-14_10h58m32s,2022-04-13_ME_DKFZACLY_S1_W2_DANneg_s10a33_100...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,ACLYkd Ol.2 + Wildtype,1,2,S1W2,Labeled,2,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-14 10:58:32,1
2,2022-04-14_11h08m04s,2022-04-13_ME_DKFZACLY_S1_W3_DANneg_s10a33_100...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,ACLYkd Ol.2,1,3,S1W3,Labeled,1,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-14 11:08:05,0
3,2022-04-14_11h11m49s,2022-04-13_ME_DKFZACLY_S1_W5_DANneg_s10a33_100...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,ACLY Ol.1 + Wildtype,1,5,S1W5,Labeled,2,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-14 11:11:50,1
4,2022-04-14_11h15m05s,2022-04-13_ME_DKFZACLY_S1_W6_DANneg_s10a33_100...,♡EMBL♡,Mans Ekelof,Theodore Alexandrov,Mus musculus (mouse),Liver,ACLY Ol.1 + Wildtype,1,6,S1W6,Labeled,2,AP-SMALDI5,"1,5-diaminonaphthalene (DAN)",Orbitrap,98995,negative,2022-04-14 11:15:05,1


In [6]:
# Read one AnnData object per slide, keeping the order of `adata_paths` /
# `adata_am_paths` (and therefore of `slide_dirs`).
adatas = list(map(sc.read_h5ad, adata_paths))
adatas_am = list(map(sc.read_h5ad, adata_am_paths))

## Concatenate adatas from different slides

In [7]:
def _concatenate_slides(per_slide_adatas):
    """Concatenate the per-slide AnnData objects into a single AnnData.

    The slide each observation came from is recorded in the ``slide_batch``
    obs column; the join is ``"inner"`` and ``fill_value`` is ``0.0``.

    NOTE(review): the saved output of this cell echoes both call sites, which
    looks like a warning being emitted for them (possibly a deprecation of
    ``AnnData.concatenate``) — confirm before changing the API used here.
    """
    return sc.AnnData.concatenate(
        *per_slide_adatas,
        batch_key="slide_batch",
        join="inner",
        fill_value=0.0,
    )


adata_concat = _concatenate_slides(adatas)
adata_am_concat = _concatenate_slides(adatas_am)

  adata_concat = sc.AnnData.concatenate(
  adata_am_concat = sc.AnnData.concatenate(


## Assign conditions based on metadata

In [8]:
def _obs_with_metadata(obs, metadata):
    """Return ``obs`` left-joined with ``metadata`` on "batch", indexed by "cell_id".

    The obs index is moved into a column before merging (``merge`` would
    otherwise discard it) and restored afterwards via ``set_index("cell_id")``;
    this presumably relies on the obs index being named "cell_id" — confirm.

    ``validate="many_to_one"`` makes pandas raise ``MergeError`` if a "batch"
    value occurs more than once in ``metadata``. Without the check, duplicated
    metadata rows would multiply the matching cell rows and the problem would
    only surface later, with a less helpful error.
    """
    return (
        obs.reset_index()
        .merge(metadata, on="batch", how="left", validate="many_to_one")
        .set_index("cell_id")
    )


adata_concat.obs = _obs_with_metadata(adata_concat.obs, metadata)
adata_am_concat.obs = _obs_with_metadata(adata_am_concat.obs, metadata)

## Write concatenated adata

In [9]:
# Save both concatenated objects into the output directory. The file names
# already end in ".h5ad", so the paths are passed to `write` unchanged.
for output_name, adata_to_save in (
    ("aclykd_adata_concat.h5ad", adata_concat),
    ("aclykd_adata_am_concat.h5ad", adata_am_concat),
):
    output_file = anndata_path / output_name
    adata_to_save.write(output_file)