## Full matrix data object

We are now completing the whole object.

__authors__ = ["Ciro Ramírez-Suástegui", "Lisa Sikkema"]

__copyright__ = "Copyright 2022-11-15, Helmholtz Zentrum Muenchen"

__license__ = "GPL"

__version__ = "0.0.9"

__email__ = "ciro.suastegui@helmholtz-muenchen.de, lisa.sikkema@helmholtz-muenchen.de"

__status__ = "Prototype"

#### Structure

* [Global variables and paths](#bullet1)
* [Loading data](#bullet2)
* [Pre-processing](#bullet3)
* [Main](#bullet4)
* [Conclusions](#bullet5)
* [Save](#bullet6)

### Environment setup

In [1]:
%load_ext autoreload
%autoreload 2
import importlib

spam_spec = importlib.util.find_spec("lab_black")
if spam_spec is not None:
    %load_ext lab_black

In [2]:
# basic modules
import warnings, os, re
import time, sys, gc

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=UserWarning)

In [3]:
# in-house/developing modules
# tools modules
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.sparse as sp

In [4]:
sc.logging.print_versions()

-----
anndata     0.8.0
scanpy      1.9.1
-----
7b32b9a39ad70713acde__mypyc NA
PIL                         9.2.0
autoreload                  NA
backcall                    0.2.0
beta_ufunc                  NA
binom_ufunc                 NA
black                       22.6.0
blib2to3                    NA
cffi                        1.15.1
click                       8.1.3
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.5.1
decorator                   5.1.1
defusedxml                  0.7.1
entrypoints                 0.4
h5py                        3.7.0
igraph                      0.9.11
ipykernel                   6.15.2
ipython_genutils            0.2.0
jedi                        0.18.2
joblib                      1.1.0
jupyter_server              1.23.4
kiwisolver                  1.4.4
lab_black                   NA
leidenalg                   0.8.10
llvmlite                    0.39.0
matplotlib    

In [5]:
print("Environment:", re.sub(".os.py", "", os.__file__))
parentpath = lambda _path, n: os.sep.join(_path.split(os.sep)[:-n])
print("Working at:", re.sub(parentpath(os.getcwd(), 2), "", os.getcwd()))

Environment: /home/icb/ciro.suastegui/miniconda3/envs/HLCA_basic/lib/python3.7
Working at: /notebooks/3_atlas_extension


### Global variables and paths <a class="anchor" id="bullet1"></a>

#### Inputs

In [6]:
# .layers: counts
cdata_inp = "../../data/HLCA_extended/HLCA_extended/HLCA_extended_full_raw.h5ad"
# .layers: soupXed
sdata_inp = "../../data/HLCA_extended/HLCA_extended/HLCA_extended_full.h5ad"
# .var
gname_inp = (
    "../../data/HLCA_extended/extension_datasets/features/all_update_table_flat.csv"
)
# uns: 'neighbors'
# obsm: 'X_umap'
# obsp: 'connectivities', 'distances'
# obsm: 'X_scanvi_emb' is in .X
embed_inp = "../../data/HLCA_extended/HLCA_extended/HLCA_extended_scarches_emb.h5ad"

#### Output

In [7]:
adata_out = "../../data/HLCA_extended/HLCA_extended/HLCA_extended_full_object.h5ad"

### Loading data <a class="anchor" id="bullet2"></a>

In [8]:
%%time
sdata = sc.read(sdata_inp, backed='r')

CPU times: user 6.9 s, sys: 5.58 s, total: 12.5 s
Wall time: 24.6 s


In [9]:
sdata

AnnData object with n_obs × n_vars = 2382658 × 59586 backed at '../../data/HLCA_extended/HLCA_extended/HLCA_extended_full.h5ad'
    obs: 'sample', 'original_celltype_ann', 'study_long', 'study', 'last_author_PI', 'subject_ID', 'subject_ID_as_published', 'pre_or_postnatal', 'age_in_years', 'age_range', 'sex', 'smoking_status', 'smoking_history', 'BMI', 'known_lung_disease', 'condition', 'subject_type', 'cause_of_death', 'sample_type', 'anatomical_region_coarse', 'anatomical_region_detailed', 'tissue_dissociation_protocol', 'cells_or_nuclei', 'single_cell_platform', "3'_or_5'", 'enrichment', 'sequencing_platform', 'reference_genome_coarse', 'ensembl_release_reference_genome', 'cell_ranger_version', 'disease_status', 'fresh_or_frozen', 'cultured', 'cell_viability_%', 'comments', 'Processing_site', 'dataset', 'anatomical_region_level_1', 'anatomical_region_level_2', 'anatomical_region_level_3', 'anatomical_region_highest_res', 'age', 'ann_level_1', 'ann_level_2', 'ann_level_3', 'ann_level_

In [10]:
%%time
cdata = sc.read(cdata_inp, backed='r')

CPU times: user 7.05 s, sys: 4.31 s, total: 11.4 s
Wall time: 18.2 s


In [11]:
cdata

AnnData object with n_obs × n_vars = 2382658 × 59586 backed at '../../data/HLCA_extended/HLCA_extended/HLCA_extended_full_raw.h5ad'
    obs: 'sample', 'original_celltype_ann', 'study_long', 'study', 'last_author_PI', 'subject_ID', 'subject_ID_as_published', 'pre_or_postnatal', 'age_in_years', 'age_range', 'sex', 'smoking_status', 'smoking_history', 'BMI', 'known_lung_disease', 'condition', 'subject_type', 'cause_of_death', 'sample_type', 'anatomical_region_coarse', 'anatomical_region_detailed', 'tissue_dissociation_protocol', 'cells_or_nuclei', 'single_cell_platform', "3'_or_5'", 'enrichment', 'sequencing_platform', 'reference_genome_coarse', 'ensembl_release_reference_genome', 'cell_ranger_version', 'disease_status', 'fresh_or_frozen', 'cultured', 'cell_viability_%', 'comments', 'Processing_site', 'dataset', 'anatomical_region_level_1', 'anatomical_region_level_2', 'anatomical_region_level_3', 'anatomical_region_highest_res', 'age', 'ann_level_1', 'ann_level_2', 'ann_level_3', 'ann_le

In [12]:
%%time
embed = sc.read(embed_inp)

CPU times: user 4.56 s, sys: 5.83 s, total: 10.4 s
Wall time: 12.6 s


In [13]:
embed

AnnData object with n_obs × n_vars = 2382658 × 30
    obs: 'sample', 'original_celltype_ann', 'study_long', 'study', 'last_author_PI', 'subject_ID', 'subject_ID_as_published', 'pre_or_postnatal', 'age_in_years', 'age_range', 'sex', 'smoking_status', 'smoking_history', 'BMI', 'known_lung_disease', 'condition', 'subject_type', 'cause_of_death', 'sample_type', 'anatomical_region_coarse', 'anatomical_region_detailed', 'tissue_dissociation_protocol', 'cells_or_nuclei', 'single_cell_platform', "3'_or_5'", 'enrichment', 'sequencing_platform', 'reference_genome_coarse', 'ensembl_release_reference_genome', 'cell_ranger_version', 'disease_status', 'fresh_or_frozen', 'cultured', 'cell_viability_%', 'comments', 'Processing_site', 'dataset', 'anatomical_region_level_1', 'anatomical_region_level_2', 'anatomical_region_level_3', 'anatomical_region_highest_res', 'age', 'ann_level_1', 'ann_level_2', 'ann_level_3', 'ann_level_4', 'ann_level_5', 'ann_highest_res', 'ann_new', 'n_genes', 'total_counts', 'l

In [14]:
gname = pd.read_csv(gname_inp, index_col=0)

In [15]:
gname

Unnamed: 0,original,symbol_type,ensembl_id,new,repeat,study
0,TSPAN6,last_node,ENSG00000000003.15,TSPAN6,False,Kaminski_2020
1,TNMD,last_node,ENSG00000000005.6,TNMD,False,Kaminski_2020
2,DPM1,last_node,ENSG00000000419.14,DPM1,False,Kaminski_2020
3,SCYL3,last_node,ENSG00000000457.14,SCYL3,False,Kaminski_2020
4,C1orf112,last_node,ENSG00000000460.17,C1ORF112,False,Kaminski_2020
...,...,...,...,...,...,...
810964,ZYG11A,last_node,ENSG00000203995.10,ZYG11A,False,Meyer_Nikolic_2022
810965,ZYG11B,last_node,ENSG00000162378.13,ZYG11B,False,Meyer_Nikolic_2022
810966,ZYX,last_node,ENSG00000285443.2,ZYX,False,Meyer_Nikolic_2022
810967,ZZEF1,last_node,ENSG00000074755.15,ZZEF1,False,Meyer_Nikolic_2022


### Pre-processing <a class="anchor" id="bullet3"></a>

#### Setting up features' metadata

In [16]:
%%time
gname["original_aggr"] = gname.groupby(["new"])["original"].transform(
    lambda x: ";".join(list(set(x)))
)

CPU times: user 4.72 s, sys: 46.2 ms, total: 4.76 s
Wall time: 4.89 s


In [17]:
df = gname.copy()
df = df.loc[:, ["original_aggr", "ensembl_id", "new"]]

In [18]:
df = df.drop_duplicates(subset=["new"]).set_index("new")
df.shape
df

Unnamed: 0_level_0,original_aggr,ensembl_id
new,Unnamed: 1_level_1,Unnamed: 2_level_1
TSPAN6,TSPAN6,ENSG00000000003.15
TNMD,TNMD,ENSG00000000005.6
DPM1,DPM1,ENSG00000000419.14
SCYL3,SCYL3,ENSG00000000457.14
C1ORF112,C1orf112;C1ORF112,ENSG00000000460.17
...,...,...
VIRAL_Measles_morbillivirus,VIRAL_Measles_morbillivirus,
VIRAL_Mumps_rubulavirus,VIRAL_Mumps_rubulavirus,
VIRAL_Rubella,VIRAL_Rubella,
VIRAL_SARS-CoV2,VIRAL_SARS-CoV2,


In [19]:
ensembl_id_duplicates = list(
    set(df.loc[df["ensembl_id"].duplicated(), "ensembl_id"]) - set(["", np.nan])
)
ensembl_id_duplicates[:5]

[]

In [20]:
if len(ensembl_id_duplicates) > 0:
    temp = df.loc[df["ensembl_id"].isin(ensembl_id_duplicates), :].sort_values(
        by=["ensembl_id"], ascending=False
    )
    print(temp.iloc[:5, :])
else:
    print("No repeats in ensembl IDs")

No repeats in ensembl IDs


In [21]:
df.loc[sdata.var_names, :]

Unnamed: 0,original_aggr,ensembl_id
1-DEC,1-DEC,
A1BG,A1BG,ENSG00000121410.12
A1BG-AS1,A1BG-AS1,ENSG00000268895.6
A1CF,A1CF,ENSG00000148584.15
A2M,A2M,ENSG00000175899.15
...,...,...
rab1b,RAB1B,ENSG00000174903.16
stc1,STC1,ENSG00000159167.12
stxbp1,STXBP1,ENSG00000136854.24
tec,TEC,ENSG00000135605.13


#### Make sure `obs_names` have the same order

In [22]:
print(f"Same values? {set(sdata.obs_names) == set(embed.obs_names)}")

Same values? True


In [23]:
print(f"Same order? {(sdata.obs_names == embed.obs_names).all()}")

Same order? False


In [24]:
embed_obs = embed.obs_names.tolist()
meyer_obs = sdata.obs_names[sdata.obs["study"] == "Meyer_2021"].tolist()
other_obs = sdata.obs_names[sdata.obs["study"] != "Meyer_2021"].tolist()

Sorting embedding to the same order as the final Meyer + Rest (zero's only) matrix

In [25]:
embed = embed[meyer_obs + other_obs]

### Main <a class="anchor" id="bullet4"></a>

In [26]:
%%time
adata_c = sdata[meyer_obs + other_obs].to_memory()

CPU times: user 1min 59s, sys: 1min 24s, total: 3min 24s
Wall time: 3min 44s


In [27]:
adata_c.var = df.loc[adata_c.var_names, :]

In [28]:
adata_c.uns["neighbors"] = embed.uns["neighbors"]

In [29]:
adata_c.obsp["connectivities"] = embed.obsp["connectivities"]

In [30]:
adata_c.obsp["distances"] = embed.obsp["distances"]

In [31]:
adata_c.obsm["X_umap"] = embed.obsm["X_umap"]

Do you know if I need to use `X.copy()` or `.X` is fine?

Putting the embedding into `obsm`.

In [32]:
adata_c.obsm["X_scanvi_emb"] = embed.X.copy()

In [33]:
del embed

#### Normalization and log-transformation

In [34]:
%%time
sc.pp.normalize_total(adata_c, target_sum=7699)

CPU times: user 16.4 s, sys: 3.42 s, total: 19.8 s
Wall time: 19.9 s


In [35]:
%%time
sc.pp.log1p(adata_c)

CPU times: user 58 s, sys: 0 ns, total: 58 s
Wall time: 58.2 s


#### Save raw counts in a layer and calculate total counts

We reload this to memory because we wanted to avoid it taking up memory while normalizing and log-transforming

In [36]:
%%time
adata_c.layers["counts"] = cdata[meyer_obs + other_obs].to_memory().X.copy()

CPU times: user 2min 22s, sys: 1min 22s, total: 3min 45s
Wall time: 4min 17s


In [37]:
adata_c.obs["total_counts"] = adata_c.layers["counts"].sum(axis=1)

#### Adding Meyer_2021 soupXed data

In [38]:
%%time
sdata_meyer2021 = sdata[meyer_obs].to_memory().X.copy()

CPU times: user 5.35 s, sys: 1.91 s, total: 7.26 s
Wall time: 10.2 s


In [39]:
adata_c.layers["soupX"] = sp.vstack(
    (
        sdata_meyer2021,
        sp.csr_matrix(
            (sdata.shape[0] - sdata_meyer2021.shape[0], sdata.shape[1]),
            dtype=np.float32,
        ),
    ),
    format="csr",
)

#### Checking matrices' sparcity

In [40]:
adata_c.X

<2382658x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 4333222727 stored elements in Compressed Sparse Row format>

In [41]:
adata_c.layers["counts"]

<2382658x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 4312860970 stored elements in Compressed Sparse Row format>

In [42]:
adata_c.layers["soupX"]

<2382658x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 187231837 stored elements in Compressed Sparse Row format>

Meyer specifically

In [43]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].X

<129340x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 187231837 stored elements in Compressed Sparse Row format>

In [45]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].layers["counts"]

<129340x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 166870080 stored elements in Compressed Sparse Row format>

In [46]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].layers["soupX"]

<129340x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 187231837 stored elements in Compressed Sparse Row format>

Tata because other studies get jealous

In [47]:
adata_c[adata_c.obs["study"] == "Tata_unpubl"].X

<21700x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 57876664 stored elements in Compressed Sparse Row format>

In [48]:
adata_c[adata_c.obs["study"] == "Tata_unpubl"].layers["counts"]

<21700x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 57876664 stored elements in Compressed Sparse Row format>

In [49]:
adata_c[adata_c.obs["study"] == "Tata_unpubl"].layers["soupX"]

<21700x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 0 stored elements in Compressed Sparse Row format>

Closing up to Meyer counts difference across matrices

In [58]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].X[-10:, -10:-5].toarray()

array([[0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.58168983, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [1.3137116 , 0.        , 0.        , 0.        , 0.        ],
       [1.9351146 , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ]],
      dtype=float32)

In [59]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].layers["counts"][-10:, -10:-5].toarray()

array([[0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]], dtype=float32)

In [57]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].layers["soupX"][-10:, -10:-5].toarray()

array([[0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.99843436, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.9994829 , 0.        , 0.        , 0.        , 0.        ],
       [0.9997394 , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ]],
      dtype=float32)

Checking other parts of the matrix

In [64]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].X[-210:-200, -905:-900].toarray()

array([[0.       , 3.0992782, 0.       , 2.8291957, 0.       ],
       [0.6540061, 3.0111947, 1.5428032, 0.6433596, 0.       ],
       [0.       , 4.818784 , 0.       , 1.9212245, 0.       ],
       [0.       , 2.5507424, 3.3594618, 0.       , 0.       ],
       [0.       , 3.6311038, 1.1894857, 1.1877304, 0.       ],
       [0.       , 4.6386056, 2.1822228, 0.       , 0.       ],
       [0.       , 3.7287457, 1.0569537, 1.3399746, 0.       ],
       [0.       , 4.1396537, 0.       , 1.4845533, 0.       ],
       [0.       , 3.9706714, 2.070712 , 2.0700052, 0.       ],
       [0.       , 0.       , 2.0253978, 2.024512 , 0.       ]],
      dtype=float32)

In [65]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].layers["counts"][
    -210:-200, -905:-900
].toarray()

array([[ 0.,  3.,  0.,  3.,  0.],
       [ 1., 21.,  4.,  1.,  0.],
       [ 0., 42.,  0.,  1.,  0.],
       [ 0.,  3.,  7.,  0.,  0.],
       [ 0., 16.,  1.,  1.,  0.],
       [ 0., 25.,  2.,  0.,  0.],
       [ 0., 43.,  2.,  3.,  0.],
       [ 0., 18.,  0.,  1.,  0.],
       [ 0., 30.,  4.,  4.,  0.],
       [ 0.,  0.,  1.,  1.,  0.]], dtype=float32)

In [66]:
adata_c[adata_c.obs["study"] == "Meyer_2021"].layers["soupX"][
    -210:-200, -905:-900
].toarray()

array([[ 0.        ,  3.9823215 ,  0.        ,  2.9952738 ,  0.        ],
       [ 1.        , 20.917482  ,  3.9834964 ,  0.97793925,  0.        ],
       [ 0.        , 41.970463  ,  0.        ,  1.9921033 ,  0.        ],
       [ 0.        ,  2.976199  ,  6.9952397 ,  0.        ,  0.        ],
       [ 0.        , 15.962845  ,  0.99256897,  0.9900667 ,  0.        ],
       [ 0.        , 25.977457  ,  1.9954914 ,  0.        ,  0.        ],
       [ 0.        , 42.917206  ,  1.9834412 ,  2.9778655 ,  0.        ],
       [ 0.        , 17.973505  ,  0.        ,  0.99291664,  0.        ],
       [ 0.        , 29.952091  ,  3.9904182 ,  3.9871917 ,  0.        ],
       [ 0.        ,  0.        ,  0.9969799 ,  0.9959629 ,  0.        ]],
      dtype=float32)

Checking if Kaminski's data is integers.

In [71]:
adata_c[adata_c.obs["study"] == "Kaminski_2020"].X

<307650x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 675634120 stored elements in Compressed Sparse Row format>

In [72]:
adata_c[adata_c.obs["study"] == "Kaminski_2020"].layers["counts"]

<307650x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 675634120 stored elements in Compressed Sparse Row format>

In [73]:
adata_c[adata_c.obs["study"] == "Kaminski_2020"].layers["soupX"]

<307650x59586 sparse matrix of type '<class 'numpy.float32'>'
	with 0 stored elements in Compressed Sparse Row format>

In [75]:
adata_c[adata_c.obs["study"] == "Kaminski_2020"].layers["counts"][
    -10:, -10:-5
].toarray()

array([[ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 2.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.],
       [11.,  1.,  0.,  0.,  0.],
       [ 2.,  0.,  0.,  0.,  0.],
       [ 2.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.],
       [ 2.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  2.]], dtype=float32)

In [80]:
pd.Series(
    adata_c[adata_c.obs["study"] == "Kaminski_2020"]
    .layers["counts"][:100, :]
    .A.flatten()
).value_counts()

0.0       5773416
1.0        119184
2.0         30577
3.0         12151
4.0          6238
           ...   
317.0           1
245.0           1
1134.0          1
139.0           1
283.0           1
Length: 209, dtype: int64

In [85]:
%%time
non_int = adata_c[adata_c.obs["study"] == "Kaminski_2020"].layers["counts"].toarray() % 1 != 0

CPU times: user 2min 27s, sys: 37.8 s, total: 3min 5s
Wall time: 3min 5s


In [86]:
total_non_ints = sum(non_int)

In [88]:
sum(total_non_ints)

0

### Conclusions <a class="anchor" id="bullet5"></a>

In [67]:
adata_c

AnnData object with n_obs × n_vars = 2382658 × 59586
    obs: 'sample', 'original_celltype_ann', 'study_long', 'study', 'last_author_PI', 'subject_ID', 'subject_ID_as_published', 'pre_or_postnatal', 'age_in_years', 'age_range', 'sex', 'smoking_status', 'smoking_history', 'BMI', 'known_lung_disease', 'condition', 'subject_type', 'cause_of_death', 'sample_type', 'anatomical_region_coarse', 'anatomical_region_detailed', 'tissue_dissociation_protocol', 'cells_or_nuclei', 'single_cell_platform', "3'_or_5'", 'enrichment', 'sequencing_platform', 'reference_genome_coarse', 'ensembl_release_reference_genome', 'cell_ranger_version', 'disease_status', 'fresh_or_frozen', 'cultured', 'cell_viability_%', 'comments', 'Processing_site', 'dataset', 'anatomical_region_level_1', 'anatomical_region_level_2', 'anatomical_region_level_3', 'anatomical_region_highest_res', 'age', 'ann_level_1', 'ann_level_2', 'ann_level_3', 'ann_level_4', 'ann_level_5', 'ann_highest_res', 'ann_new', 'n_genes', 'total_counts',

### Save <a class="anchor" id="bullet6"></a>

In [69]:
%%time
adata_c.write(filename=adata_out)

CPU times: user 30.7 s, sys: 2min 25s, total: 2min 56s
Wall time: 3min 16s


Done.