https://github.com/chanzuckerberg/single-cell-curation/issues/614

In [3]:
#import cxg_upload
import numpy as np
import os
import scanpy as sc
import subprocess
import anndata as ad
from scipy import sparse
from datetime import datetime

In [4]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and return its verdict.

    The validator's stdout is echoed line by line, followed by its stderr.
    Echoing of stderr stops at the first line containing ``is_valid=``,
    whose value is returned immediately.

    Parameters
    ----------
    file : str
        Path of the h5ad file passed to the validator.

    Returns
    -------
    str or None
        The text after the last ``=`` on the verdict line, stripped of
        surrounding whitespace (``'True'`` / ``'False'`` in the runs recorded
        in this notebook). A verdict on stderr takes precedence; a verdict
        found only on stdout is used as a fallback. ``None`` when neither
        stream contains ``is_valid=``.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Remember a verdict seen on stdout in case stderr does not carry one.
    stdout_verdict = None
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            stdout_verdict = line.split('=')[-1].strip()

    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            # strip() drops a trailing '\r' so the value compares equal to 'True'/'False'.
            return line.split('=')[-1].strip()

    return stdout_verdict

In [5]:
def _report_matrix(label, matrix):
    """Print dtype, storage format and integer-ness of one expression matrix."""
    print("Datatype of {} matrix: {}".format(label, matrix.dtype))
    print("Format of {} matrix: {}".format(label, get_format(matrix)))
    # Sparse matrices expose their stored values via `.data`; for a dense
    # array inspect the array itself instead of its raw memory buffer.
    values = matrix.data if sparse.issparse(matrix) else np.asarray(matrix)
    if np.any(np.mod(values, 1) != 0):
        print('{} contains non-integer values'.format(label))
    else:
        print('{} is all integers'.format(label))


def save_and_test(adata, expected):
    """Write ``adata`` to disk, describe its matrices, validate it and report.

    The current time is appended to ``adata.uns['title']`` (this mutates the
    object passed in). The object is written to ``test.h5ad`` and read back,
    so the printed diagnostics describe what was actually stored. The file is
    then passed to ``validate`` and the result is compared with ``expected``.

    Parameters
    ----------
    adata : AnnData
        Dataset to test; must already have ``uns['title']``.
    expected : str
        Verdict the validator should report, ``'True'`` or ``'False'``.

    Returns
    -------
    None
        The outcome is printed as a coloured ``PASSED`` / ``ERROR`` line.
    """
    now = datetime.now()
    dt_string = now.strftime('%m/%d %H:%M')
    adata.uns['title'] += (' - ' + dt_string)

    file = 'test.h5ad'
    try:
        adata.write(filename=file)
        adata = sc.read_h5ad(file)

        if adata.raw is not None:
            _report_matrix('raw.X', adata.raw.X)
        else:
            print("raw slot is absent")
        print('---------')

        _report_matrix('X', adata.X)
        print('------------------')

        valid = validate(file)
        print('------------------')

        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
            #if expected == 'True':
                #cxg_upload.upload(file)
    finally:
        # Always clean up the temporary file, even when writing, reading or
        # validating raised; the file may not exist if the write itself failed.
        if os.path.exists(file):
            os.remove(file)

In [6]:
def get_format(x):
    """Return a short label for how a matrix is stored.

    Parameters
    ----------
    x : object
        Matrix to describe.

    Returns
    -------
    str or None
        The SciPy format code (e.g. ``'csr'``, ``'csc'``) for a sparse input,
        ``"numpy array"`` for a ``numpy.ndarray``, and ``None`` for anything
        else.
    """
    if sparse.issparse(x):
        # `.format` is available on both sparse matrix and sparse array
        # classes, whereas `getformat()` is not provided by sparse arrays.
        return x.format
    if isinstance(x, np.ndarray):
        return "numpy array"
    return None

## Test Valid Cases

In [7]:
# Raw counts in raw slot - csr
# Baseline: the fixture file is tested unchanged apart from its title.
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 Raw counts in raw slot - csr'

# Expected verdict: valid.
save_and_test(adata, 'True')

Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.621658 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [8]:
# Raw counts in raw slot - csc
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 Raw counts in raw slot - csc'

# Rebuild the raw layer from the fixture's raw.X, convert its matrix to CSC,
# and attach it again as adata.raw. Expected verdict: valid.
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = sparse.csc_matrix(raw.X)
adata.raw = raw
save_and_test(adata, 'True')

Datatype of raw.X matrix: float32
Format of raw.X matrix: csc
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
Validation complete in 0:00:02.200945 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [9]:
# Raw counts in raw slot - np.ndarray
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 Raw counts in raw slot - np.ndarray'

# Rebuild the raw layer, densify its matrix with toarray(), and attach it
# again as adata.raw. Expected verdict: valid.
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.toarray()
adata.raw = raw
save_and_test(adata, 'True')

Datatype of raw.X matrix: float32
Format of raw.X matrix: numpy array
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.247990 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [10]:
# Raw counts in .X - csr
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 Raw counts in .X - csr'

# Move the raw counts into X and delete the raw slot, so X is the only
# matrix in the file. Expected verdict: valid.
adata.X = adata.raw.X
del adata.raw
save_and_test(adata, 'True')

raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csr
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.563721 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [11]:
# Raw counts in .X - csc
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 Raw counts in .X - csc'

# Take the counts from this cell's own adata.raw instead of a `raw` variable
# left behind by an earlier cell, so the cell works on a fresh kernel and in
# any execution order. Expected verdict: valid.
adata.X = sparse.csc_matrix(adata.raw.X)
del adata.raw
save_and_test(adata, 'True')

raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csc
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
Validation complete in 0:00:02.116349 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [12]:
# Raw counts in .X - np.ndarray
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 Raw counts in .X - np.ndarray'

# Put a dense copy of the raw counts into X and delete the raw slot.
# Expected verdict: valid.
adata.X = adata.raw.X.toarray()
del adata.raw
save_and_test(adata, 'True')

raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: numpy array
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.126856 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [13]:
# .X is float64
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 .X is float64'

# Only X is recast; the raw slot stays as loaded. Expected verdict: valid.
adata.X = adata.X.astype('float64')
save_and_test(adata, 'True')

Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float64
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.656692 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [14]:
# .X is float16
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 .X is float16'

# Only X is recast; the raw slot stays as loaded. Expected verdict: valid.
adata.X = adata.X.astype('float16')
save_and_test(adata, 'True')

Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float16
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.675820 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [15]:
# .X is int with negatives
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 .X is int with negatives'

# Densify X so the first five rows can be overwritten with -145, then convert
# back to CSR and cast to int. The raw slot stays as loaded.
# Expected verdict: valid.
adata.X = adata.X.toarray()
adata.X[0:5] = -145
adata.X = sparse.csr_matrix(adata.X)
adata.X = adata.X.astype(int)
save_and_test(adata, 'True')

Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: int64
Format of X matrix: csr
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.588810 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [16]:
# .X has cells with all zeros
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 .X has cells with all zeros'

# Zero out the first five rows of X and store the result as CSC; the raw slot
# stays as loaded. Expected verdict: valid.
adata.X = adata.X.toarray()
adata.X[0:5] = 0
adata.X = sparse.csc_matrix(adata.X)
# Sanity check: show the stored values for the first five cells.
print("Nonzero expression values in adata.X for first 5 cells: {}".format(adata[0:5,:].X.data))
save_and_test(adata, 'True')

Nonzero expression values in adata.X for first 5 cells: []
Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csc
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
Validation complete in 0:00:01.718755 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [17]:
#non-RNA assay without raw counts
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 non-RNA assay without raw counts'

# Relabel every cell as a non-RNA assay and delete the raw slot, leaving only
# the X matrix from the fixture. Expected verdict: valid.
adata.obs['assay_ontology_term_id'] = 'EFO:0007045' #ATAC-seq
adata.obs['suspension_type'] = 'nucleus'
del adata.raw
save_and_test(adata, 'True')

... storing 'assay_ontology_term_id' as categorical
... storing 'suspension_type' as categorical


raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.426518 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [18]:
#non-RNA without raw counts but with raw.X
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 non-RNA without raw counts but with raw.X'

# Relabel every cell as a non-RNA assay, then replace the raw slot with a
# copy of the current X matrix. Expected verdict: valid.
adata.obs['assay_ontology_term_id'] = 'EFO:0007045' #ATAC-seq
adata.obs['suspension_type'] = 'nucleus'
non_raw = ad.AnnData(X=adata.X, obs=adata.obs, var=adata.var)
# NOTE(review): presumably 'feature_is_filtered' is not permitted in raw.var - confirm against the schema.
non_raw.var.drop(columns='feature_is_filtered', inplace=True)
adata.raw = non_raw
save_and_test(adata, 'True')

... storing 'assay_ontology_term_id' as categorical
... storing 'suspension_type' as categorical


Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X contains non-integer values
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.544245 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [19]:
#non-RNA assay has cells with zeros
adata = sc.read_h5ad("../valid.h5ad")
adata.uns['title'] = '614 non-RNA assay has cells with zeros'

# Relabel every cell as a non-RNA assay, zero out the first five rows of X
# (stored as CSR) and delete the raw slot. Expected verdict: valid.
adata.obs['assay_ontology_term_id'] = 'EFO:0007045' #ATAC-seq
adata.obs['suspension_type'] = 'nucleus'
adata.X = adata.X.toarray()
adata.X[0:5] = 0
adata.X = sparse.csr_matrix(adata.X)
del adata.raw
# Sanity check: show the stored values for the first five cells.
print("Nonzero expression values in adata.X first 5 cells: {}".format(adata[0:5,:].X.data))
save_and_test(adata, 'True')

... storing 'assay_ontology_term_id' as categorical
... storing 'suspension_type' as categorical


Nonzero expression values in adata.X first 5 cells: []
raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.407274 with status is_valid=True
------------------
[1m[92mPASSED[0m


## Test Invalid Cases

In [20]:
# No raw counts
# Delete the raw slot so only the fixture's X matrix remains.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: All non-zero values in raw matrix must be positive integers of type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:01.473229 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [21]:
# The raw counts in adata.raw.X are float64 for csr
# Rebuild the raw layer with its matrix cast to float64 and attach it again.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype('float64')
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: float64
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.536380 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [22]:
# The raw counts in adata.X are float64 for csr
# Put the raw counts, cast to float64, into X and delete the raw slot.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype('float64')
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: float64
Format of X matrix: csr
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.444967 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [23]:
# The raw counts in adata.raw.X are float16 for csr
# Rebuild the raw layer with its matrix cast to float16 and attach it again.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype('float16')
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: float16
Format of raw.X matrix: csr
raw.X contains non-integer values
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.535383 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [24]:
# The raw counts in adata.X are float16 for csr
# Put the raw counts, cast to float16, into X and delete the raw slot.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype('float16')
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: float16
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.405481 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [25]:
# The raw counts in adata.raw.X are integers for csr
# Rebuild the raw layer with its matrix cast to int and attach it again.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype(int)
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: int64
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.511712 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [26]:
# The raw counts in adata.X are integers for csr
# Put the raw counts, cast to int, into X and delete the raw slot.
# Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype(int)
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: int64
Format of X matrix: csr
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.368212 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [27]:
# The raw counts in adata.raw.X are float64 for csc
# Rebuild the raw layer with its matrix cast to float64 and converted to CSC,
# then attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype('float64')
raw.X = sparse.csc_matrix(raw.X)
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: float64
Format of raw.X matrix: csc
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.578565 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [28]:
# The raw counts in adata.X are float64 for csc
# Put the raw counts, cast to float64 and converted to CSC, into X and delete
# the raw slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype('float64')
adata.X = sparse.csc_matrix(adata.X)
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: float64
Format of X matrix: csc
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.431806 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [29]:
# The raw counts in adata.raw.X are float16 for csc
# Not tested: the float16 values seem to be converted to float32 at some point in the process

In [30]:
# The raw counts in adata.X are float16 for csc
# Not tested: the float16 values seem to be converted to float32 at some point in the process

In [31]:
# The raw counts in adata.raw.X are integers for csc
# Rebuild the raw layer with its matrix cast to int and converted to CSC,
# then attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype(int)
raw.X = sparse.csc_matrix(raw.X)
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: int64
Format of raw.X matrix: csc
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.557051 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [32]:
# The raw counts in adata.X are integers for csc
# Put the raw counts, cast to int and converted to CSC, into X and delete the
# raw slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype(int)
adata.X = sparse.csc_matrix(adata.X)
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: int64
Format of X matrix: csc
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.384306 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [33]:
# The raw counts in adata.raw.X are float64 for np.ndarray
# Rebuild the raw layer with its matrix cast to float64 and densified, then
# attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype('float64')
raw.X = raw.X.toarray()
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: float64
Format of raw.X matrix: numpy array
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.664475 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [34]:
# The raw counts in adata.X are float64 for np.ndarray
# Put the raw counts, cast to float64 and densified, into X and delete the
# raw slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype('float64')
adata.X = adata.X.toarray()
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: float64
Format of X matrix: numpy array
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.523946 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [35]:
# The raw counts in adata.raw.X are float16 for np.ndarray
# Not tested: this case fails with "ValueError: Output dtype not compatible with inputs."

In [36]:
# The raw counts in adata.X are float16 for np.ndarray
# Not tested: this case fails with "ValueError: Output dtype not compatible with inputs."

In [37]:
# The raw counts in adata.raw.X are integers for np.ndarray
# Rebuild the raw layer with its matrix cast to int and densified, then
# attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.astype(int)
raw.X = raw.X.toarray()
adata.raw = raw
save_and_test(adata, 'False')

Datatype of raw.X matrix: int64
Format of raw.X matrix: numpy array
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:00.543127 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [38]:
# The raw counts in adata.X are integers for np.ndarray
# Put the raw counts, cast to int and densified, into X and delete the raw
# slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.astype(int)
adata.X = adata.X.toarray()
del adata.raw
save_and_test(adata, 'False')

raw slot is absent
---------
Datatype of X matrix: int64
Format of X matrix: numpy array
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Raw matrix values must have type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:00.411815 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [39]:
#raw layer includes some negative int values - in adata.raw.X
# Rebuild the raw layer, overwrite its first five rows with -145, store it as
# CSR and attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.toarray()
raw.X[0:5] = -145
raw.X = sparse.csr_matrix(raw.X)
adata.raw = raw
# Sanity check: show the stored raw values for the first five cells.
print("Nonzero expression values in adata.raw.X for first 5 cells: {}".format(adata[0:5,:].raw.X.data))
save_and_test(adata, 'False')

Nonzero expression values in adata.raw.X for first 5 cells: [-145. -145. -145. ... -145. -145. -145.]
Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: All non-zero values in raw matrix must be positive integers of type numpy.float32.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:01.637124 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [40]:
#raw layer includes some negative int values - in adata.X
adata = sc.read_h5ad("../valid.h5ad")
# Start from this cell's own adata.raw counts instead of a `raw` variable left
# behind by an earlier cell, so the cell works on a fresh kernel and in any
# execution order. The first five rows are then overwritten with -145 and the
# raw slot is deleted. Expected verdict: invalid.
adata.X = adata.raw.X.toarray()
adata.X[0:5] = -145
adata.X = sparse.csr_matrix(adata.X)
del adata.raw
# Sanity check: show the stored values for the first five cells.
print("Nonzero expression values in adata.X first 5 cells: {}".format(adata[0:5,:].X.data))
save_and_test(adata, 'False')

Nonzero expression values in adata.X first 5 cells: [-145. -145. -145. ... -145. -145. -145.]
raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csr
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: All non-zero values in raw matrix must be positive integers of type numpy.float32.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:01.538073 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [41]:
# The raw counts in adata.raw.X is csr and has zero counts for first 5 cells
# Rebuild the raw layer, zero out its first five rows, store it as CSR and
# attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.toarray()
raw.X[0:5] = 0
raw.X = sparse.csr_matrix(raw.X)
adata.raw = raw
# Sanity check: show the stored raw values for the first five cells.
print("Nonzero expression values in adata.raw.X for first 5 cells: {}".format(adata[0:5,:].raw.X.data))
save_and_test(adata, 'False')

Nonzero expression values in adata.raw.X for first 5 cells: []
Datatype of raw.X matrix: float32
Format of raw.X matrix: csr
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:01.683724 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [42]:
# The raw counts in adata.raw.X is csc and has zero counts for first 5 cells
# Rebuild the raw layer, zero out its first five rows, store it as CSC and
# attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.toarray()
raw.X[0:5] = 0
raw.X = sparse.csc_matrix(raw.X)
adata.raw = raw
# Sanity check: show the stored raw values for the first five cells.
print("Nonzero expression values in adata.raw.X for first 5 cells: {}".format(adata[0:5,:].raw.X.data))
save_and_test(adata, 'False')

Nonzero expression values in adata.raw.X for first 5 cells: []
Datatype of raw.X matrix: float32
Format of raw.X matrix: csc
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:02.167283 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [43]:
# The raw counts in adata.raw.X is np.ndarray and has zero counts for first 5 cells
# Rebuild the raw layer as a dense array, zero out its first five rows and
# attach it again. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
raw = ad.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
raw.X = raw.X.toarray()
raw.X[0:5] = 0
adata.raw = raw
# Sanity check: keep only those of the first five raw rows that are not
# entirely zero; an empty result means all five rows are zero.
print("Nonzero expression values in adata.raw.X for first 5 cells: {}".format(adata[0:5,:].raw.X[~np.all(adata[0:5,:].raw.X == 0, axis=1)]))
save_and_test(adata, 'False')

Nonzero expression values in adata.raw.X for first 5 cells: []
Datatype of raw.X matrix: float32
Format of raw.X matrix: numpy array
raw.X is all integers
---------
Datatype of X matrix: float32
Format of X matrix: csr
X contains non-integer values
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.
ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.
Validation complete in 0:00:01.217033 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [44]:
# The raw counts in adata.X is csr and has zero counts for first 5 cells
# Put the raw counts into X with the first five rows zeroed, stored as CSR,
# and delete the raw slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.toarray()
adata.X[0:5] = 0
adata.X = sparse.csr_matrix(adata.X)
del adata.raw
# Sanity check: show the stored values for the first five cells.
print("Nonzero expression values in adata.X first 5 cells: {}".format(adata[0:5,:].X.data))
save_and_test(adata, 'False')

Nonzero expression values in adata.X first 5 cells: []
raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csr
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:01.556297 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [45]:
# The raw counts in adata.X is csc and has zero counts for first 5 cells
# Put the raw counts into X with the first five rows zeroed, stored as CSC,
# and delete the raw slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.toarray()
adata.X[0:5] = 0
adata.X = sparse.csc_matrix(adata.X)
del adata.raw
# Sanity check: show the stored values for the first five cells.
print("Nonzero expression values in adata.X first 5 cells: {}".format(adata[0:5,:].X.data))
save_and_test(adata, 'False')

Nonzero expression values in adata.X first 5 cells: []
raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: csc
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
Matrices are in CSC format; loading entire dataset into memory.
ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:02.057178 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [46]:
# The raw counts in adata.X is np.ndarray and has zero counts for first 5 cells
# Put a dense copy of the raw counts into X with the first five rows zeroed
# and delete the raw slot. Expected verdict: invalid.
adata = sc.read_h5ad("../valid.h5ad")
adata.X = adata.raw.X.toarray()
adata.X[0:5] = 0
del adata.raw
# Sanity check: keep only those of the first five rows that are not entirely
# zero; an empty result means all five rows are zero.
print("Nonzero expression values in adata.X first 5 cells: {}".format(adata[0:5,:].X[~np.all(adata[0:5,:].X == 0, axis=1)]))
save_and_test(adata, 'False')

Nonzero expression values in adata.X first 5 cells: []
raw slot is absent
---------
Datatype of X matrix: float32
Format of X matrix: numpy array
X is all integers
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.
ERROR: Raw data is missing: there is only a normalized matrix in X and no raw.X
Validation complete in 0:00:01.103737 with status is_valid=False
------------------
[1m[92mPASSED[0m
