https://github.com/chanzuckerberg/single-cell-curation/issues/513 <br>
https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#column_colors

In [24]:
#import cxg_upload
import matplotlib.colors as mcolors
import numpy as np
import os
import pandas as pd
import random
import scanpy as sc
import subprocess
from datetime import datetime

In [25]:
def validate(file):
    validate_process = subprocess.run(['cellxgene-schema', 'validate', file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            valid = line.split('=')[-1]
            return valid

In [26]:
def save_and_test(adata, expected):
    now = datetime.now() 
    dt_string = now.strftime('%m/%d %H:%M')
    adata.uns['title'] += (' - ' + dt_string)

    file = 'test.h5ad'
    adata.write(filename=file)
    test_adata = sc.read_h5ad(file)
    for k,v in test_adata.uns.items():
        if k.endswith('_colors'):
            print('--')
            print(k)
            print(v)
            obs_field = k.replace('_colors','')
            if obs_field in test_adata.obs.columns:
                uniq_vals = str(len(test_adata.obs[obs_field].unique()))
                val_type = str(test_adata.obs[obs_field].dtype)
                print(obs_field + ':' + uniq_vals + ' values of type:' + val_type)
            else:
                print(obs_field + ' not in obs')
            print('--')
    print('------------------')

    valid = validate(file)
    print('------------------')
    if expected != valid:
        print('\033[1m\033[91mERROR\033[0m')
    else:
        print('\033[1m\033[92mPASSED\033[0m')
        #if expected == 'True':
            #cxg_upload.upload(file)
    os.remove(file)

In [27]:
adata = sc.read_h5ad('../valid.h5ad')

In [28]:
df = pd.DataFrame(adata.obs.dtypes).reset_index()
df['unique len'] = df['index'].apply(lambda x: len(adata.obs[x].unique()))
df.set_index('index', inplace=True)
df

Unnamed: 0_level_0,0,unique len
index,Unnamed: 1_level_1,Unnamed: 2_level_1
BICCN_cluster_id,float64,61
QC,category,2000
BICCN_cluster_label,category,61
BICCN_subclass_label,category,17
BICCN_class_label,category,3
cluster_color,category,61
size,float64,55
temp_class_label,category,17
BICCN_ontology_term_id,category,17
assay_ontology_term_id,category,1


In [29]:
def create_color_list(count):
    # Generating a random number in between 0 and 2^24
    colors = random.sample(range(0, 2**24),count)

    # Converting that number from base-10
    # (decimal) to base-16 (hexadecimal)
    hex_colors = [hex(c).replace('0x','#') for c in colors]

    return hex_colors

**Test valid cases**

In [30]:
#untouched, no colors defined
save_and_test(adata, 'True')

------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.694509 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [31]:
#colors matches length of obs - named & hex
adata.uns['title'] = '513 colors matches length of obs'

adata.uns['temp_class_label_colors'] = np.array(create_color_list(17))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 3))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 3))


--
BICCN_class_label_colors
['indianred' 'seashell' 'slategrey']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#87679b' '#d8643e' '#1d6f9c' '#d6217d' '#c092af' '#e3c4d0' '#31c190'
 '#7050a7' '#5f4ea7' '#8423d9' '#30654f' '#e064ad' '#3c67a5' '#5cdafd'
 '#d859a3' '#e09231' '#2dfa87']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[BICCN_class_label_colors] does not have a corresponding categorical field in obs
ERROR: Colors field uns[temp_class_label_colors] does not have a corresponding categorical field in obs
Validation complete in 0:00:00.515107 with status is_valid=False
------------------
[1m[91mERROR[0m


In [32]:
#colors exceeds length of obs - named & hex
adata.uns['title'] = '513 colors exceeds length of obs'

adata.uns['temp_class_label_colors'] = np.array(create_color_list(40))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 12))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 12))


--
BICCN_class_label_colors
['plum' 'blue' 'dimgray' 'forestgreen' 'mediumblue' 'lavenderblush'
 'palegreen' 'deeppink' 'paleturquoise' 'darkgoldenrod' 'darksalmon'
 'yellowgreen']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#c03fef' '#9e23b9' '#7faf7c' '#1b3f83' '#b815fe' '#46bb64' '#c44e2'
 '#9967b7' '#acf053' '#f6d396' '#d1952e' '#8a9667' '#1924c9' '#fe5d70'
 '#ca46b9' '#362561' '#9034cc' '#e7c35' '#4b0d75' '#1e35ee' '#81e56a'
 '#5c05e1' '#5d5359' '#736b5e' '#519c25' '#6a4a00' '#54b0f6' '#dc44cb'
 '#3a8c38' '#1af69d' '#437864' '#d823a6' '#2b6340' '#86a243' '#5d9c7e'
 '#cab4ae' '#2fc6f6' '#a9433a' '#34a20f' '#a4c343']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[BICCN_class_label_colors] does not have a corresponding categorical field in obs
ERROR: Colors field uns[temp_class_label_colors] does not have a corresponding categorical field i

In [33]:
adata.uns = {'title': 'temporary title'}
#colors for non-schema *_ontology_term_id - hex
adata.uns['title'] = '513 colors for non-schema *_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'True')

--
BICCN_ontology_term_id_colors
['#5f388e' '#7357bb' '#a6a810' '#a659f4' '#7c5971' '#b86ae9' '#270f65'
 '#117d02' '#85023c' '#d97741' '#193858' '#6e10ad' '#506ead' '#a78e14'
 '#34492e' '#cadd95' '#7a24d1']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[BICCN_ontology_term_id_colors] does not have a corresponding categorical field in obs
Validation complete in 0:00:00.577442 with status is_valid=False
------------------
[1m[91mERROR[0m


In [34]:
#colors for non-schema_ontology_term_id - named
adata.uns['title'] = '513 colors for non-schema_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 17))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 17))


--
BICCN_ontology_term_id_colors
['azure' 'mistyrose' 'lightsteelblue' 'lightsalmon' 'sienna'
 'palevioletred' 'orchid' 'rosybrown' 'lightgoldenrodyellow'
 'blanchedalmond' 'darkorchid' 'limegreen' 'indianred' 'sandybrown'
 'lightgrey' 'magenta' 'oldlace']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[BICCN_ontology_term_id_colors] does not have a corresponding categorical field in obs
Validation complete in 0:00:00.523869 with status is_valid=False
------------------
[1m[91mERROR[0m


In [35]:
adata.uns = {'title': 'temporary title'}
#colors is for *_ontology_term_id = hex * named
adata.uns['title'] = '513 colors is for *_ontology_term_id'

adata.uns['sex_ontology_term_id_colors'] = np.array(create_color_list(2))
adata.uns['assay_ontology_term_id_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 1)
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['assay_ontology_term_id_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 1)


--
assay_ontology_term_id_colors
['rebeccapurple']
assay_ontology_term_id:1 values of type:category
--
--
sex_ontology_term_id_colors
['#fe2469' '#9c87ac']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[assay_ontology_term_id_colors] does not have a corresponding categorical field in obs
ERROR: Colors field uns[sex_ontology_term_id_colors] does not have a corresponding categorical field in obs
Validation complete in 0:00:00.510250 with status is_valid=False
------------------
[1m[91mERROR[0m


In [36]:
#duplicate colors in a list - hex & named
adata.uns['title'] = '513 duplicate colors in a list'

adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 2)
adata.uns['BICCN_class_label_colors'] += adata.uns['BICCN_class_label_colors']
adata.uns['BICCN_class_label_colors'] = np.array(adata.uns['BICCN_class_label_colors'])

adata.uns['cell_type_colors'] = create_color_list(3)
adata.uns['cell_type_colors'] += adata.uns['cell_type_colors']
adata.uns['cell_type_colors'] = np.array(adata.uns['cell_type_colors'])
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 2)


--
BICCN_class_label_colors
['lightgoldenrodyellow' 'cyan' 'lightgoldenrodyellow' 'cyan']
BICCN_class_label:3 values of type:category
--
--
assay_ontology_term_id_colors
['rebeccapurple']
assay_ontology_term_id:1 values of type:category
--
--
cell_type_colors
['#af5b05' '#ef2305' '#f6826a' '#af5b05' '#ef2305' '#f6826a']
cell_type not in obs
--
--
sex_ontology_term_id_colors
['#fe2469' '#9c87ac']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[BICCN_class_label_colors] does not have a corresponding categorical field in obs
ERROR: Colors field uns[assay_ontology_term_id_colors] does not have a corresponding categorical field in obs
ERROR: Colors field uns[cell_type_colors] does not have a corresponding categorical field in obs
ERROR: Colors field uns[sex_ontology_term_id_colors] does not have a corresponding categorical field in obs
Validation complete in 0:00:00.519258 wit

**Test invalid cases**

In [None]:
adata.uns = {'title': 'temporary title'}
#invalid test for length of colors array is less than obs counterpart - named & hex
adata.uns['temp_class_label_colors'] = np.array(create_color_list(2))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is empty np array
adata.uns['BICCN_class_label_colors'] = np.array([])
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is None or np.nan
adata.uns['BICCN_class_label_colors'] = np.array([np.nan, np.nan, np.nan])
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is empty strings
adata.uns['BICCN_class_label_colors'] = np.array(['','',''])
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors as string or dictionary
adata.uns['BICCN_class_label_colors'] = ','.join(create_color_list(17))
adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(mcolors.CSS4_COLORS.keys(), 2))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#colors for schema fields - named & hex
adata.uns['cell_type_colors'] = np.array(create_color_list(6))
adata.uns['sex_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 2))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#colors does not have obs counterpart
adata.uns['author_cell_type_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#colors is mixed named & hex
adata.uns['BICCN_ontology_term_id_colors'] = \
    np.array(create_color_list(20) + random.sample(mcolors.CSS4_COLORS.keys(), 20))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.BASE_COLORS.keys(), 3))
save_and_test(adata, 'False')

In [None]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.values()), 3))
save_and_test(adata, 'False')

In [None]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.XKCD_COLORS.keys(), 3))
save_and_test(adata, 'False')

In [None]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.TABLEAU_COLORS.keys(), 3))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#colors counterpart in obs is boolean
adata.uns['is_primary_data_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
#colors counterpart in obs is float
adata.uns['size_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
adata.obs['BICCN_cluster_id'] = adata.obs['BICCN_cluster_id'].map(int)
#colors counterpart in obs is int
adata.uns['BICCN_cluster_id_colors'] = np.array(create_color_list(61))
save_and_test(adata, 'False')

In [None]:
#not np.array - couldn't test as array seems to be converted upon writing AnnData