https://github.com/chanzuckerberg/single-cell-curation/issues/513 <br>
https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#column_colors

In [1]:
import matplotlib.colors as mcolors
import numpy as np
import os
import pandas as pd
import random
import scanpy as sc
import subprocess

In [2]:
def validate(file):
    validate_process = subprocess.run(['cellxgene-schema', 'validate', file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            valid = line.split('=')[-1]
            return valid

In [3]:
def save_and_test(adata, expected):
    adata.write(filename='test.h5ad')
    test_adata = sc.read_h5ad('test.h5ad')
    for k,v in test_adata.uns.items():
        if k.endswith('_colors'):
            print('--')
            print(k)
            print(v)
            obs_field = k.replace('_colors','')
            if obs_field in test_adata.obs.columns:
                uniq_vals = str(len(test_adata.obs[obs_field].unique()))
                val_type = str(test_adata.obs[obs_field].dtype)
                print(obs_field + ':' + uniq_vals + ' values of type:' + val_type)
            else:
                print(obs_field + ' not in obs')
            print('--')
    print('------------------')

    valid = validate('test.h5ad')
    print('------------------')
    if expected != valid:
        print('\033[1m\033[91mERROR\033[0m')
    else:
        print('\033[1m\033[92mPASSED\033[0m')
    os.remove('test.h5ad')

In [4]:
adata = sc.read_h5ad('../valid.h5ad')

In [5]:
df = pd.DataFrame(adata.obs.dtypes).reset_index()
df['unique len'] = df['index'].apply(lambda x: len(adata.obs[x].unique()))
df.set_index('index', inplace=True)
df

Unnamed: 0_level_0,0,unique len
index,Unnamed: 1_level_1,Unnamed: 2_level_1
BICCN_cluster_id,float64,61
QC,category,2000
BICCN_cluster_label,category,61
BICCN_subclass_label,category,17
BICCN_class_label,category,3
cluster_color,category,61
size,float64,55
temp_class_label,category,17
BICCN_ontology_term_id,category,17
assay_ontology_term_id,category,1


In [6]:
def create_color_list(count):
    # Generating a random number in between 0 and 2^24
    colors = random.sample(range(0, 2**24),count)

    # Converting that number from base-10
    # (decimal) to base-16 (hexadecimal)
    hex_colors = [hex(c).replace('0x','#') for c in colors]

    return hex_colors

**Test valid cases**

In [7]:
#untouched, no colors defined
save_and_test(adata, 'True')

------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.842838 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [8]:
#colors matches length of obs - named & hex
adata.uns['temp_class_label_colors'] = np.array(create_color_list(17))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 3))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 3))


--
BICCN_class_label_colors
['darkgoldenrod' 'lawngreen' 'lightgrey']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#f9d05c' '#b70f5b' '#e0fcfd' '#d6bbc7' '#256a58' '#9b6d49' '#f675f3'
 '#a66f4f' '#21f39a' '#3cddd0' '#1d750d' '#d5ea0a' '#84345b' '#74b0c7'
 '#8cb84f' '#16da6e' '#b94371']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.828629 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [9]:
#colors exceeds length of obs - named & hex
adata.uns['temp_class_label_colors'] = np.array(create_color_list(40))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 12))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 12))


--
BICCN_class_label_colors
['lightpink' 'pink' 'sienna' 'forestgreen' 'lightgrey' 'snow' 'burlywood'
 'darkturquoise' 'goldenrod' 'firebrick' 'olivedrab' 'blueviolet']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#55464e' '#f5e029' '#d59c39' '#badeae' '#36c26' '#b7ebfb' '#e4018'
 '#2a05c3' '#9ce656' '#8706f4' '#87de9e' '#969ab0' '#f67e17' '#1f1932'
 '#278188' '#f52b46' '#4d888f' '#fa3c44' '#341ea4' '#3bbe7' '#5666f0'
 '#7c07bd' '#ca3e51' '#dd91f2' '#8c9aa4' '#9075' '#87ffd5' '#f435c'
 '#56d8cf' '#1dfa5b' '#a5814d' '#217a46' '#869446' '#269531' '#c40837'
 '#c075bd' '#dcdd36' '#8eb36f' '#943689' '#4a2c90']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.902623 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [10]:
adata.uns = {'title': 'temporary title'}
#colors for non-schema *_ontology_term_id - hex
adata.uns['BICCN_ontology_term_id_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'True')

--
BICCN_ontology_term_id_colors
['#63c1cf' '#92cb56' '#3fef1e' '#c793d0' '#a1c415' '#e20cc4' '#55f307'
 '#8a4da7' '#5e69e6' '#131a4a' '#b805c8' '#a78588' '#67300e' '#cde0bb'
 '#13897b' '#fd1178' '#b9fdc5']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.908282 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [11]:
#colors for non-schema_ontology_term_id - named
adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 17))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 17))


--
BICCN_ontology_term_id_colors
['slategray' 'indigo' 'wheat' 'lightslategrey' 'cadetblue' 'darksalmon'
 'yellowgreen' 'dimgrey' 'blueviolet' 'tomato' 'maroon' 'burlywood'
 'brown' 'darkkhaki' 'darkmagenta' 'whitesmoke' 'seagreen']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.840888 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [12]:
adata.uns = {'title': 'temporary title'}
#colors is for *_ontology_term_id = hex * named
adata.uns['sex_ontology_term_id_colors'] = np.array(create_color_list(2))
adata.uns['assay_ontology_term_id_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 1)
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['assay_ontology_term_id_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 1)


--
assay_ontology_term_id_colors
['hotpink']
assay_ontology_term_id:1 values of type:category
--
--
sex_ontology_term_id_colors
['#dc8124' '#9cd90d']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.840851 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [13]:
#duplicate colors in a list - hex & named
adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 2)
adata.uns['BICCN_class_label_colors'] += adata.uns['BICCN_class_label_colors']
adata.uns['BICCN_class_label_colors'] = np.array(adata.uns['BICCN_class_label_colors'])

adata.uns['cell_type_colors'] = create_color_list(3)
adata.uns['cell_type_colors'] += adata.uns['cell_type_colors']
adata.uns['cell_type_colors'] = np.array(adata.uns['cell_type_colors'])
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 2)


--
BICCN_class_label_colors
['dimgray' 'salmon' 'dimgray' 'salmon']
BICCN_class_label:3 values of type:category
--
--
assay_ontology_term_id_colors
['hotpink']
assay_ontology_term_id:1 values of type:category
--
--
cell_type_colors
['#576b84' '#4f1610' '#88a4fb' '#576b84' '#4f1610' '#88a4fb']
cell_type not in obs
--
--
sex_ontology_term_id_colors
['#dc8124' '#9cd90d']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.832948 with status is_valid=True
------------------
[1m[92mPASSED[0m


**Test invalid cases**

In [14]:
adata.uns = {'title': 'temporary title'}
#invalid test for length of colors array is less than obs counterpart - named & hex
adata.uns['temp_class_label_colors'] = np.array(create_color_list(2))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))


--
BICCN_class_label_colors
['orange']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#10f41b' '#7f20df']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.828900 with status is_valid=True
------------------
[1m[91mERROR[0m


In [15]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is empty np array
adata.uns['BICCN_class_label_colors'] = np.array([])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.828037 with status is_valid=True
------------------
[1m[91mERROR[0m


In [16]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is None or np.nan
adata.uns['BICCN_class_label_colors'] = np.array([np.nan, np.nan, np.nan])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[nan nan nan]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.827962 with status is_valid=True
------------------
[1m[91mERROR[0m


In [17]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is empty strings
adata.uns['BICCN_class_label_colors'] = np.array(['','',''])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['' '' '']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.830766 with status is_valid=True
------------------
[1m[91mERROR[0m


In [18]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors as string or dictionary
adata.uns['BICCN_class_label_colors'] = ','.join(create_color_list(17))
adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(mcolors.CSS4_COLORS.keys(), 2))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(mcolors.CSS4_COLORS.keys(), 2))


--
BICCN_class_label_colors
#d18a20,#c477fc,#ec5176,#a90c11,#566b12,#f51748,#d7fdf6,#f2e706,#e52740,#30d573,#e966d8,#a572fa,#a5571a,#837b78,#249c79,#d119fe,#b8f567
BICCN_class_label:3 values of type:category
--
--
sex_ontology_term_id_colors
blueviolet,lightgoldenrodyellow
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.834110 with status is_valid=True
------------------
[1m[91mERROR[0m


In [19]:
adata.uns = {'title': 'temporary title'}
#colors for schema fields - named & hex
adata.uns['cell_type_colors'] = np.array(create_color_list(6))
adata.uns['sex_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 2))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['sex_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 2))


--
cell_type_colors
['#4ebf02' '#c9a7e' '#7cc677' '#fee897' '#74e39f' '#4593e5']
cell_type not in obs
--
--
sex_colors
['mediumpurple' 'darkcyan']
sex not in obs
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.851521 with status is_valid=True
------------------
[1m[91mERROR[0m


In [20]:
adata.uns = {'title': 'temporary title'}
#colors does not have obs counterpart
adata.uns['author_cell_type_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

--
author_cell_type_colors
['#b3eca6' '#b3d473' '#bfcd0d' '#badd48' '#c58308' '#c1a967' '#13955e'
 '#240a3' '#bc3b17' '#3051e4' '#200417' '#52dfac' '#b5a826' '#cf797a'
 '#cad48c' '#a678fd' '#12c6ea']
author_cell_type not in obs
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.827955 with status is_valid=True
------------------
[1m[91mERROR[0m


In [21]:
adata.uns = {'title': 'temporary title'}
#colors is mixed named & hex
adata.uns['BICCN_ontology_term_id_colors'] = \
    np.array(create_color_list(20) + random.sample(mcolors.CSS4_COLORS.keys(), 20))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  np.array(create_color_list(20) + random.sample(mcolors.CSS4_COLORS.keys(), 20))


--
BICCN_ontology_term_id_colors
['#9ec3d0' '#18002d' '#fe5868' '#4ce6b9' '#6d6c4d' '#aaa922' '#c9f4e4'
 '#a5f62c' '#449bb7' '#c75dac' '#13716c' '#43a60b' '#df0b03' '#eb6888'
 '#90630c' '#e5ae97' '#3a988a' '#59da08' '#8d0b1e' '#6b51c1' 'lightgreen'
 'darkolivegreen' 'darkkhaki' 'tomato' 'firebrick' 'red' 'blanchedalmond'
 'dimgrey' 'wheat' 'goldenrod' 'mediumblue' 'darkorange' 'aqua' 'cyan'
 'olivedrab' 'palegoldenrod' 'khaki' 'ivory' 'fuchsia' 'lime']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.857961 with status is_valid=True
------------------
[1m[91mERROR[0m


In [22]:
adata.uns = {'title': 'temporary title'}
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.BASE_COLORS.keys(), 3))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.BASE_COLORS.keys(), 3))


--
BICCN_class_label_colors
['g' 'r' 'w']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.861840 with status is_valid=True
------------------
[1m[91mERROR[0m


In [23]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.values()), 3))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[[0.75 0.   0.75]
 [0.   0.   1.  ]
 [0.75 0.75 0.  ]]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.843230 with status is_valid=True
------------------
[1m[91mERROR[0m


In [25]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.XKCD_COLORS.keys(), 3))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.XKCD_COLORS.keys(), 3))


--
BICCN_class_label_colors
['xkcd:burnt sienna' 'xkcd:twilight' 'xkcd:pale orange']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.841223 with status is_valid=True
------------------
[1m[91mERROR[0m


In [26]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.TABLEAU_COLORS.keys(), 3))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.TABLEAU_COLORS.keys(), 3))


--
BICCN_class_label_colors
['tab:orange' 'tab:green' 'tab:cyan']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.839049 with status is_valid=True
------------------
[1m[91mERROR[0m


In [27]:
adata.uns = {'title': 'temporary title'}
#colors counterpart in obs is boolean
adata.uns['is_primary_data_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['is_primary_data_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))


--
is_primary_data_colors
['white']
is_primary_data:1 values of type:bool
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.832784 with status is_valid=True
------------------
[1m[91mERROR[0m


In [28]:
adata.uns = {'title': 'temporary title'}
#colors counterpart in obs is float
adata.uns['size_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

--
size_colors
['#3c9c72' '#4e007e' '#543712' '#1af883' '#a80d82' '#fc25b2' '#eb5433'
 '#f6e95d' '#2d2c1b' '#c0a4ef' '#9282f2' '#d044bc' '#2a7913' '#a3ae59'
 '#cf9cfd' '#4f6ff' '#a242e1']
size:55 values of type:float64
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.901279 with status is_valid=True
------------------
[1m[91mERROR[0m


In [29]:
adata.uns = {'title': 'temporary title'}
adata.obs['BICCN_cluster_id'] = adata.obs['BICCN_cluster_id'].map(int)
#colors counterpart in obs is int
adata.uns['BICCN_cluster_id_colors'] = np.array(create_color_list(61))
save_and_test(adata, 'False')

--
BICCN_cluster_id_colors
['#3ec48c' '#fdb82f' '#e16d27' '#f0034f' '#cf1274' '#60af21' '#faf8fb'
 '#6eaca7' '#76523d' '#90d94c' '#95cd6e' '#d677b6' '#648e58' '#40e236'
 '#a06024' '#df0142' '#e82262' '#1f407f' '#2ac226' '#75eb' '#aae970'
 '#6587a4' '#3cc437' '#90feb0' '#ea0611' '#43f800' '#74e1e6' '#b069df'
 '#11374b' '#60c5ac' '#555408' '#f765f7' '#26969b' '#2e818c' '#e955b6'
 '#1e4a75' '#500773' '#58594' '#c2f2f4' '#d39151' '#e9b8f0' '#58e81d'
 '#1cd1b5' '#b2c57b' '#21009c' '#98a864' '#b29bdb' '#8d8036' '#7c0ded'
 '#fd9bac' '#761838' '#4ae565' '#28e527' '#80ab01' '#80cc38' '#8312f1'
 '#ed7159' '#c03075' '#dadfa0' '#d82c09' '#930aee']
BICCN_cluster_id:61 values of type:int64
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.924559 with status is_valid=True
------------------
[1m[91mERROR[0m


In [31]:
#not np.array
adata.uns['temp_class_label_colors'] = create_color_list(17)
adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 3)
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 3)


--
BICCN_class_label_colors
['darkred' 'purple' 'navajowhite']
BICCN_class_label:3 values of type:category
--
--
BICCN_cluster_id_colors
['#3ec48c' '#fdb82f' '#e16d27' '#f0034f' '#cf1274' '#60af21' '#faf8fb'
 '#6eaca7' '#76523d' '#90d94c' '#95cd6e' '#d677b6' '#648e58' '#40e236'
 '#a06024' '#df0142' '#e82262' '#1f407f' '#2ac226' '#75eb' '#aae970'
 '#6587a4' '#3cc437' '#90feb0' '#ea0611' '#43f800' '#74e1e6' '#b069df'
 '#11374b' '#60c5ac' '#555408' '#f765f7' '#26969b' '#2e818c' '#e955b6'
 '#1e4a75' '#500773' '#58594' '#c2f2f4' '#d39151' '#e9b8f0' '#58e81d'
 '#1cd1b5' '#b2c57b' '#21009c' '#98a864' '#b29bdb' '#8d8036' '#7c0ded'
 '#fd9bac' '#761838' '#4ae565' '#28e527' '#80ab01' '#80cc38' '#8312f1'
 '#ed7159' '#c03075' '#dadfa0' '#d82c09' '#930aee']
BICCN_cluster_id:61 values of type:int64
--
--
temp_class_label_colors
['#5dddd3' '#c041e1' '#535cda' '#accb2a' '#825f4c' '#8a71f4' '#67fd7'
 '#4f91d8' '#e4bc5a' '#552496' '#75db37' '#dd1396' '#27d7c9' '#bcfdce'
 '#926780' '#4c76b5' '#208573']
te