https://github.com/chanzuckerberg/single-cell-curation/issues/513 <br>
https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#column_colors

In [8]:
import cxg_upload
import matplotlib.colors as mcolors
import numpy as np
import os
import pandas as pd
import random
import scanpy as sc
import subprocess
from datetime import datetime

In [9]:
def validate(file):
    validate_process = subprocess.run(['cellxgene-schema', 'validate', file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            valid = line.split('=')[-1]
            return valid

In [10]:
def save_and_test(adata, expected):
    now = datetime.now() 
    dt_string = now.strftime('%m/%d %H:%M')
    adata.uns['title'] += (' - ' + dt_string)

    file = 'test.h5ad'
    adata.write(filename=file)
    test_adata = sc.read_h5ad(file)
    for k,v in test_adata.uns.items():
        if k.endswith('_colors'):
            print('--')
            print(k)
            print(v)
            obs_field = k.replace('_colors','')
            if obs_field in test_adata.obs.columns:
                uniq_vals = str(len(test_adata.obs[obs_field].unique()))
                val_type = str(test_adata.obs[obs_field].dtype)
                print(obs_field + ':' + uniq_vals + ' values of type:' + val_type)
            else:
                print(obs_field + ' not in obs')
            print('--')
    print('------------------')

    valid = validate(file)
    print('------------------')
    if expected != valid:
        print('\033[1m\033[91mERROR\033[0m')
    else:
        print('\033[1m\033[92mPASSED\033[0m')
        if expected == 'True':
            cxg_upload.upload(file)
    os.remove(file)

In [11]:
adata = sc.read_h5ad('../valid.h5ad')

In [12]:
df = pd.DataFrame(adata.obs.dtypes).reset_index()
df['unique len'] = df['index'].apply(lambda x: len(adata.obs[x].unique()))
df.set_index('index', inplace=True)
df

Unnamed: 0_level_0,0,unique len
index,Unnamed: 1_level_1,Unnamed: 2_level_1
BICCN_cluster_id,float64,61
QC,category,2000
BICCN_cluster_label,category,61
BICCN_subclass_label,category,17
BICCN_class_label,category,3
cluster_color,category,61
size,float64,55
temp_class_label,category,17
BICCN_ontology_term_id,category,17
assay_ontology_term_id,category,1


In [13]:
def create_color_list(count):
    # Generating a random number in between 0 and 2^24
    colors = random.sample(range(0, 2**24),count)

    # Converting that number from base-10
    # (decimal) to base-16 (hexadecimal)
    hex_colors = [hex(c).replace('0x','#') for c in colors]

    return hex_colors

**Test valid cases**

In [14]:
#untouched, no colors defined
save_and_test(adata, 'True')

------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.835897 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [15]:
#colors matches length of obs - named & hex
adata.uns['title'] = '513 colors matches length of obs'

adata.uns['temp_class_label_colors'] = np.array(create_color_list(17))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 3))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 3))


--
BICCN_class_label_colors
['orange' 'lightskyblue' 'rebeccapurple']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#b36e88' '#caec46' '#e1dbe0' '#bf372f' '#b285cc' '#e2e452' '#7533d2'
 '#455bd3' '#6f8a4e' '#3248e4' '#511c7b' '#33da70' '#37a073' '#374743'
 '#7723fc' '#716e4d' '#e7be8d']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.845156 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [16]:
#colors exceeds length of obs - named & hex
adata.uns['title'] = '513 colors exceeds length of obs'

adata.uns['temp_class_label_colors'] = np.array(create_color_list(40))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 12))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 12))


--
BICCN_class_label_colors
['olive' 'mistyrose' 'violet' 'goldenrod' 'midnightblue' 'slategrey'
 'mediumblue' 'lavenderblush' 'darkgoldenrod' 'ghostwhite' 'tan'
 'blanchedalmond']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#602492' '#101e62' '#837d10' '#bfd301' '#bafc48' '#50eafa' '#62d4f'
 '#4e5374' '#ff8959' '#440e38' '#c251a1' '#b4a357' '#6e1619' '#b23184'
 '#806c4c' '#a48813' '#2cc5ff' '#f8c6ce' '#79f993' '#5e40bf' '#4062a9'
 '#3bab1f' '#f1db5e' '#2fbb02' '#ff067a' '#90fca2' '#1c63d4' '#7c1a04'
 '#9aca25' '#71e8e7' '#4fa9f9' '#62ddc8' '#abd911' '#f76cca' '#4f253b'
 '#d9f03d' '#a4fe6a' '#7ade37' '#a171f5' '#ed64e8']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.830164 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [17]:
adata.uns = {'title': 'temporary title'}
#colors for non-schema *_ontology_term_id - hex
adata.uns['title'] = '513 colors for non-schema *_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'True')

--
BICCN_ontology_term_id_colors
['#4af0d3' '#ec1677' '#9dbeb9' '#241ff7' '#6fcfea' '#ca3c4e' '#b9ec55'
 '#1bea42' '#7faffb' '#99c178' '#3eae32' '#24ab4a' '#934abb' '#8a0867'
 '#e35b4e' '#390987' '#9bf9e6']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.834851 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [18]:
#colors for non-schema_ontology_term_id - named
adata.uns['title'] = '513 colors for non-schema_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 17))
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 17))


--
BICCN_ontology_term_id_colors
['mediumblue' 'rosybrown' 'burlywood' 'honeydew' 'darkcyan' 'dimgray'
 'mediumvioletred' 'black' 'peachpuff' 'lightgreen' 'mediumorchid'
 'palegoldenrod' 'rebeccapurple' 'lightsalmon' 'limegreen' 'linen'
 'olivedrab']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.834125 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [19]:
adata.uns = {'title': 'temporary title'}
#colors is for *_ontology_term_id = hex * named
adata.uns['title'] = '513 colors is for *_ontology_term_id'

adata.uns['sex_ontology_term_id_colors'] = np.array(create_color_list(2))
adata.uns['assay_ontology_term_id_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 1)
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['assay_ontology_term_id_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 1)


--
assay_ontology_term_id_colors
['aliceblue']
assay_ontology_term_id:1 values of type:category
--
--
sex_ontology_term_id_colors
['#9fc3db' '#866db7']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.832303 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [20]:
#duplicate colors in a list - hex & named
adata.uns['title'] = '513 duplicate colors in a list'

adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 2)
adata.uns['BICCN_class_label_colors'] += adata.uns['BICCN_class_label_colors']
adata.uns['BICCN_class_label_colors'] = np.array(adata.uns['BICCN_class_label_colors'])

adata.uns['cell_type_colors'] = create_color_list(3)
adata.uns['cell_type_colors'] += adata.uns['cell_type_colors']
adata.uns['cell_type_colors'] = np.array(adata.uns['cell_type_colors'])
save_and_test(adata, 'True')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = random.sample(mcolors.CSS4_COLORS.keys(), 2)


--
BICCN_class_label_colors
['dimgray' 'grey' 'dimgray' 'grey']
BICCN_class_label:3 values of type:category
--
--
assay_ontology_term_id_colors
['aliceblue']
assay_ontology_term_id:1 values of type:category
--
--
cell_type_colors
['#a1c006' '#703058' '#f0a7be' '#a1c006' '#703058' '#f0a7be']
cell_type not in obs
--
--
sex_ontology_term_id_colors
['#9fc3db' '#866db7']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.841411 with status is_valid=True
------------------
[1m[92mPASSED[0m


**Test invalid cases**

In [21]:
adata.uns = {'title': 'temporary title'}
#invalid test for length of colors array is less than obs counterpart - named & hex
adata.uns['temp_class_label_colors'] = np.array(create_color_list(2))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))


--
BICCN_class_label_colors
['tan']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#8192d3' '#1d3793']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.851006 with status is_valid=True
------------------
[1m[91mERROR[0m


In [22]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is empty np array
adata.uns['BICCN_class_label_colors'] = np.array([])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.849144 with status is_valid=True
------------------
[1m[91mERROR[0m


In [23]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is None or np.nan
adata.uns['BICCN_class_label_colors'] = np.array([np.nan, np.nan, np.nan])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[nan nan nan]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.843677 with status is_valid=True
------------------
[1m[91mERROR[0m


In [24]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors array that is empty strings
adata.uns['BICCN_class_label_colors'] = np.array(['','',''])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['' '' '']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.863752 with status is_valid=True
------------------
[1m[91mERROR[0m


In [25]:
adata.uns = {'title': 'temporary title'}
#invalid test for colors as string or dictionary
adata.uns['BICCN_class_label_colors'] = ','.join(create_color_list(17))
adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(mcolors.CSS4_COLORS.keys(), 2))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(mcolors.CSS4_COLORS.keys(), 2))


--
BICCN_class_label_colors
#1879f5,#efeed0,#a92a3b,#61f5d6,#99161f,#d998a0,#122ab3,#23e4c0,#8c96a4,#da9a3e,#d44e0d,#880be4,#d3d39f,#c07171,#37418c,#dc11e4,#db90c9
BICCN_class_label:3 values of type:category
--
--
sex_ontology_term_id_colors
slategray,lightgoldenrodyellow
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.983656 with status is_valid=True
------------------
[1m[91mERROR[0m


In [26]:
adata.uns = {'title': 'temporary title'}
#colors for schema fields - named & hex
adata.uns['cell_type_colors'] = np.array(create_color_list(6))
adata.uns['sex_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 2))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['sex_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 2))


--
cell_type_colors
['#761c1f' '#e461fd' '#f45ea9' '#a13907' '#488b1' '#d9ae44']
cell_type not in obs
--
--
sex_colors
['lightpink' 'lightgray']
sex not in obs
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.832762 with status is_valid=True
------------------
[1m[91mERROR[0m


In [27]:
adata.uns = {'title': 'temporary title'}
#colors does not have obs counterpart
adata.uns['author_cell_type_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

--
author_cell_type_colors
['#4a26a6' '#6c91ac' '#c05322' '#1541e0' '#45532f' '#ca0d36' '#2d4dfd'
 '#ea5b56' '#7174c0' '#471490' '#947537' '#d07f79' '#ca11f5' '#680bef'
 '#85d2b8' '#801adf' '#7d6e30']
author_cell_type not in obs
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.875487 with status is_valid=True
------------------
[1m[91mERROR[0m


In [28]:
adata.uns = {'title': 'temporary title'}
#colors is mixed named & hex
adata.uns['BICCN_ontology_term_id_colors'] = \
    np.array(create_color_list(20) + random.sample(mcolors.CSS4_COLORS.keys(), 20))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  np.array(create_color_list(20) + random.sample(mcolors.CSS4_COLORS.keys(), 20))


--
BICCN_ontology_term_id_colors
['#8e93f' '#8ca5e5' '#56d253' '#24510e' '#127eb3' '#6bdd47' '#6f7300'
 '#e494d2' '#acc3c9' '#fb1f95' '#d98953' '#132202' '#b1d331' '#bcafbf'
 '#f39088' '#45fd10' '#d3f373' '#6c671b' '#12f394' '#ef7735'
 'darkturquoise' 'fuchsia' 'gainsboro' 'mediumspringgreen' 'black' 'beige'
 'dodgerblue' 'skyblue' 'navajowhite' 'lemonchiffon' 'mediumaquamarine'
 'rebeccapurple' 'darkseagreen' 'lightslategrey' 'ghostwhite' 'tomato'
 'slategray' 'springgreen' 'lightcyan' 'indianred']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.027402 with status is_valid=True
------------------
[1m[91mERROR[0m


In [29]:
adata.uns = {'title': 'temporary title'}
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.BASE_COLORS.keys(), 3))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.BASE_COLORS.keys(), 3))


--
BICCN_class_label_colors
['c' 'r' 'k']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.871584 with status is_valid=True
------------------
[1m[91mERROR[0m


In [30]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.values()), 3))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[[0.   0.   0.  ]
 [0.75 0.75 0.  ]
 [0.   0.5  0.  ]]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.879610 with status is_valid=True
------------------
[1m[91mERROR[0m


In [31]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.XKCD_COLORS.keys(), 3))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.XKCD_COLORS.keys(), 3))


--
BICCN_class_label_colors
['xkcd:greenish teal' 'xkcd:leather' 'xkcd:hunter green']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.851622 with status is_valid=True
------------------
[1m[91mERROR[0m


In [32]:
#colors is non-named or hex
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.TABLEAU_COLORS.keys(), 3))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['BICCN_class_label_colors'] = np.array(random.sample(mcolors.TABLEAU_COLORS.keys(), 3))


--
BICCN_class_label_colors
['tab:pink' 'tab:red' 'tab:gray']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.912127 with status is_valid=True
------------------
[1m[91mERROR[0m


In [33]:
adata.uns = {'title': 'temporary title'}
#colors counterpart in obs is boolean
adata.uns['is_primary_data_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))
save_and_test(adata, 'False')

since Python 3.9 and will be removed in a subsequent version.
  adata.uns['is_primary_data_colors'] = np.array(random.sample(mcolors.CSS4_COLORS.keys(), 1))


--
is_primary_data_colors
['palegoldenrod']
is_primary_data:1 values of type:bool
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.852724 with status is_valid=True
------------------
[1m[91mERROR[0m


In [34]:
adata.uns = {'title': 'temporary title'}
#colors counterpart in obs is float
adata.uns['size_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

--
size_colors
['#ea466c' '#40aaa1' '#520c7' '#d573b5' '#f79140' '#8cdc00' '#3a296e'
 '#28e29e' '#28702c' '#4ce739' '#886ac3' '#ef8bdb' '#ef4ffd' '#23167'
 '#dea1a5' '#db6a3c' '#4c800b']
size:55 values of type:float64
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.844212 with status is_valid=True
------------------
[1m[91mERROR[0m


In [35]:
adata.uns = {'title': 'temporary title'}
adata.obs['BICCN_cluster_id'] = adata.obs['BICCN_cluster_id'].map(int)
#colors counterpart in obs is int
adata.uns['BICCN_cluster_id_colors'] = np.array(create_color_list(61))
save_and_test(adata, 'False')

--
BICCN_cluster_id_colors
['#33b6f5' '#6b1be3' '#287431' '#c759ab' '#71a672' '#eca806' '#69b102'
 '#92546d' '#ae066' '#f285bf' '#a748dc' '#301cb6' '#c26be9' '#775fd1'
 '#174f97' '#81ad93' '#fabe17' '#1078a8' '#115a23' '#b13572' '#15954a'
 '#7da004' '#7f6ce9' '#f57ea2' '#cfec94' '#f3b979' '#a21b57' '#a2dbdc'
 '#f95441' '#6a0257' '#3dc9eb' '#8107f4' '#aed1d0' '#b0a85f' '#44bf6a'
 '#a9fb1a' '#be1a09' '#e67257' '#802a38' '#bbcb8' '#668ada' '#c3f50f'
 '#49c295' '#421baa' '#c6cb03' '#3075fe' '#d5bcfc' '#5db5b5' '#273693'
 '#4a7d75' '#f2deb5' '#86213b' '#6f391c' '#8723d9' '#98e054' '#c86759'
 '#4791f' '#c03dc8' '#6f2b56' '#8e7f0d' '#19d100']
BICCN_cluster_id:61 values of type:int64
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.848217 with status is_valid=True
------------------
[1m[91mERROR[0m


In [36]:
#not np.array - couldn't test as array seems to be converted upon writing AnnData