https://github.com/chanzuckerberg/single-cell-curation/issues/513 <br>
https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#column_colors

In [1]:
#import cxg_upload
import matplotlib.colors as mcolors
import numpy as np
import os
import pandas as pd
import random
import scanpy as sc
import subprocess
from datetime import datetime

In [2]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and return the reported status.

    The command's stdout is echoed line by line, then its stderr. The first
    stderr line containing ``is_valid=`` ends the scan: everything after the
    last ``=`` on that line is returned and any later stderr lines are not
    printed.

    Args:
        file: Path of the h5ad file handed to the validator.

    Returns:
        The status text (e.g. ``'True'`` or ``'False'``) with surrounding
        whitespace removed, or ``None`` when no stderr line carries
        ``is_valid=``.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            # Strip so a trailing '\r' (CRLF output) or stray spaces cannot
            # make the caller's comparison against 'True'/'False' fail.
            return line.split('=')[-1].strip()
    # The validator never printed a status line.
    return None

In [3]:
def save_and_test(adata, expected):
    """Write ``adata`` to a scratch file, validate it, and print PASSED or ERROR.

    Steps:
      1. Append the current date/time to ``adata.uns['title']``. This mutates
         the caller's object, and ``uns`` must already hold a ``'title'``
         string. (Presumably this keeps titles distinguishable between runs;
         confirm against the upload workflow.)
      2. Write ``adata`` to ``test.h5ad`` and read it back.
      3. For every ``uns`` key ending in ``_colors`` in the re-read file, print
         the key, its value, and either the unique-value count and dtype of the
         matching ``obs`` column or a note that the column is absent.
      4. Run ``validate`` on the file and compare its result with ``expected``.
      5. Delete the scratch file, even if an earlier step raised.

    Args:
        adata: AnnData object to test.
        expected: The status string ``validate`` should return, ``'True'`` or
            ``'False'``.
    """
    dt_string = datetime.now().strftime('%m/%d %H:%M')
    adata.uns['title'] += (' - ' + dt_string)

    file = 'test.h5ad'
    try:
        adata.write(filename=file)
        # Inspect the re-read copy so the report shows what was actually
        # stored on disk rather than the in-memory values.
        test_adata = sc.read_h5ad(file)
        for k, v in test_adata.uns.items():
            if not k.endswith('_colors'):
                continue
            print('--')
            print(k)
            print(v)
            obs_field = k.replace('_colors', '')
            if obs_field in test_adata.obs.columns:
                uniq_vals = str(len(test_adata.obs[obs_field].unique()))
                val_type = str(test_adata.obs[obs_field].dtype)
                print(obs_field + ':' + uniq_vals + ' values of type:' + val_type)
            else:
                print(obs_field + ' not in obs')
            print('--')
        print('------------------')

        valid = validate(file)
        print('------------------')
        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
            #if expected == 'True':
                #cxg_upload.upload(file)
    finally:
        # Clean up on every path; the existence check keeps a failed write
        # from masking its own exception with FileNotFoundError.
        if os.path.exists(file):
            os.remove(file)

In [4]:
# Load the baseline file; every test case below modifies this object and re-validates it.
adata = sc.read_h5ad('../valid.h5ad')

In [5]:
# Summarize obs: one row per column with its dtype and its number of unique values.
df = pd.DataFrame(adata.obs.dtypes).reset_index()
df['unique len'] = [len(adata.obs[column].unique()) for column in df['index']]
df = df.set_index('index')
df

Unnamed: 0_level_0,0,unique len
index,Unnamed: 1_level_1,Unnamed: 2_level_1
BICCN_cluster_id,float64,61
QC,category,2000
BICCN_cluster_label,category,61
BICCN_subclass_label,category,17
BICCN_class_label,category,3
cluster_color,category,61
size,float64,55
temp_class_label,category,17
BICCN_ontology_term_id,category,17
assay_ontology_term_id,category,1


In [6]:
def create_color_list(count):
    """Return ``count`` randomly chosen hex color codes.

    The codes are the values of ``mcolors.cnames``, sampled without
    replacement from that list of values. Different names can share one hex
    value, so the result may still contain a repeated code.
    """
    hex_codes = list(mcolors.cnames.values())
    return random.sample(hex_codes, count)

**Test valid cases**

In [7]:
# Valid baseline: the file as loaded, with no *_colors entries added to uns.
save_and_test(adata, 'True')

------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.687781 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [8]:
# Valid: the number of colors equals the number of unique values in the obs column.
# temp_class_label gets 17 hex codes, BICCN_class_label gets 3 CSS4 color names.
adata.uns['title'] = '513 colors matches length of obs'

adata.uns['temp_class_label_colors'] = np.array(create_color_list(17))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 3))
save_and_test(adata, 'True')

--
BICCN_class_label_colors
['blueviolet' 'cornsilk' 'dimgrey']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#FFF5EE' '#B8860B' '#BA55D3' '#6B8E23' '#F8F8FF' '#696969' '#FF6347'
 '#F5FFFA' '#9ACD32' '#9370DB' '#C0C0C0' '#98FB98' '#40E0D0' '#6A5ACD'
 '#48D1CC' '#00FF00' '#A0522D']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.733874 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [9]:
# Valid: more colors than unique values in the obs column (40 hex for 17, 12 named for 3).
# The hex codes are lower-cased here, so lowercase hex is exercised as well.
adata.uns['title'] = '513 colors exceeds length of obs'

adata.uns['temp_class_label_colors'] = np.array([c.lower() for c in create_color_list(40)])
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 12))
save_and_test(adata, 'True')

--
BICCN_class_label_colors
['palegreen' 'mediumblue' 'papayawhip' 'ivory' 'honeydew' 'darkslategray'
 'lightskyblue' 'orange' 'darkmagenta' 'darkseagreen' 'rosybrown' 'green']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#5f9ea0' '#fffacd' '#40e0d0' '#f4a460' '#f8f8ff' '#808080' '#ff00ff'
 '#4682b4' '#c71585' '#cd5c5c' '#bdb76b' '#f08080' '#808000' '#7cfc00'
 '#00ced1' '#000000' '#daa520' '#ffff00' '#f5fffa' '#708090' '#191970'
 '#8b008b' '#deb887' '#bc8f8f' '#008080' '#ff69b4' '#00ffff' '#ffe4b5'
 '#00008b' '#00ff00' '#00bfff' '#4b0082' '#ffdead' '#ffa07a' '#9400d3'
 '#008b8b' '#0000ff' '#006400' '#4169e1' '#e9967a']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.669568 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [10]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Valid: hex colors for a non-schema *_ontology_term_id column (BICCN_ontology_term_id).
adata.uns['title'] = '513 colors for non-schema *_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'True')

--
BICCN_ontology_term_id_colors
['#FF8C00' '#9400D3' '#A52A2A' '#F8F8FF' '#F5F5DC' '#4169E1' '#800080'
 '#DDA0DD' '#708090' '#8B008B' '#F0FFFF' '#FFF5EE' '#FFE4B5' '#778899'
 '#D2B48C' '#800000' '#4B0082']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.641147 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [11]:
# Valid: CSS4 color names for a non-schema *_ontology_term_id column (BICCN_ontology_term_id).
# uns is not reset here; the assignment below replaces any existing value under the same key.
adata.uns['title'] = '513 colors for non-schema_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 17))
save_and_test(adata, 'True')

--
BICCN_ontology_term_id_colors
['royalblue' 'lightgreen' 'mediumseagreen' 'darkred' 'darkgreen' 'yellow'
 'brown' 'darkviolet' 'rebeccapurple' 'pink' 'orangered' 'palevioletred'
 'ivory' 'violet' 'dodgerblue' 'red' 'turquoise']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.697771 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [12]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Valid: colors for schema *_ontology_term_id columns - hex & named.
adata.uns['title'] = '513 colors is for *_ontology_term_id'

adata.uns['sex_ontology_term_id_colors'] = np.array(create_color_list(2))
# Note: this value is assigned as a plain Python list, not wrapped in np.array.
adata.uns['assay_ontology_term_id_colors'] = random.sample(list(mcolors.CSS4_COLORS.keys()), 1)
save_and_test(adata, 'True')

--
assay_ontology_term_id_colors
['silver']
assay_ontology_term_id:1 values of type:category
--
--
sex_ontology_term_id_colors
['#FF4500' '#4B0082']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.669604 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [13]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Valid: a colors array may repeat the same color - hex & named.
adata.uns['title'] = '513 duplicate colors in a list'

# Two random CSS4 names, repeated once, give a 4-entry array with duplicates.
adata.uns['BICCN_class_label_colors'] = np.array(
    random.sample(list(mcolors.CSS4_COLORS.keys()), 2) * 2
)

# Three random hex codes, repeated once, give a 6-entry array with duplicates.
adata.uns['cell_type_ontology_term_id_colors'] = np.array(create_color_list(3) * 2)
save_and_test(adata, 'True')

--
BICCN_class_label_colors
['chocolate' 'white' 'chocolate' 'white']
BICCN_class_label:3 values of type:category
--
--
cell_type_ontology_term_id_colors
['#808080' '#ADFF2F' '#778899' '#808080' '#ADFF2F' '#778899']
cell_type_ontology_term_id:6 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.696699 with status is_valid=True
------------------
[1m[92mPASSED[0m


**Test invalid cases**

In [14]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: fewer colors than unique values in the obs column (2 hex for 17, 1 named for 3).
adata.uns['temp_class_label_colors'] = np.array(create_color_list(2))
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 1))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['lightcoral']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#A0522D' '#00FF00']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Annotated categorical field BICCN_class_label must have at least 3 color options in uns[BICCN_class_label_colors]. Found: ['lightcoral']
ERROR: Annotated categorical field temp_class_label must have at least 17 color options in uns[temp_class_label_colors]. Found: ['#A0522D' '#00FF00']
Validation complete in 0:00:00.522046 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [15]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: the colors array is an empty np.array.
adata.uns['BICCN_class_label_colors'] = np.array([])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Annotated categorical field BICCN_class_label must have at least 3 color options in uns[BICCN_class_label_colors]. Found: []
Validation complete in 0:00:00.526532 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [16]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: the colors array holds np.nan values, i.e. floats rather than strings.
# (None is not exercised by this cell.)
adata.uns['BICCN_class_label_colors'] = np.array([np.nan, np.nan, np.nan])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[nan nan nan]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_class_label_colors] must be strings. Found: [nan nan nan] which are float64
Validation complete in 0:00:00.521479 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [17]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: the colors array contains only empty strings.
adata.uns['BICCN_class_label_colors'] = np.array(['','',''])
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['' '' '']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_class_label_colors] must be either all hex colors or all CSS4 named colors. Found: ['' '' '']
Validation complete in 0:00:00.516561 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [18]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: colors given as a single comma-joined string instead of an array - hex & named.
# (A dictionary value is not exercised by this cell.)
adata.uns['BICCN_class_label_colors'] = ','.join(create_color_list(17))
adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(list(mcolors.CSS4_COLORS.keys()), 2))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
#B22222,#2F4F4F,#FF00FF,#6A5ACD,#0000CD,#FFEBCD,#483D8B,#B0E0E6,#DA70D6,#708090,#663399,#CD5C5C,#191970,#FFE4C4,#7FFF00,#8B4513,#20B2AA
BICCN_class_label:3 values of type:category
--
--
sex_ontology_term_id_colors
darkturquoise,azure
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns['BICCN_class_label_colors'] must be of 'numpy.ndarray' type, it is <class 'str'>
ERROR: Colors field uns['sex_ontology_term_id_colors'] must be of 'numpy.ndarray' type, it is <class 'str'>
Validation complete in 0:00:00.519461 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [19]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: colors keyed on the schema label names (cell_type, sex) rather than on the
# *_ontology_term_id columns - hex & named.
adata.uns['cell_type_colors'] = np.array(create_color_list(6))
adata.uns['sex_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 2))
save_and_test(adata, 'False')

--
cell_type_colors
['#AFEEEE' '#F08080' '#4169E1' '#F4A460' '#778899' '#FFFACD']
cell_type not in obs
--
--
sex_colors
['lemonchiffon' 'slategrey']
sex not in obs
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[cell_type_colors] does not have a corresponding categorical field in obs. Annotate cell_type_ontology_term_id_colors instead
ERROR: Colors field uns[sex_colors] does not have a corresponding categorical field in obs. Annotate sex_ontology_term_id_colors instead
Validation complete in 0:00:00.524661 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [20]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: the *_colors key has no obs counterpart (author_cell_type is not an obs column).
adata.uns['author_cell_type_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

--
author_cell_type_colors
['#2F4F4F' '#FFEBCD' '#F4A460' '#DAA520' '#2E8B57' '#FFFFFF' '#CD853F'
 '#6B8E23' '#00FF7F' '#8B4513' '#D8BFD8' '#A9A9A9' '#FAFAD2' '#D3D3D3'
 '#B0C4DE' '#228B22' '#F0F8FF']
author_cell_type not in obs
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[author_cell_type_colors] does not have a corresponding categorical field in obs
Validation complete in 0:00:00.531041 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [21]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: a single array mixing 20 hex codes with 20 CSS4 color names.
adata.uns['BICCN_ontology_term_id_colors'] = \
    np.array(create_color_list(20) + random.sample(list(mcolors.CSS4_COLORS.keys()), 20))
save_and_test(adata, 'False')

--
BICCN_ontology_term_id_colors
['#FFFFF0' '#7FFF00' '#006400' '#BA55D3' '#BDB76B' '#66CDAA' '#FAFAD2'
 '#EE82EE' '#8A2BE2' '#87CEFA' '#483D8B' '#000080' '#BC8F8F' '#ADD8E6'
 '#FF6347' '#F5F5DC' '#E6E6FA' '#98FB98' '#FFC0CB' '#DDA0DD' 'navajowhite'
 'oldlace' 'sandybrown' 'beige' 'darkmagenta' 'ghostwhite' 'rebeccapurple'
 'lime' 'aliceblue' 'wheat' 'pink' 'darkcyan' 'skyblue' 'tan'
 'mediumslateblue' 'slategray' 'mediumturquoise' 'darkviolet'
 'darkslategrey' 'limegreen']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_ontology_term_id_colors] must be either all hex colors or all CSS4 named colors. Found: ['#FFFFF0' '#7FFF00' '#006400' '#BA55D3' '#BDB76B' '#66CDAA' '#FAFAD2'
 '#EE82EE' '#8A2BE2' '#87CEFA' '#483D8B' '#000080' '#BC8F8F' '#ADD8E6'
 '#FF6347' '#F5F5DC' '#E6E6FA' '#98FB98' '#FFC0CB' '#DDA0DD' 'navajowhite'
 'oldlace' 'sandybrown' 'beige' 'darkmagenta' 

In [22]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: strings that are neither hex codes nor CSS4 names - keys of matplotlib's BASE_COLORS.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.keys()), 3))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['r' 'b' 'k']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_class_label_colors] must be either all hex colors or all CSS4 named colors. Found: ['r' 'b' 'k']
Validation complete in 0:00:00.526548 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [23]:
# Invalid: colors that are not strings - values of matplotlib's BASE_COLORS (numeric tuples).
# uns is not reset here; the assignment below replaces any existing value under the same key.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.values()), 3))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
[[0.   0.   1.  ]
 [0.75 0.   0.75]
 [0.   0.75 0.75]]
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_class_label_colors] must be strings. Found: [[0.   0.   1.  ]
 [0.75 0.   0.75]
 [0.   0.75 0.75]] which are float64
Validation complete in 0:00:00.526853 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [24]:
# Invalid: strings that are neither hex codes nor CSS4 names - keys of matplotlib's XKCD_COLORS.
# uns is not reset here; the assignment below replaces any existing value under the same key.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.XKCD_COLORS.keys()), 3))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['xkcd:blue/purple' 'xkcd:dark red' 'xkcd:bluish grey']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_class_label_colors] must be either all hex colors or all CSS4 named colors. Found: ['xkcd:blue/purple' 'xkcd:dark red' 'xkcd:bluish grey']
Validation complete in 0:00:00.526995 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [25]:
# Invalid: strings that are neither hex codes nor CSS4 names - keys of matplotlib's TABLEAU_COLORS.
# uns is not reset here; the assignment below replaces any existing value under the same key.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.TABLEAU_COLORS.keys()), 3))
save_and_test(adata, 'False')

--
BICCN_class_label_colors
['tab:gray' 'tab:blue' 'tab:orange']
BICCN_class_label:3 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_class_label_colors] must be either all hex colors or all CSS4 named colors. Found: ['tab:gray' 'tab:blue' 'tab:orange']
Validation complete in 0:00:00.518464 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [26]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: the obs counterpart (is_primary_data) is a boolean column, not a categorical one.
adata.uns['is_primary_data_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 1))
save_and_test(adata, 'False')

--
is_primary_data_colors
['whitesmoke']
is_primary_data:1 values of type:bool
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[is_primary_data_colors] does not have a corresponding categorical field in obs. is_primary_data is present but is dtype bool
Validation complete in 0:00:00.522868 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [27]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Invalid: the obs counterpart (size) is a float column, not a categorical one.
adata.uns['size_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

--
size_colors
['#00FF7F' '#8A2BE2' '#F0F8FF' '#FF1493' '#8B4513' '#1E90FF' '#000080'
 '#0000FF' '#FFC0CB' '#7FFF00' '#FAEBD7' '#87CEFA' '#8B008B' '#FFE4E1'
 '#008B8B' '#FDF5E6' '#98FB98']
size:55 values of type:float64
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[size_colors] does not have a corresponding categorical field in obs. size is present but is dtype float64
Validation complete in 0:00:00.527051 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [28]:
# Reset uns so *_colors entries from earlier cells do not carry over.
adata.uns = {'title': 'temporary title'}
# Convert the cluster ids to int so the obs counterpart is an integer column.
# This rewrites adata.obs in place, so the change persists for any cell run afterwards.
adata.obs['BICCN_cluster_id'] = adata.obs['BICCN_cluster_id'].map(int)
# Invalid: the obs counterpart (BICCN_cluster_id) is an int column, not a categorical one.
adata.uns['BICCN_cluster_id_colors'] = np.array(create_color_list(61))
save_and_test(adata, 'False')

--
BICCN_cluster_id_colors
['#8B0000' '#6495ED' '#2F4F4F' '#87CEEB' '#F5F5DC' '#FFF5EE' '#ADD8E6'
 '#808080' '#A9A9A9' '#E6E6FA' '#D3D3D3' '#FFC0CB' '#FFD700' '#EEE8AA'
 '#F0FFFF' '#006400' '#4B0082' '#7B68EE' '#FFFF00' '#FFFFF0' '#3CB371'
 '#00008B' '#FFFAFA' '#FF6347' '#F5F5F5' '#00FF00' '#FDF5E6' '#000080'
 '#A9A9A9' '#FF0000' '#DEB887' '#DCDCDC' '#FFFFFF' '#E9967A' '#F8F8FF'
 '#EE82EE' '#9ACD32' '#5F9EA0' '#7CFC00' '#FFFFE0' '#8FBC8F' '#F5DEB3'
 '#228B22' '#708090' '#87CEFA' '#40E0D0' '#6B8E23' '#7FFFD4' '#FFFAF0'
 '#6A5ACD' '#00FFFF' '#FFE4C4' '#FFE4B5' '#778899' '#BC8F8F' '#8B4513'
 '#FFB6C1' '#A0522D' '#DA70D6' '#00FA9A' '#FAEBD7']
BICCN_cluster_id:61 values of type:int64
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors field uns[BICCN_cluster_id_colors] does not have a corresponding categorical field in obs. BICCN_cluster_id is present but is dtype int64
Validation complete in 0:00:00.530629 with status is_valid=False
--

In [29]:
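# Not covered: a colors value that is a sequence other than np.ndarray (e.g. a list).
# This could not be tested because such values seem to be converted to arrays when the AnnData is written.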