https://github.com/chanzuckerberg/single-cell-curation/issues/513 <br>
https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#column_colors

In [1]:
#import cxg_upload
import matplotlib.colors as mcolors
import numpy as np
import os
import pandas as pd
import random
import scanpy as sc
import subprocess
from datetime import datetime

In [2]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and return the reported status.

    The validator's stdout and stderr are echoed in full so the notebook output
    keeps the complete log.

    Parameters
    ----------
    file : str
        Path of the h5ad file to validate.

    Returns
    -------
    str or None
        The text following ``is_valid=`` on the first stderr line that contains
        it (e.g. ``'True'`` or ``'False'``), with surrounding whitespace removed.
        ``None`` if no such line was emitted.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)

    valid = None
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        # Keep echoing after the status line so no diagnostics are dropped.
        print(line)
        if valid is None and 'is_valid=' in line:
            # strip() removes a trailing '\r' (CRLF output) that would otherwise
            # make the status compare unequal to 'True'/'False'.
            valid = line.split('=')[-1].strip()
    return valid

In [3]:
def save_and_test(adata, expected):
    """Write ``adata`` to a scratch h5ad file, validate it, and print PASSED/ERROR.

    The file is written with a timestamp appended to ``adata.uns['title']``.
    Every ``*_colors`` entry of the re-read file is printed together with the
    number of unique values and dtype of the matching obs column (or a note
    that there is no such column). The validator status is then compared with
    ``expected``.

    Parameters
    ----------
    adata : AnnData
        Object to write; must have ``uns['title']``. Its title is restored to
        the original value before returning, so repeated calls do not stack
        timestamps.
    expected : str
        Status the validator should report, as returned by ``validate``
        (``'True'`` or ``'False'``).
    """
    file = 'test.h5ad'
    original_title = adata.uns['title']
    dt_string = datetime.now().strftime('%m/%d %H:%M')

    try:
        # Only the written copy carries the timestamped title.
        adata.uns['title'] = original_title + ' - ' + dt_string
        adata.write(filename=file)

        # Inspect what actually landed on disk, not the in-memory object.
        test_adata = sc.read_h5ad(file)
        for k, v in test_adata.uns.items():
            if k.endswith('_colors'):
                print('--')
                print(k)
                print(v)
                obs_field = k.replace('_colors', '')
                if obs_field in test_adata.obs.columns:
                    uniq_vals = str(len(test_adata.obs[obs_field].unique()))
                    val_type = str(test_adata.obs[obs_field].dtype)
                    print(obs_field + ':' + uniq_vals + ' values of type:' + val_type)
                else:
                    print(obs_field + ' not in obs')
                print('--')
        print('------------------')

        valid = validate(file)
        print('------------------')
        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
            #if expected == 'True':
                #cxg_upload.upload(file)
    finally:
        # Undo the title stamp and remove the scratch file even when writing,
        # reading or validating raised.
        adata.uns['title'] = original_title
        if os.path.exists(file):
            os.remove(file)

In [4]:
# Load the baseline dataset; the cells below modify its uns/obs and re-validate it.
adata = sc.read_h5ad('../valid.h5ad')

In [5]:
# Summarise every obs column: its dtype and the number of unique values it holds.
df = pd.DataFrame(adata.obs.dtypes).reset_index()
df['unique len'] = [len(adata.obs[column].unique()) for column in df['index']]
df = df.set_index('index')
df

Unnamed: 0_level_0,0,unique len
index,Unnamed: 1_level_1,Unnamed: 2_level_1
BICCN_cluster_id,float64,61
QC,category,2000
BICCN_cluster_label,category,61
BICCN_subclass_label,category,17
BICCN_class_label,category,3
cluster_color,category,61
size,float64,55
temp_class_label,category,17
BICCN_ontology_term_id,category,17
assay_ontology_term_id,category,1


In [6]:
def create_color_list(count):
    """Return ``count`` distinct, randomly chosen hex color codes.

    The codes come from the values of ``mcolors.cnames``. Several names in that
    table share one hex code (for example 'aqua'/'cyan' and 'gray'/'grey'), so
    the values are de-duplicated before sampling. Otherwise a sample could
    contain the same code twice and offer fewer distinct colors than requested.

    Parameters
    ----------
    count : int
        Number of colors to return.

    Returns
    -------
    list of str
        ``count`` different hex codes in random order.

    Raises
    ------
    ValueError
        From ``random.sample`` if ``count`` exceeds the number of distinct codes.
    """
    # dict.fromkeys keeps first-seen order while dropping repeated codes.
    unique_hex_codes = list(dict.fromkeys(mcolors.cnames.values()))
    return random.sample(unique_hex_codes, count)

**Test valid cases**

In [7]:
# Baseline: the file as loaded, with no *_colors entries added, is expected to validate.
save_and_test(adata, 'True')

------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.642123 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [8]:
# Expected-valid case: each color list has exactly as many entries as its obs
# column has values (17 hex codes for temp_class_label, 3 CSS4 names for
# BICCN_class_label).
adata.uns['title'] = '513 colors matches length of obs'

adata.uns['temp_class_label_colors'] = np.asarray(create_color_list(17))
adata.uns['BICCN_class_label_colors'] = np.asarray(
    random.sample(list(mcolors.CSS4_COLORS), 3)
)
save_and_test(adata, expected='True')

--
BICCN_class_label_colors
['yellowgreen' 'violet' 'lightgreen']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#F5F5DC' '#FF69B4' '#8B4513' '#FAF0E6' '#1E90FF' '#800080' '#98FB98'
 '#32CD32' '#696969' '#00FF00' '#FFE4B5' '#F0FFF0' '#9400D3' '#FFE4E1'
 '#D3D3D3' '#20B2AA' '#F8F8FF']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.643383 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [9]:
# Expected-valid case: more colors than obs values (40 lower-cased hex codes
# for the 17 temp_class_label values, 12 CSS4 names for the 3 BICCN_class_label
# values).
adata.uns['title'] = '513 colors exceeds length of obs'

adata.uns['temp_class_label_colors'] = np.asarray(
    list(map(str.lower, create_color_list(40)))
)
adata.uns['BICCN_class_label_colors'] = np.asarray(
    random.sample(list(mcolors.CSS4_COLORS), 12)
)
save_and_test(adata, expected='True')

--
BICCN_class_label_colors
['bisque' 'indianred' 'salmon' 'silver' 'pink' 'mediumslateblue'
 'lightcoral' 'red' 'lightcyan' 'ghostwhite' 'maroon' 'powderblue']
BICCN_class_label:3 values of type:category
--
--
temp_class_label_colors
['#fdf5e6' '#ffe4b5' '#8b4513' '#f5f5dc' '#ff7f50' '#adff2f' '#9370db'
 '#f5fffa' '#483d8b' '#9acd32' '#2e8b57' '#008080' '#ff00ff' '#fa8072'
 '#48d1cc' '#000000' '#6b8e23' '#deb887' '#778899' '#f0e68c' '#3cb371'
 '#db7093' '#faebd7' '#fffacd' '#663399' '#708090' '#dcdcdc' '#6495ed'
 '#f5deb3' '#afeeee' '#ff6347' '#add8e6' '#a9a9a9' '#ffa07a' '#fff5ee'
 '#f0f8ff' '#9932cc' '#008b8b' '#e6e6fa' '#ffb6c1']
temp_class_label:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.663600 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [14]:
# Start from a clean uns so colors from earlier cases do not carry over.
adata.uns = {'title': 'temporary title'}
# Expected-valid case: hex colors for BICCN_ontology_term_id, a column that
# ends in _ontology_term_id but is not one of the schema's own fields.
adata.uns['title'] = '513 colors for non-schema *_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.asarray(create_color_list(17))
save_and_test(adata, expected='True')

--
BICCN_ontology_term_id_colors
['#008080' '#FFFAFA' '#708090' '#00FA9A' '#FAFAD2' '#E6E6FA' '#00008B'
 '#FF1493' '#D2691E' '#FA8072' '#FFF0F5' '#FF8C00' '#7B68EE' '#F0F8FF'
 '#FF4500' '#ADFF2F' '#000000']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.631746 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [11]:
# Expected-valid case: same column as the hex variant (BICCN_ontology_term_id),
# this time colored with 17 CSS4 color names.
adata.uns['title'] = '513 colors for non-schema_ontology_term_id'

adata.uns['BICCN_ontology_term_id_colors'] = np.asarray(
    random.sample(list(mcolors.CSS4_COLORS), 17)
)
save_and_test(adata, expected='True')

--
BICCN_ontology_term_id_colors
['powderblue' 'violet' 'ivory' 'purple' 'floralwhite' 'darkslategray'
 'skyblue' 'yellowgreen' 'deepskyblue' 'coral' 'beige' 'lightslategray'
 'tomato' 'slategrey' 'deeppink' 'azure' 'lightgrey']
BICCN_ontology_term_id:17 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Colors in uns[BICCN_ontology_term_id_colors] must be either all hex colors or all CSS4 named colors. Found: ['azure' 'beige' 'coral' 'darkslategray' 'deeppink' 'deepskyblue'
 'floralwhite' 'ivory' 'lightgrey' 'lightslategray' 'powderblue' 'purple'
 'skyblue' 'slategrey' 'tomato' 'violet' 'yellowgreen']
Validation complete in 0:00:00.525597 with status is_valid=False
------------------
[1m[91mERROR[0m


In [12]:
# Start from a clean uns so colors from earlier cases do not carry over.
adata.uns = {'title': 'temporary title'}
# Expected-valid case: colors keyed on *_ontology_term_id columns, hex codes
# for sex_ontology_term_id and a CSS4 name for assay_ontology_term_id.
adata.uns['title'] = '513 colors is for *_ontology_term_id'

adata.uns['sex_ontology_term_id_colors'] = np.asarray(create_color_list(2))
# Assigned as a plain list (not a NumPy array), as in the original case.
adata.uns['assay_ontology_term_id_colors'] = random.sample(list(mcolors.CSS4_COLORS), 1)
save_and_test(adata, expected='True')

--
assay_ontology_term_id_colors
['limegreen']
assay_ontology_term_id:1 values of type:category
--
--
sex_ontology_term_id_colors
['#D3D3D3' '#FFFAFA']
sex_ontology_term_id:2 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:01.692633 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [13]:
# Start from a clean uns so colors from earlier cases do not carry over.
adata.uns = {'title': 'temporary title'}
# Case marked as expected-valid: each list is a random sample repeated twice,
# so every color appears exactly two times (2 names -> 4 entries, 3 hex -> 6).
adata.uns['title'] = '513 duplicate colors in a list'

adata.uns['BICCN_class_label_colors'] = np.asarray(
    random.sample(list(mcolors.CSS4_COLORS), 2) * 2
)
adata.uns['cell_type_ontology_term_id_colors'] = np.asarray(create_color_list(3) * 2)
save_and_test(adata, expected='True')

--
BICCN_class_label_colors
['silver' 'snow' 'silver' 'snow']
BICCN_class_label:3 values of type:category
--
--
cell_type_ontology_term_id_colors
['#8B4513' '#E6E6FA' '#CD853F' '#8B4513' '#E6E6FA' '#CD853F']
cell_type_ontology_term_id:6 values of type:category
--
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Annotated categorical field BICCN_class_label must have at least 3 color options in uns[BICCN_class_label_colors]. Found: ['silver' 'snow']
ERROR: Annotated categorical field cell_type_ontology_term_id must have at least 6 color options in uns[cell_type_ontology_term_id_colors]. Found: ['#8B4513' '#CD853F' '#E6E6FA']
Validation complete in 0:00:00.534104 with status is_valid=False
------------------
[1m[91mERROR[0m


**Test invalid cases**

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: fewer colors than obs values (2 hex codes for temp_class_label,
# 1 CSS4 name for BICCN_class_label).
adata.uns['temp_class_label_colors'] = np.array(create_color_list(2))
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 1))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: the color list for BICCN_class_label is an empty NumPy array.
adata.uns['BICCN_class_label_colors'] = np.array([])
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: every entry of the color list is np.nan (a None-valued list is not exercised here).
adata.uns['BICCN_class_label_colors'] = np.array([np.nan, np.nan, np.nan])
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: the color list has the right length (3) but every entry is an empty string.
adata.uns['BICCN_class_label_colors'] = np.array(['','',''])
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: colors given as one comma-joined string instead of an array
# (the dictionary variant is not exercised here).
adata.uns['BICCN_class_label_colors'] = ','.join(create_color_list(17))
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['sex_ontology_term_id_colors'] = ','.join(random.sample(list(mcolors.CSS4_COLORS.keys()), 2))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: colors keyed on the schema fields cell_type and sex (hex codes
# and CSS4 names respectively).
adata.uns['cell_type_colors'] = np.array(create_color_list(6))
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['sex_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 2))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: the *_colors key is meant to have no matching obs column (author_cell_type).
adata.uns['author_cell_type_colors'] = np.array(create_color_list(17))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: one list mixing 20 hex codes with 20 CSS4 color names.
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['BICCN_ontology_term_id_colors'] = \
    np.array(create_color_list(20) + random.sample(list(mcolors.CSS4_COLORS.keys()), 20))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: matplotlib BASE_COLORS names, which are neither hex codes nor
# CSS4 color names.
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.keys()), 3))
save_and_test(adata, 'False')

In [None]:
# Reset uns like the other cases so this cell does not depend on what the
# previous cell left behind.
adata.uns = {'title': 'temporary title'}
# Invalid case: matplotlib BASE_COLORS values, which are neither hex codes nor
# CSS4 color names.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.BASE_COLORS.values()), 3))
save_and_test(adata, 'False')

In [None]:
# Reset uns like the other cases so this cell does not depend on what the
# previous cell left behind.
adata.uns = {'title': 'temporary title'}
# Invalid case: matplotlib XKCD_COLORS names, which are neither hex codes nor
# CSS4 color names.
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.XKCD_COLORS.keys()), 3))
save_and_test(adata, 'False')

In [None]:
# Reset uns like the other cases so this cell does not depend on what the
# previous cell left behind.
adata.uns = {'title': 'temporary title'}
# Invalid case: matplotlib TABLEAU_COLORS names, which are neither hex codes
# nor CSS4 color names.
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['BICCN_class_label_colors'] = np.array(random.sample(list(mcolors.TABLEAU_COLORS.keys()), 3))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: the *_colors key targets is_primary_data, intended to be a
# boolean obs column.
# random.sample() needs a sequence: a dict view raises TypeError on Python 3.11+.
adata.uns['is_primary_data_colors'] = np.array(random.sample(list(mcolors.CSS4_COLORS.keys()), 1))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
# Invalid case: the *_colors key targets size, a float64 obs column.
# size has 55 unique values, so 55 colors are supplied: with fewer, the file
# could be rejected for having too few colors rather than for the float dtype.
adata.uns['size_colors'] = np.array(create_color_list(55))
save_and_test(adata, 'False')

In [None]:
adata.uns = {'title': 'temporary title'}
adata.obs['BICCN_cluster_id'] = adata.obs['BICCN_cluster_id'].map(int)
#colors counterpart in obs is int
adata.uns['BICCN_cluster_id_colors'] = np.array(create_color_list(61))
save_and_test(adata, 'False')

In [29]:
# Not tested: a colors value that is not an np.array. This could not be exercised because the value seems to be converted to an array when the AnnData is written.