In [99]:
from distutils.dir_util import copy_tree
from pathlib import Path
import shutil
import boto3
import pandas as pd

#for path in Path(indicator_path).iterdir():
# Build {spreadsheet stem: {automatic topic: chosen label}} from every
# annotated spreadsheet, keeping only the rows where a choice was filled in.
topic_mapping = {}
for fname in Path('annotated_suggestions').iterdir():
    df = pd.read_excel(fname.resolve())
    annotated = df[df['choice'].notnull()]
    topic_mapping[fname.stem] = dict(zip(annotated['topic'], annotated['choice']))

In [100]:
#topic_mapping

{'nih': {'health disparities interviews racial acceptability': 'Health equity',
  'infection parasite vaccine immunity vaccines': 'Malaria vaccine',
  'receptor inhibition receptors inhibitors inhibitor': 'Receptor (biochemistry)',
  'therapy blood in_patients treated plasma': 'Blood plasma',
  'drug compounds drugs fda toxicity': 'Drug development',
  'infection hiv infected viral virus': 'HIV/AIDS',
  'methods computational modeling framework individual': 'Computer science',
  'stem_cells stem_cell differentiation regeneration pluripotent': 'Stem Cell',
  'enzymes synthesis enzyme membrane chemical': 'Enzyme',
  'management practice evaluation manage compliance': 'Monitoring and evaluation',
  'tissue disease progression tissues damage': 'Necrosis',
  'kinase mitochondrial macrophages mitochondria phosphorylation': 'Genomics',
  'mri magnetic_resonance_imaging pet magnetic speech': 'Radiology',
  'randomized outcomes risk trial intervention': 'Design of experiments',
  'conference me

In [129]:
def compress_value(value):
    """Format a number as compact text.

    Values equal to their integer truncation are rendered without a decimal
    part; every other value is rendered with three decimal places.
    """
    if int(value) == value:
        template = '%.i'
    else:
        template = '%.3f'
    return template % value

def copy_paths_and_map_labels(path, dest_dir, label_mapping):
    """Copy the files of each sub-directory of ``path`` under ``dest_dir``,
    renaming sub-directories according to ``label_mapping``.

    A sub-directory whose name is a key of the mapping is copied under the
    mapped name; the two project-diversity directories always get their fixed
    ``__Diversity-of-...__`` names. A sub-directory whose name contains a space
    but has no mapping entry is skipped. Anything else keeps its own name.

    Args:
        path: Directory whose sub-directories are copied. It should be a
            relative path: pathlib discards ``dest_dir`` when an absolute
            path is joined onto it.
        dest_dir: Root directory (str or Path) the copies are written under.
        label_mapping: Maps sub-directory names to replacement directory
            names. It is read only; the caller's dict is never modified.

    Returns:
        A list with the resolved destination path of every copied file.
    """
    # Work on a copy: adding the fixed labels to the caller's dict would leak
    # them into whatever structure that dict is shared with.
    labels = {
        **label_mapping,
        'covid-related-projects': '__Diversity-of-covid-related-projects__',
        'non-covid-related-projects': '__Diversity-of-non-covid-related-projects__',
    }
    dest_root = Path(dest_dir)
    paths = []
    for dirname in Path(path).iterdir():
        # Stray plain files cannot be iterated; ignore them instead of
        # failing with NotADirectoryError.
        if not dirname.is_dir():
            continue
        name = dirname.name
        if ' ' in name and name not in labels:
            continue
        new_dirname = dirname.parent / labels[name] if name in labels else dirname
        for src in dirname.iterdir():
            dest = dest_root / new_dirname / src.name
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(str(src), str(dest))
            paths.append(dest.resolve())
    return paths

# Convert automatic labels to human-readable labels
paths = []
dest = 'wiki_topics'
for dataset, mapping in topic_mapping.items():
    paths += copy_paths_and_map_labels(dataset, dest, mapping)
    # A sibling "<dataset>-funding" directory is copied with the same labels
    dataset_funding = f'{dataset}-funding'
    if Path(dataset_funding).exists():
        paths += copy_paths_and_map_labels(dataset_funding, dest, mapping)

# Clean up numeric formatting, and save
s3 = boto3.resource('s3')
bucket = s3.Bucket('eurito-csv-indicators')
root = Path('').resolve()  # copied paths are absolute; S3 keys are relative to the working directory
# dict.fromkeys drops repeated destinations (in order) so that no file is
# rewritten and uploaded more than once.
for path in dict.fromkeys(paths):
    df = pd.read_csv(path)
    df['indicator_value'] = df['indicator_value'].apply(compress_value).astype(str)
    df.to_csv(path, index=False)
    out_path = path.relative_to(root)
    # as_posix() keeps "/" separators in the object key on every platform;
    # str() would produce backslashes on Windows.
    bucket.upload_file(str(path), out_path.as_posix(), ExtraArgs={'ACL':'public-read'})

# Sensible ideas for validation of interpretable indicators in the Covid-19 context

In [106]:
# Topic labels to list as absolute ('Total amount') indicators
absolute_indicator_topics = [
    'COVID-19 pandemic',
    'Health care',
    'Public health',
]

# Topic labels to list as relative ('Relative amount') indicators
relative_indicator_topics = [
    'Behavior',
    'Climate change',
    'Drug development',
    'Educational technology | Privacy Policy',
    'Health care',
    'Health equity',
    'Human behavior',
    'Mid-level practitioner',
    'Public health',
    'Regional policy',
    'Sustainability',
    'Vocational education',
]

For the topics below, I recommend considering the total activity in the arXiv, CORDIS and NIH datasets, which is recorded as "total_activity" in the following files:

In [144]:
import requests
def exists(url, timeout=10):
    """Return True when an HTTP HEAD request to ``url`` answers with status 200.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server; forwarded to
            ``requests.head``.

    Returns:
        True for a 200 response, False for any other status code.
    """
    # requests has no default timeout: without one, a single unresponsive
    # server would block the whole listing indefinitely.
    response = requests.head(url, timeout=timeout)
    return response.status_code == 200

def _print_help(dest, ds, mapping, indicator_labels):
    _by_country = 'by-country'
    _nuts_1 = 'nuts-1'
    title = f"{ds}:"
    print(title, len(title)*'-', sep='\n')
    print()
    for _, label in mapping.items():
        if label in indicator_labels:
            subbucket = f'https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/{dest}/{ds}/{label.replace(" ","+")}'
            by_country = f'{subbucket}/{_by_country}.csv'
            by_nuts = f'{subbucket}/{_nuts_1}.csv'            
            output = []
            if exists(by_country):
                output.append(f'[{_by_country}]({by_country})')
            if exists(by_nuts):
                output.append(f'[{_nuts_1}]({by_nuts})')
            print('*', f'{label}:', ', '.join(output))
    print()

def print_help(indicator_labels, suptitle, dest='wiki_topics'):
    """Print ``suptitle`` as an underlined title, then one link section per dataset.

    Datasets and their label mappings come from the module-level
    ``topic_mapping``. For 'nih' and 'cordis' an additional
    ``<dataset>-funding`` section is printed using the same mapping.
    """
    underline = '=' * len(suptitle)
    print(suptitle, underline, sep='\n')
    print()
    with_funding = ('nih', 'cordis')
    for ds, mapping in topic_mapping.items():
        _print_help(dest, ds, mapping, indicator_labels)
        if ds not in with_funding:
            continue
        _print_help(dest, f'{ds}-funding', mapping, indicator_labels)

# Print the link listing for one group of topics at a time: the relative
# topics are printed here; enable the commented-out call below to print
# the absolute ones instead.
#print_help(absolute_indicator_topics, 'Total amount')
print_help(relative_indicator_topics, 'Relative amount')

Relative amount

nih:
----

* Health equity: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Health+equity/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Health+equity/nuts-1.csv)
* Drug development: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Drug+development/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Drug+development/nuts-1.csv)
* Mid-level practitioner: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Mid-level+practitioner/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Mid-level+practitioner/nuts-1.csv)
* Behavior: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Behavior/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Beha

Total amount (counts and/or funding)
=====================================

nih:
----

* COVID-19 pandemic: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/COVID-19+pandemic/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/COVID-19+pandemic/nuts-1.csv)
* Public health: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Public+health/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Public+health/nuts-1.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Health+care/by-country.csv)

nih-funding:
------------

* COVID-19 pandemic: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/COVID-19+pandemic/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/COVID-19+pandemic/nuts-1.csv)
* Public health: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Public+health/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Public+health/nuts-1.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Health+care/by-country.csv)

cordis:
-------

* COVID-19 pandemic: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/COVID-19+pandemic/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/COVID-19+pandemic/nuts-1.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Health+care/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Health+care/nuts-1.csv)

cordis-funding:
---------------

* COVID-19 pandemic: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/COVID-19+pandemic/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/COVID-19+pandemic/nuts-1.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Health+care/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Health+care/nuts-1.csv)

arxiv:
------

* COVID-19 pandemic: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/arxiv/COVID-19+pandemic/by-country.csv)
* Public health: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/arxiv/Public+health/by-country.csv)


Relative amount
===============

nih:
----

* Health equity: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Health+equity/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Health+equity/nuts-1.csv)
* Drug development: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Drug+development/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Drug+development/nuts-1.csv)
* Mid-level practitioner: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Mid-level+practitioner/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Mid-level+practitioner/nuts-1.csv)
* Behavior: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Behavior/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Behavior/nuts-1.csv)
* Public health: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Public+health/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Public+health/nuts-1.csv)
* Vocational education: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Vocational+education/by-country.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih/Health+care/by-country.csv)

nih-funding:
------------

* Health equity: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Health+equity/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Health+equity/nuts-1.csv)
* Drug development: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Drug+development/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Drug+development/nuts-1.csv)
* Mid-level practitioner: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Mid-level+practitioner/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Mid-level+practitioner/nuts-1.csv)
* Behavior: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Behavior/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Behavior/nuts-1.csv)
* Public health: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Public+health/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Public+health/nuts-1.csv)
* Vocational education: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Vocational+education/by-country.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/nih-funding/Health+care/by-country.csv)

cordis:
-------

* Educational technology | Privacy Policy: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Educational+technology+|+Privacy+Policy/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Educational+technology+|+Privacy+Policy/nuts-1.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Health+care/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Health+care/nuts-1.csv)
* Sustainability: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Sustainability/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Sustainability/nuts-1.csv)
* Regional policy: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Regional+policy/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Regional+policy/nuts-1.csv)
* Automotive industry: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Automotive+industry/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Automotive+industry/nuts-1.csv)
* Climate change: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Climate+change/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Climate+change/nuts-1.csv)
* Vocational education: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Vocational+education/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis/Vocational+education/nuts-1.csv)

cordis-funding:
---------------

* Educational technology | Privacy Policy: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Educational+technology+|+Privacy+Policy/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Educational+technology+|+Privacy+Policy/nuts-1.csv)
* Health care: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Health+care/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Health+care/nuts-1.csv)
* Sustainability: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Sustainability/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Sustainability/nuts-1.csv)
* Regional policy: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Regional+policy/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Regional+policy/nuts-1.csv)
* Automotive industry: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Automotive+industry/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Automotive+industry/nuts-1.csv)
* Climate change: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Climate+change/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Climate+change/nuts-1.csv)
* Vocational education: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Vocational+education/by-country.csv), [nuts-1](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/cordis-funding/Vocational+education/nuts-1.csv)

arxiv:
------

* Public health: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/arxiv/Public+health/by-country.csv)
* Human behavior: [by-country](https://eurito-csv-indicators.s3-eu-west-1.amazonaws.com/wiki_topics/arxiv/Human+behavior/by-country.csv)

In [120]:
# Dataset names (spreadsheet file stems) for which annotated labels were loaded
topic_mapping.keys()

dict_keys(['nih', 'cordis', 'arxiv'])