# Migration tool for "analytics" notebooks

Generate legacy list of tools:

```bash
nomad dev toolkit-metadata > tutorials.json
```


In [1]:
import json

# Load the legacy tutorial metadata.
# The encoding is given explicitly: JSON text is UTF-8, while open() would
# otherwise use the platform's locale encoding and could mis-decode
# non-ASCII author names.
with open('tutorials.json', 'r', encoding='utf-8') as f:
    tutorials = json.load(f)['tutorials']

print(tutorials)

[{'authors': ['Ahmetcik, Emre', 'Ziletti, Angelo', 'Ouyang, Runhai', 'Sbailò, Luigi', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Symbolic regression via compressed sensing: a tutorial', 'description': 'In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regrression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.', 'notebook_name': 'compressed_sensing.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-compressed-sensing', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/compressed_sensing.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/compressed_sensing.ipynb', 'link_video': 'https://www.youtube.com

In [2]:
# Union of the top-level field names that appear in any legacy record.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.keys())
keys

{'authors',
 'description',
 'email',
 'flags',
 'labels',
 'link',
 'link_doi_paper',
 'link_paper',
 'link_public',
 'link_video',
 'notebook_name',
 'title',
 'updated',
 'url'}

In [3]:
# Union of the flag names in use; records without a 'flags' entry add nothing.
keys = set()
for tutorial in tutorials:
    flags = tutorial.get('flags', {})
    keys.update(flags.keys())
('flags', keys)

('flags', {'featured', 'paper', 'top_of_list'})

In [4]:
# Union of the label names in use ('labels' is indexed directly, so it must
# be present on every record).
keys = set()
for tutorial in tutorials:
    keys.update(tutorial['labels'].keys())
keys

{'ai_methods',
 'application_keyword',
 'application_section',
 'application_system',
 'category',
 'language',
 'platform'}

In [5]:
# Distinct 'application_keyword' label values across all records.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('application_keyword', []))
keys

set()

In [6]:
# Distinct 'application_section' label values across all records.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('application_section', []))
keys

{'Analysing the content of the Archive',
 'Materials property prediction',
 'Timely artificial-intelligence applications to Materials Science',
 'Timely artificial-intelligence applications to Materials science',
 'Timely artificial-intelligence applications to materials science',
 'Tutorials for artificial-intelligence methods'}

In [7]:
# Distinct 'application_system' label values across all records.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('application_system', []))
keys

{'Atoms',
 'Binaries',
 'Bulk properties',
 'CO2 activation',
 'Elemental solids',
 'GDB molecular database',
 'GDB7',
 'Grain boundaries',
 'Heterogeneous catalysis',
 'Images',
 'Inorganic compounds',
 'Insulators',
 'Iron',
 'Low-dimensional materials',
 'Metals',
 'OQMD database',
 'Octet binaries',
 'Oxygen evolution reaction',
 'Oxygen reduction reaction',
 'Perovskites',
 'Rock salt',
 'Scaling relations',
 'Semicondictor oxides',
 'Silicon',
 'Surface',
 'Synthetic data',
 'Ternaries',
 'Tetradymites',
 'Topological insulators',
 'Transparent conducting oxides',
 'UCI regression dataset',
 'Zinc blende'}

In [8]:
# Distinct 'category' label values. No default is supplied, so a record
# without a category raises here instead of being skipped silently.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('category'))
keys

{'advanced_tutorial',
 'beginner_tutorial',
 'intermediate_tutorial',
 'query_tutorial'}

In [10]:
# Legacy category identifier -> display name written to the new 'category' field.
map_categories = dict(
    advanced_tutorial='Advanced tutorial',
    beginner_tutorial='Beginner tutorial',
    intermediate_tutorial='Intermediate tutorial',
    query_tutorial='Query tutorial',
)

In [None]:
# Distinct 'ai_methods' label values across all records.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('ai_methods', []))
keys

In [None]:
# Distinct 'language' label values across all records.
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('language', []))
keys

In [None]:
# Distinct 'platform' label values across all records.
# An empty-list default is supplied, as in the other optional-label cells,
# so a record without a 'platform' label is skipped rather than raising
# TypeError from list(None).
keys = set()
for tutorial in tutorials:
    keys.update(tutorial.get('labels').get('platform', []))
keys

In [None]:
# Pick the first legacy record and display it as a worked example.
tutorial = tutorials[0]
tutorial


In [None]:
# Turn the example record's "Last, First" author strings into name dicts.
authors = []
for author in tutorial.get('authors'):
    last_name, first_name = (part.strip() for part in author.split(',', 1))
    authors.append(dict(last_name=last_name, first_name=first_name))

# The record's 'email' value is attached to the last author listed.
authors[-1]['email'] = tutorial.get('email')
authors

In [12]:
# Legacy link field -> reference 'kind' in the new entry. The order of this
# table is the order in which references are emitted.
_REFERENCE_KINDS = (
    ('link_doi_paper', 'article_doi'),
    ('link_public', 'hub'),
    ('link_paper', 'article_url'),
    ('url', 'repository'),
    ('link_video', 'video'),
)

# Repository names carry this prefix; it is dropped from the output file stem.
_SLUG_PREFIX = 'analytics-'


def _split_author(author):
    """Split a legacy author string into ``(last_name, first_name)``."""
    last_name, comma, first_name = author.partition(',')
    if not comma:
        # No comma present: fall back to "Given Family" order and split on
        # the last space instead of failing on tuple unpacking.
        first_name, _, last_name = author.strip().rpartition(' ')
    return last_name.strip(), first_name.strip()


def _derive_slug(tutorial):
    """Return the output file stem for a legacy tutorial record."""
    # Trailing slashes are ignored so '.../analytics-foo/' still yields 'foo'.
    url = (tutorial.get('url') or '').rstrip('/')
    slug = url.rsplit('/', 1)[-1]
    if not slug:
        # 'url' is optional: fall back to the notebook file name without
        # its extension.
        slug = (tutorial.get('notebook_name') or '').rsplit('.', 1)[0]
    if not slug:
        raise ValueError(
            f"cannot derive a slug for tutorial {tutorial.get('title')!r}: "
            "neither 'url' nor 'notebook_name' is set"
        )
    if slug.startswith(_SLUG_PREFIX):
        slug = slug[len(_SLUG_PREFIX):]
    return slug


def build_new_tutorial(tutorial):
    """Convert one legacy tutorial record into a new archive entry.

    Args:
        tutorial: Legacy metadata dict. 'title', 'description', 'updated',
            'authors' and ``labels['category']`` are required. The
            'ai_methods' and 'application_system' labels and all link
            fields are optional.

    Returns:
        A ``(slug, archive)`` tuple. ``slug`` is the last path segment of
        'url' with the 'analytics-' prefix removed; if 'url' is missing it
        is the 'notebook_name' without extension. ``archive`` is
        ``{'data': entry}``.

    Raises:
        KeyError: If a required field is missing or the category is not in
            ``map_categories``.
        ValueError: If no slug can be derived.
    """
    labels = tutorial['labels']

    new = {
        "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook",
        'name': tutorial['title'],
        'description': tutorial['description'],
        'date': tutorial['updated'],

        # Only the first category of the record is migrated.
        'category': map_categories[labels['category'][0]],
        # Optional labels: a missing or empty value becomes an empty list.
        'methods': [{'name': v} for v in labels.get('ai_methods') or []],
        'applications': [{'name': v} for v in labels.get('application_system') or []],
        'platform': 'Python'
    }

    # NOTE: the legacy 'email' field is not carried over to the authors.
    new['authors'] = []
    for author in tutorial['authors']:
        if not author.strip():
            continue  # ignore blank author entries
        last_name, first_name = _split_author(author)
        new['authors'].append({
            'last_name': last_name,
            'first_name': first_name
        })

    # Every non-empty link becomes a typed reference; the paper DOI is
    # stored as an 'article_doi' reference.
    new['references'] = [
        {'kind': kind, 'uri': tutorial[field]}
        for field, kind in _REFERENCE_KINDS
        if tutorial.get(field)
    ]

    return _derive_slug(tutorial), {'data': new}


# Convert every legacy record and write it to '<slug>.archive.json' in the
# current working directory.
for tutorial in tutorials:
    slug, new_tutorial = build_new_tutorial(tutorial)

    archive_name = f"{slug}.archive.json"
    with open(archive_name, "w") as outfile:
        outfile.write(json.dumps(new_tutorial, indent=2))
