<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# CI/CD - Make sure all notebooks respect our format policy

**Tags:** #naas

**Author:** [Maxime Jublou](https://www.linkedin.com/in/maximejublou/)

## Input

### Import libraries

In [None]:
import json
import glob
from rich import print
import pydash
import re

## Model

### Utility functions

These functions are shared by the checks below so that we do not repeat ourselves.

In [None]:
def tag_exists(tagname, cells):
    """Return True when at least one cell lists ``tagname`` in ``metadata.tags``.

    A cell without a ``metadata.tags`` entry is treated as having no tags.
    """
    return any(
        tagname in pydash.get(cell, 'metadata.tags', [])
        for cell in cells
    )

def regexp_match(regex, string):
    """Tell whether ``regex`` matches anywhere in ``string``.

    The search runs with ``re.MULTILINE``, so ``^`` and ``$`` anchor at every
    line boundary rather than only at the ends of the whole string.

    Args:
        regex: Pattern understood by the ``re`` module.
        string: Text to search. ``None`` is accepted and never matches.

    Returns:
        True if the pattern matches at least once, False otherwise.
    """
    # A pydash.get lookup without a default (as check_title_exists performs)
    # yields None for a missing path; report "no match" instead of letting
    # ``re`` raise TypeError.
    if string is None:
        return False
    # One match is enough to answer the question, so stop at the first one
    # instead of collecting every match into a list.
    return re.search(regex, string, re.MULTILINE) is not None

def check_regexp(cells, regex, source):
    """Match ``regex`` against the value stored at path ``source`` in ``cells``.

    ``source`` is a pydash path such as ``'[1].source[0]'``. When the path does
    not exist, an empty string is matched instead.
    """
    return regexp_match(regex, pydash.get(cells, source, ''))

def check_title_exists(cells, title):
    """Tell whether any markdown cell starts with the level-2 heading ``title``.

    Only the first source entry (``source[0]``) of each markdown cell is
    inspected.

    Args:
        cells: Notebook cells to scan.
        title: Heading text. It is interpolated into the regular expression
            as-is (not escaped).

    Returns:
        True as soon as one markdown cell matches ``## <title>``, else False.
    """
    heading = rf"^## *{title}"
    for cell in cells:
        if pydash.get(cell, 'cell_type') != 'markdown':
            continue
        # A markdown cell with an empty or missing source has no 'source[0]'.
        # Fall back to '' so such a cell simply does not match, rather than
        # passing None to the regex engine and aborting the whole run.
        first_line = pydash.get(cell, 'source[0]', '') or ''
        if regexp_match(heading, first_line):
            return True
    return False

### Check functions

These functions are used to check whether a notebook contains the right cells with proper formatting.

In [None]:
def check_naas_logo(cells):
    """Check that the first cell is a markdown cell starting with the Naas logo.

    Returns ``(True, '')`` on success, otherwise ``(False, requirements)``
    where ``requirements`` describes the expected cell.
    """
    logo_content = '<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>'

    # The source is only looked at once the cell is known to be markdown.
    if pydash.get(cells, '[0].cell_type') == 'markdown':
        first_line = pydash.get(cells, '[0].source[0]', '')
        if first_line.startswith(logo_content):
            return (True, '')

    return (False, f'''
    Requirements:
        - Cell number: 1
        - Cell type: Markdown
        - Shape: {logo_content}
    ''')

def check_title_match_regexp(cells):
    """Check that the second cell is a markdown title shaped like "# a - b".

    Returns a ``(passed, requirements)`` tuple; the requirements text is
    returned whatever the outcome.
    """
    requirements = '''
    Requirements:
        - Cell number: 2
        - Cell type: Markdown
        - Shape: "# something - some other thing"
    '''
    # The title text is only examined when the cell type is markdown.
    if not check_regexp(cells, r"markdown", '[1].cell_type'):
        return (False, requirements)
    return (check_regexp(cells, r"^#.*-.*", '[1].source[0]'), requirements)

def check_tool_tags(cells):
    """Check that the third cell is a markdown "**Tags:**" line with a tag.

    The first source line of the cell must start with ``**Tags:**``, followed
    by one or more spaces and at least one ``#tag``. Anything after the first
    tag is not validated.

    Returns:
        A ``(passed, requirements)`` tuple; the requirements text is returned
        whatever the outcome.
    """
    # The previous pattern wrapped the tag in a ``(...)*`` group, so a line
    # with no tag at all still passed; its character class also left out the
    # digit 0 and allowed literal commas. Require one real tag instead.
    tags_pattern = r"^\*\*Tags:\*\* +#\w+"
    return (check_regexp(cells, r"markdown", '[2].cell_type') and check_regexp(cells, tags_pattern, '[2].source[0]'), '''
    Requirements:
        - Cell number: 3
        - Cell type: Markdown
        - Shape: "**Tags:** #atLeastOneTool"
    ''')

def check_author(cells):
    """Check that the fourth cell is a markdown "**Author:**" line with a name.

    The first source line of the cell must start with ``**Author:**`` followed
    by at least one non-blank character.

    Returns:
        A ``(passed, requirements)`` tuple; the requirements text is returned
        whatever the outcome.
    """
    # The previous pattern ended in `` *.*``, which also matches nothing, so
    # an Author line without any name passed. ``\S`` demands actual content.
    author_pattern = r"^\*\*Author:\*\* *\S"
    return (check_regexp(cells, r"markdown", '[3].cell_type') and check_regexp(cells, author_pattern, '[3].source[0]'), '''
    Requirements:
        - Cell number: 4
        - Cell type: Markdown
        - Shape: "**Author:** At least one author name"
    ''')

def check_input_title_exists(cells):
    """Check that some markdown cell opens with an "## Input" heading.

    Returns a ``(passed, requirements)`` tuple; the requirements text is
    returned whatever the outcome.
    """
    requirements = '''
    Requirements:
        - Cell number: Any
        - Cell type: Markdown
        - Shape: "## Input"
    '''
    found = check_title_exists(cells, 'Input')
    return (found, requirements)

def check_model_title_exists(cells):
    """Check that some markdown cell opens with a "## Model" heading.

    Returns a ``(passed, requirements)`` tuple; the requirements text is
    returned whatever the outcome.
    """
    requirements = '''
    Requirements:
        - Cell number: Any
        - Cell type: Markdown
        - Shape: "## Model"
    '''
    found = check_title_exists(cells, 'Model')
    return (found, requirements)

def check_output_title_exists(cells):
    """Check that some markdown cell opens with an "## Output" heading.

    Returns a ``(passed, requirements)`` tuple; the requirements text is
    returned whatever the outcome.
    """
    requirements = '''
    Requirements:
        - Cell number: Any
        - Cell type: Markdown
        - Shape: "## Output"
    '''
    found = check_title_exists(cells, 'Output')
    return (found, requirements)

## Output

In [None]:
# Walk every notebook under the project, run each format check on it, print a
# report for the notebooks that fail, and exit with status 1 if any failed.
got_errors = False
error_counter = 0
for file in glob.glob('../../**/*.ipynb', recursive=True):
    # Do not check notebooks in .github or at the root of the project.
    if '.github' in file or len(file.split('/')) == 3:
        continue

    print(f"Analyzing {file}")
    # Use a context manager so the handle is closed right away, and decode as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    with open(file, encoding='utf-8') as notebook_file:
        notebook = json.load(notebook_file)
    # A notebook without a 'cells' entry is reported as failing every check
    # rather than crashing the title checks that iterate over the cells.
    cells = notebook.get('cells') or []

    filename = "[dark_orange]" + file.replace("../../", "") + "[/dark_orange]"
    outputs = [f'Errors found in: {filename}']
    should_display_debug = False

    # NOTE(review): check_naas_logo is defined in this file but is not part of
    # this list — confirm whether leaving it out is intentional.
    for checkf in [
        check_title_match_regexp,
        check_tool_tags,
        check_author,
        check_input_title_exists,
        check_model_title_exists,
        check_output_title_exists]:

        result, msg = checkf(cells)
        if result is False:
            should_display_debug = True
        status_msg = "[bright_green]OK[/bright_green]" if result is True else f"[bright_red]KO {msg}[/bright_red]"
        outputs.append(f'{checkf.__name__} ... {status_msg}')

    # Only notebooks with at least one failed check are reported in detail.
    if should_display_debug:
        got_errors = True
        error_counter += 1
        for msg in outputs:
            print(msg)
        print("\n")

if got_errors:
    print(f'[bright_red]You have {error_counter} notebooks having errors!')
    exit(1)