<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Jupyter Notebooks - Add tags to cells
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/template.ipynb" target="_parent">
<img src="https://img.shields.io/badge/-Open%20in%20Naas-success?labelColor=000000&logo="/>
</a>

**Tags:** #jupyter #awesome-notebooks #tags #snippet

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

## Input

### Import libraries

In [None]:
import os
import json
from naas_drivers import notion
import pandas as pd
import naas

### Variables

In [None]:
# Input
# URL of the Notion database listing the notebooks; get_notion_df extracts the
# database id from its last path segment (the part before "?v=").
notion_database = 'https://www.notion.so/naas-official/b75fca2e4704433a8fc914032e089054?v=376dd88315454e89bf128433e7ae0420'
# Value stored under the "NOTION_TOKEN" key of the Naas secret store; it is
# passed to notion.connect() when the database is queried.
NOTION_TOKEN = naas.secret.get("NOTION_TOKEN")

## Model

### Get Notion DB

In [None]:
def get_notion_df(notion_database):
    """Return the active notebooks of a Notion database as a DataFrame.

    Args:
        notion_database: URL of the Notion database. The database id is taken
            from the last path segment, before the ``?v=`` view parameter.

    Returns:
        A pandas DataFrame with one row per page whose ``Active`` property
        renders as ``"True"``; columns are named after the properties of the
        first page.

    Raises:
        ValueError: If the database query returns no page, since the column
            names cannot be derived in that case.
    """
    database_id = notion_database.split("/")[-1].split("?v=")[0]
    pages = notion.connect(NOTION_TOKEN).database.query(database_id, query={})
    print("📊 Pages in Notion DB:", len(pages))

    if not pages:
        raise ValueError(f"No pages found in Notion database '{database_id}'.")

    # Property names come from the first page; every page is assumed to expose
    # its properties in the same order -- TODO confirm against naas_drivers.
    columns = pages[0].df().Name.tolist()

    # Build one single-row frame per page and concatenate once, instead of
    # growing a DataFrame inside the loop (quadratic copying).
    frames = []
    for page in pages:
        values = page.df().drop("Type", axis=1).to_dict().get("Value")
        frames.append(pd.DataFrame([values]))
    df = pd.concat(frames)

    # Rename positional columns (0, 1, ...) to the property names
    df = df.rename(columns=dict(enumerate(columns)))

    # Filter on active notebooks
    df = df[df["Active"].astype(str) == "True"]
    print("Current notebooks:", len(df))
    return df

# Load the active notebooks from Notion once; update_notebook reads this
# module-level DataFrame when it rebuilds each notebook's tag line.
df_notion = get_notion_df(notion_database)
# Display the resulting table as the cell output
df_notion

### Update business tags

In [None]:
def add_business_tags(df_notion, notebook_path, source):
    """Build the updated ``**Tags:**`` line of a notebook from its Notion row.

    The existing tags are kept, then completed with the notebook source
    (its top-level folder, always first), domain, type(s) and destination(s)
    read from ``df_notion``. Type tags that no longer match the Notion type
    are removed.

    Args:
        df_notion: DataFrame with at least the columns ``Name``, ``Domain``,
            ``Type`` and ``Destination``.
        notebook_path: "/"-separated path ``<folder>/<file>.ipynb``; the file
            name is matched against ``df_notion.Name``.
        source: Current tag line, e.g. ``"**Tags:** #a #b"``.

    Returns:
        A one-element list holding the new tag line (no trailing newline).

    Raises:
        KeyError: If no row of ``df_notion`` matches the notebook file name.
    """
    # Get current tags: everything after each "#". Whitespace (including a
    # trailing newline from the notebook JSON) is stripped and empty
    # fragments are dropped so they never end up inside the rebuilt line.
    tags = [t.replace(" ", "").strip() for t in source.split("#")[1:]]
    tags = [t for t in tags if t]

    # Get tags to update
    nb_name = notebook_path.split("/")[-1]
    tmp_df = df_notion[df_notion.Name == nb_name].reset_index(drop=True)
    if len(tmp_df) == 0:
        raise KeyError(f"Notebook '{nb_name}' not found in Notion database.")

    nb_source = notebook_path.split("/")[0].lower().replace(" ", "").strip()
    # The first two characters of the Domain value are dropped -- presumably
    # an emoji/prefix followed by a space; TODO confirm against the Notion DB.
    nb_domain = tmp_df.loc[0, "Domain"][2:].lower().replace(" ", "").strip()
    nb_type = tmp_df.loc[0, "Type"].lower().replace(" ", "").strip()
    nb_destination = tmp_df.loc[0, "Destination"].lower().replace(" ", "").strip()

    # Add source as first element (and only once)
    tags = [nb_source] + [t for t in tags if t != nb_source]

    # Add domain
    if nb_domain and nb_domain not in tags:
        tags.append(nb_domain)

    # Add type(s)
    nb_types = [t for t in nb_type.split(",") if t]
    for nb_t in nb_types:
        if nb_t not in tags:
            tags.append(nb_t)

    # Drop type tags that are no longer set in Notion. A new list is built
    # because removing items while iterating skips the following element.
    check_types = ["snippet", "automation", "analytics", "ai"]
    tags = [t for t in tags if t not in check_types or t in nb_types]

    # Add destination(s)
    nb_destinations = [d for d in nb_destination.split(",") if d]
    for nb_d in nb_destinations:
        if nb_d not in tags:
            tags.append(nb_d)

    tags = " #".join(tags)
    tags = f"**Tags:** #{tags}"
    return [tags]

### Update notebook

In [None]:
from pprint import pprint
import uuid

def update_notebook(notebook_path):
    """Rewrite the tag line of a notebook file and stamp its Naas metadata.

    The notebook JSON is loaded, the first line containing ``**Tags:** #`` in
    each cell is replaced by the line built by ``add_business_tags`` (using
    the module-level ``df_notion``), ``metadata["naas"]`` is set to the
    notebook path and a freshly generated id, and the file is written back in
    place.

    Args:
        notebook_path: Path of the ``.ipynb`` file to update.
    """
    # Notebooks are UTF-8 JSON; be explicit so the platform default encoding
    # cannot break reading or writing.
    with open(notebook_path, encoding="utf-8") as f:
        nb = json.load(f)

    # Apply change
    for cell in nb.get("cells", []):
        sources = cell.get("source") or []
        # "source" may be stored as a single string instead of a list of lines
        if isinstance(sources, str):
            sources = sources.splitlines(keepends=True)

        # Update business tags
        for idx, source in enumerate(sources):
            if "**Tags:** #" not in source:
                continue
            new_source = add_business_tags(df_notion, notebook_path, source)
            print(new_source)
            # Keep the line separator when the tag line is followed by others
            if source.endswith("\n") and new_source and not new_source[-1].endswith("\n"):
                new_source = new_source[:-1] + [new_source[-1] + "\n"]
            # Replace only the tag line so the rest of the cell is preserved
            cell["source"] = sources[:idx] + new_source + sources[idx + 1:]
            break

    # Save notebook
    nb.setdefault("metadata", {})["naas"] = {"notebook_path": notebook_path,
                                             "notebook_id": str(uuid.uuid4())}
    with open(notebook_path, 'w', encoding="utf-8") as f:
        # ensure_ascii=False keeps emoji/accents readable instead of \uXXXX
        json.dump(nb, f, indent=1, ensure_ascii=False)
    print(f"✔️ {notebook_path} saved in Naas.")
    
# notebook_path = "Jupyter Notebooks/Jupyter_Notebooks_Add_cells_in_notebook_json.ipynb"
# update_notebook(notebook_path)

## Output

### Update all awesome notebooks

In [None]:
# Loop over the entries of the current working directory (expected to be the
# awesome-notebooks repository root) and update every notebook found directly
# inside each folder.
directories = sorted(os.listdir())
for directory in directories:
    # Skip names containing a dot (files with an extension, hidden entries)
    # and the known extension-less files.
    if "." in directory or directory in ["LICENSE", "Makefile"]:
        continue
    # Any other extension-less regular file would make os.listdir raise
    if not os.path.isdir(directory):
        continue
    # Sorted so notebooks are processed in a deterministic order
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".ipynb"):
            notebook_path = os.path.join(directory, filename)
            print("Started notebook... ", notebook_path)
            update_notebook(notebook_path)