In [1]:
import codecs
import copy
import os
import re

import markdown
import nbformat
import pandas as pd
from nbconvert import HTMLExporter
from traitlets.config import Config

### Vytvořit obsah

In [3]:
# List the notebook files found in ../scripts/, sorted by name.
# Matching on the ".ipynb" suffix (instead of the substring "ipynb") keeps out
# entries that merely contain that text, such as an ".ipynb_checkpoints"
# directory or a "something.ipynb.bak" backup.
notebooks = sorted(f for f in os.listdir("../scripts/") if f.endswith(".ipynb"))
notebooks

In [4]:
# Notebook file names in the order in which they appear as chapters.
ordered_notebooks = [
    "jupyter.ipynb",
    "python.ipynb",
    "http.ipynb",
    "api.ipynb",
    "site.ipynb",
    "gis.ipynb",
    "regex.ipynb",
    "tei.ipynb",
    "nlp.ipynb",
    "pdf.ipynb",
    "ocr.ipynb",
    "abm.ipynb",
]

In [5]:
# Build the table of contents: one record per notebook in `ordered_notebooks`
# holding the numbered chapter title plus links to the .ipynb source on GitHub
# and to the rendered HTML preview.
table_of_content = []
path = "../scripts/"
base_url = "https://github.com/CCS-ZCU/pribehy-dat/blob/master/scripts/"
htmlpreview_url = "https://htmlpreview.github.io/?https://github.com/CCS-ZCU/pribehy-dat/blob/master/htmls/"
# Raw string: "\s" inside a plain string literal is an invalid escape sequence.
# The title is whatever follows the first "#<whitespace>" up to the end of that
# line; a trailing newline is not required, so a heading that ends the cell is
# still found.
title_pattern = re.compile(r"#\s+(.+)")
for n, filename in enumerate(ordered_notebooks, start=1):
    notebook_node = nbformat.read(path + filename, as_version=4)
    # Only the first two cells are searched; slicing (rather than indexing
    # [0] and [1]) also works for a notebook that has a single cell.
    cell_source = "\n".join(cell["source"] for cell in notebook_node["cells"][:2])
    title_match = title_pattern.search(cell_source)
    if title_match is None:
        # Fail with a message that names the offending file instead of an
        # AttributeError on None.
        raise ValueError(
            "No '# title' heading found in the first two cells of " + path + filename
        )
    title = title_match.group(1)
    table_of_content.append({
        "chapter": str(n) + ". " + title,
        "ipynb_url": base_url + filename,
        "html_url": htmlpreview_url + filename.replace(".ipynb", ".html")})
table_of_content_df = pd.DataFrame(table_of_content, columns=["chapter", "ipynb_url", "html_url"])
table_of_content_df

In [6]:
def formatted_toc(row):
    """Render one table-of-contents record as a line of markdown.

    Parameters
    ----------
    row : mapping
        Must provide the keys "chapter", "ipynb_url" and "html_url"
        (string values).

    Returns
    -------
    str
        The chapter title in italics, followed by an "[ipynb]" link and an
        "[html]" link, terminated by a blank line.
    """
    link_templates = (
        (" [[ipynb]({})]", "ipynb_url"),
        (" [[html]({})]", "html_url"),
    )
    pieces = ["*", row["chapter"], "*"]
    pieces.extend(template.format(row[column]) for template, column in link_templates)
    pieces.append("\n\n")
    return "".join(pieces)
# Add a "formatted_toc" column holding the markdown line produced by
# formatted_toc() for each row (axis=1 passes rows, not columns).
table_of_content_df["formatted_toc"] = table_of_content_df.apply(formatted_toc, axis=1)

In [7]:
# Assemble the table-of-contents section: the "### Obsah" heading, every
# formatted entry in row order, and a closing "###" marker.
content_markdown = "".join(
    ["### Obsah\n", *table_of_content_df["formatted_toc"], "\n###"]
)
content_markdown

In [8]:
# Refresh the "### Obsah ... ###" section of the top-level README with the
# freshly generated table of contents, and write the same table on its own to
# ../scripts/README.md.
# Files are opened explicitly as UTF-8 so the Czech text does not depend on the
# platform's default encoding.
with open("../README.md", 'r', encoding='utf-8') as file:
    data = file.read()

# - ".+?" is non-greedy: the match stops at the first "###" after "### Obsah"
#   instead of running to the last "###" in the file and swallowing any
#   sections that follow the table of contents.
# - The replacement is supplied through a function so that backslashes in the
#   generated markdown are inserted literally rather than being interpreted as
#   regex replacement escapes.
# - count=1 limits the substitution to a single table-of-contents section.
data, replaced_count = re.subn(
    '### Obsah.+?###',
    lambda _match: content_markdown,
    data,
    count=1,
    flags=re.DOTALL,
)
if replaced_count == 0:
    # Nothing matched: the README is written back unchanged, so say so.
    print("Warning: no '### Obsah ... ###' section found in ../README.md; it was left unchanged.")
with open("../README.md", 'w', encoding='utf-8') as file:
    file.write(data)

with open("../scripts/README.md", 'w', encoding='utf-8') as file:
    file.write(content_markdown)

In [36]:
#html = markdown.markdown(data)
#with open("uvod.html", 'w') as file:
#    file.write(html)

### Vytvoř renderované HTML verze notebooků a ulož je do složky htmls

In [2]:
from nbconvert.preprocessors import ClearOutputPreprocessor

# nbconvert configuration selecting the "full" template for HTMLExporter.
# NOTE(review): the exporters in the cells below are created as
# `HTMLExporter()` without this object, so `c` has no effect unless it is
# passed in explicitly - confirm whether that is intended.
c = Config()
c.HTMLExporter.template_name = "full"  # Use the full template

def update_execution_times(notebook_node):
    """Renumber the code cells of a notebook sequentially, starting at 1.

    Every code cell is replaced by an independent copy whose
    ``execution_count`` and ``metadata["execution_time"]`` are both set to the
    cell's 1-based position among the code cells. Non-code cells are kept as
    they are.

    Parameters
    ----------
    notebook_node : mapping
        Notebook with a "cells" list; each cell needs a "cell_type" key and
        code cells need a "metadata" mapping.

    Returns
    -------
    The same ``notebook_node`` object, with its "cells" entry replaced by the
    renumbered list.
    """
    exec_time = 1
    new_cells = []
    for cell in notebook_node["cells"]:
        if cell["cell_type"] == "code":
            # A deep copy is required here: a shallow cell.copy() shares the
            # nested "metadata" mapping, so writing into it would also modify
            # the original cell object.
            new_cell = copy.deepcopy(cell)
            new_cell["metadata"]["execution_time"] = exec_time
            new_cell["execution_count"] = exec_time
            exec_time += 1
            new_cells.append(new_cell)
        else:
            new_cells.append(cell)
    notebook_node["cells"] = new_cells
    return notebook_node


def strip_output(notebook_node):
    """Run nbconvert's ClearOutputPreprocessor over a notebook.

    Parameters
    ----------
    notebook_node
        The notebook to process.

    Returns
    -------
    The first element of the preprocessor's result, i.e. the processed
    notebook (the accompanying resources are discarded).

    NOTE(review): whether the input object itself is modified depends on the
    preprocessor - verify before relying on the input keeping its outputs.
    """
    cleaner = ClearOutputPreprocessor()
    result = cleaner.preprocess(notebook_node, {})
    return result[0]

In [3]:
# Convert one notebook: render it to HTML with its outputs, then save the
# notebook itself back with outputs removed.
# NOTE: the notebook file is overwritten in place by the stripped version, so
# running this cell again on the same (already stripped) file renders an HTML
# page without outputs.
notebook_name = "regex.ipynb"
source_path = "../scripts/" + notebook_name
output_path = "../htmls/" + notebook_name.replace(".ipynb", ".html")

# load the notebook as nbformat v4 and renumber its code cells
notebook_node = nbformat.read(source_path, as_version=4)
updated_notebook_node = update_execution_times(notebook_node)

# render to HTML before stripping, so the page keeps the outputs
html_exporter = HTMLExporter()
body, _ = html_exporter.from_notebook_node(updated_notebook_node)
with codecs.open(output_path, 'w', encoding='utf-8') as file:
    file.write(body)

# strip the outputs and write the notebook back to where it was read from
stripped_notebook_node = strip_output(updated_notebook_node)
nbformat.write(stripped_notebook_node, source_path)

In [7]:
# Batch conversion: for every notebook in `ordered_notebooks`, render an HTML
# page (with outputs) into ../htmls/ and save the notebook back to ../scripts/
# with its outputs stripped.
#
# Because the notebooks are overwritten with their stripped versions, running
# this cell on a notebook that was already stripped would replace a good HTML
# page with one that has no outputs. To avoid that, a notebook without any
# stored outputs keeps its existing HTML page; a page is still rendered when
# none exists yet.
html_exporter = HTMLExporter()  # loop-invariant, so created once
os.makedirs("../htmls/", exist_ok=True)  # the output folder may not exist yet

for notebook_name in ordered_notebooks:
    # using nbformat to read the notebook
    notebook_node = nbformat.read("../scripts/" + notebook_name, as_version=4)
    output_path = "../htmls/" + notebook_name.replace(".ipynb", ".html")

    # True when at least one code cell still carries stored outputs
    has_outputs = any(
        cell.get("outputs")
        for cell in notebook_node["cells"]
        if cell["cell_type"] == "code"
    )

    # update execution times
    updated_notebook_node = update_execution_times(notebook_node)

    if has_outputs or not os.path.exists(output_path):
        # render from the updated but not yet stripped notebook
        body, _ = html_exporter.from_notebook_node(updated_notebook_node)

        # writing the converted html to a file
        with codecs.open(output_path, 'w', encoding='utf-8') as file:
            file.write(body)
    else:
        print("Keeping existing {} ({} has no stored outputs).".format(output_path, notebook_name))

    # strip output and save as .ipynb
    stripped_notebook_node = strip_output(updated_notebook_node)
    nbformat.write(stripped_notebook_node, "../scripts/" + notebook_name)