In [63]:
import os
from nbconvert import HTMLExporter
import codecs
import nbformat
from traitlets.config import Config
import re
import pandas as pd
import markdown

In [64]:
# Relative path to the "site" chapter notebook inside the scripts folder.
# NOTE(review): no other cell visible here reads this variable — confirm it is still needed.
notebook_path = '../scripts/site.ipynb'

### Vytvořit obsah

In [65]:
# List every notebook file in the scripts folder, alphabetically.
# Match on the file extension rather than on a substring, so that entries
# such as ".ipynb_checkpoints" are not picked up by accident.
notebooks = sorted(f for f in os.listdir("../scripts/") if f.endswith(".ipynb"))
notebooks

['api.ipynb',
 'gis.ipynb',
 'http.ipynb',
 'jupyter.ipynb',
 'nlp.ipynb',
 'ocr.ipynb',
 'pdf.ipynb',
 'python.ipynb',
 'regex.ipynb',
 'site.ipynb',
 'tei.ipynb']

In [66]:
# Chapter notebooks in reading order (this order drives the chapter numbering
# of the table of contents and the order of the HTML export).
ordered_notebooks = [
    stem + ".ipynb"
    for stem in (
        "jupyter",
        "python",
        "http",
        "api",
        "site",
        "gis",
        "regex",
        "tei",
        "nlp",
        "pdf",
        "ocr",
    )
]

In [67]:
# Build the table of contents: one record per chapter notebook, in reading order.
path = "../scripts/"
base_url = "https://github.com/CCS-ZCU/pribehy-dat/blob/master/scripts/"
htmlpreview_url = "https://htmlpreview.github.io/?https://github.com/CCS-ZCU/pribehy-dat/blob/master/htmls/"

# Raw string: "\s" must reach the regex engine as a character class instead of
# being treated as an (invalid) string escape sequence.
title_pattern = re.compile(r"(#\s+)(.+)(\n)")

table_of_content = []
for n, filename in enumerate(ordered_notebooks):
    notebook_node = nbformat.read(path + filename, as_version=4)
    # The chapter title is looked up in the first two cells only. Slicing
    # tolerates notebooks with fewer than two cells, and the trailing newline
    # lets a heading on the very last line still match.
    cell_source = "\n".join(cell["source"] for cell in notebook_node["cells"][:2]) + "\n"
    title_match = title_pattern.search(cell_source)
    if title_match is None:
        raise ValueError(
            "No markdown heading found in the first two cells of " + path + filename
        )
    title = title_match.group(2)
    table_of_content.append({
        "chapter": str(n + 1) + ". " + title,
        "ipynb_url": base_url + filename,
        "html_url": htmlpreview_url + filename.replace(".ipynb", ".html"),
    })

table_of_content_df = pd.DataFrame(table_of_content, columns=["chapter", "ipynb_url", "html_url"])
table_of_content_df

Unnamed: 0,chapter,ipynb_url,html_url
0,1. Jupyter: Stručný úvod,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
1,"2. Python: Základní proměnné, funkce a datové ...",https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
2,3. HTTP: Dotazování webu,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
3,4. API: Aplikační rozhraní,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
4,5. Formální síťová analýza,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
5,6. GIS: Analýza prostorových dat,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
6,7. Regulérní výrazy,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
7,8. TEI-XML: Digitální edice,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
8,9. NLP: Zpracování přirozeného jazyka,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....
9,10. PDF: Extrakce textu,https://github.com/CCS-ZCU/pribehy-dat/blob/ma...,https://htmlpreview.github.io/?https://github....


In [68]:
def formatted_toc(row):
    """Render one table-of-contents record as a Markdown paragraph.

    Parameters
    ----------
    row : mapping
        Must provide the keys "chapter", "ipynb_url" and "html_url".

    Returns
    -------
    str
        The chapter title wrapped in ``*...*``, followed by a bracketed
        ``[ipynb]`` link and a bracketed ``[html]`` link, terminated by a
        blank line.
    """
    ipynb_link = " [[ipynb]({})]".format(row["ipynb_url"])
    html_link = " [[html]({})]".format(row["html_url"])
    return "".join(("*", row["chapter"], "*", ipynb_link, html_link, "\n\n"))
# Render one Markdown line per chapter (axis=1 hands each row to formatted_toc)
# and store the result in a new "formatted_toc" column of the existing frame.
table_of_content_df["formatted_toc"] = table_of_content_df.apply(formatted_toc, axis=1)

In [69]:
# Assemble the whole TOC section: the "### Obsah" heading, one rendered line
# per chapter, and a closing "###" line that marks the end of the section.
toc_entries = table_of_content_df["formatted_toc"].tolist()
content_markdown = "".join(["### Obsah\n", *toc_entries, "\n###"])
content_markdown

'### Obsah\n*1. Jupyter: Stručný úvod* [[ipynb](https://github.com/CCS-ZCU/pribehy-dat/blob/master/scripts/jupyter.ipynb)] [[html](https://htmlpreview.github.io/?https://github.com/CCS-ZCU/pribehy-dat/blob/master/htmls/jupyter.html)]\n\n*2. Python: Základní proměnné, funkce a datové struktury* [[ipynb](https://github.com/CCS-ZCU/pribehy-dat/blob/master/scripts/python.ipynb)] [[html](https://htmlpreview.github.io/?https://github.com/CCS-ZCU/pribehy-dat/blob/master/htmls/python.html)]\n\n*3. HTTP: Dotazování webu* [[ipynb](https://github.com/CCS-ZCU/pribehy-dat/blob/master/scripts/http.ipynb)] [[html](https://htmlpreview.github.io/?https://github.com/CCS-ZCU/pribehy-dat/blob/master/htmls/http.html)]\n\n*4. API: Aplikační rozhraní* [[ipynb](https://github.com/CCS-ZCU/pribehy-dat/blob/master/scripts/api.ipynb)] [[html](https://htmlpreview.github.io/?https://github.com/CCS-ZCU/pribehy-dat/blob/master/htmls/api.html)]\n\n*5. Formální síťová analýza* [[ipynb](https://github.com/CCS-ZCU/pribeh

In [70]:
# Refresh the table-of-contents section of the top-level README in place.
# The encoding is given explicitly because the text contains Czech characters
# and the platform default encoding is not guaranteed to be UTF-8.
with open("../README.md", 'r', encoding="utf-8") as file:
    data = file.read()

# Non-greedy ".+?" stops at the first "###" after "### Obsah", so headings
# further down the README are left alone (a greedy match would run to the
# last "###" in the file). The replacement is passed as a function so that
# backslashes in chapter titles are inserted literally instead of being
# parsed as regex replacement escapes.
data = re.sub('### Obsah.+?###', lambda _match: content_markdown, data, flags=re.DOTALL)
with open("../README.md", 'w', encoding="utf-8") as file:
    file.write(data)

# The scripts folder gets a README that consists of the table of contents only.
with open("../scripts/README.md", 'w', encoding="utf-8") as file:
    file.write(content_markdown)

In [71]:
# Render the updated README text (Markdown) to HTML and save it as the intro page.
html = markdown.markdown(data)
# Explicit UTF-8: the text contains Czech characters and the platform default
# encoding is not guaranteed to be able to represent them.
with open("uvod.html", 'w', encoding="utf-8") as file:
    file.write(html)

### Vytvoř renderované HTML verze notebooků a ulož je do složky htmls

In [72]:
def update_execution_times(notebook_node):
    """Renumber the code cells of a notebook sequentially, starting at 1.

    Each code cell is replaced by a copy whose ``execution_count`` and
    ``metadata["execution_time"]`` are both set to the running counter.
    Cells of any other type are kept as they are.

    Parameters
    ----------
    notebook_node : mapping
        Notebook with a "cells" list; every cell needs a "cell_type" key.

    Returns
    -------
    The same ``notebook_node`` object; its "cells" entry is replaced by the
    new list. The original code-cell objects are not modified.
    """
    counter = 0
    new_cells = []
    for cell in notebook_node["cells"]:
        if cell["cell_type"] != "code":
            new_cells.append(cell)
            continue
        counter += 1
        new_cell = cell.copy()
        # cell.copy() is shallow, so the nested metadata mapping would still be
        # shared with the original cell; copy it as well before writing to it.
        new_metadata = cell.get("metadata", {}).copy()
        new_metadata["execution_time"] = counter
        new_cell["metadata"] = new_metadata
        new_cell["execution_count"] = counter
        new_cells.append(new_cell)
    notebook_node["cells"] = new_cells
    return notebook_node

In [54]:
# Exporter settings: render the notebooks with the "full" template.
c = Config(
    {
        "HTMLExporter": {
            "template_name": "full",  # Use the full template
        }
    }
)

# One exporter serves all notebooks. The config has to be passed explicitly;
# a bare HTMLExporter() never sees the settings defined above.
html_exporter = HTMLExporter(config=c)

for notebook_name in ordered_notebooks:
    source_path = "../scripts/" + notebook_name
    # using nbformat to read the notebook
    notebook_node = nbformat.read(source_path, as_version=4)
    notebook_node = update_execution_times(notebook_node)
    # The renumbered notebook is written back over its source file.
    nbformat.write(notebook_node, source_path)
    # using HTMLExporter from nbconvert to convert .ipynb to .html
    # (the resources dict is unused; a named variable avoids clobbering IPython's "_")
    body, _resources = html_exporter.from_notebook_node(notebook_node)
    # writing the converted html to a file in the current working directory
    # NOTE(review): the table-of-contents links point to an "htmls" folder —
    # confirm this notebook is run from that folder.
    output_path = notebook_name.replace(".ipynb", ".html")
    with codecs.open(output_path, 'w', encoding='utf-8') as file:
        file.write(body)