In [5]:
# Earlier chapter set, kept for reference:
# ['Notes', 'Supervised Learning', 'Unsupervised Learning', 'Statistical Learning']
# Top-level folders to process; every cell below iterates over this list.
CHAPTERS = [
    'Statistical_Learning',
    'Linear Algebra',
]

## Update `_toc.yml` based on the current file structure

In [6]:
import os
import yaml

def get_files(path):
    """Recursively collect the notebooks found under ``path``.

    Hidden entries (names starting with ``.``) and jupyter-book entries
    (names starting with ``_``) are skipped, and so is every file that is not
    an ``.ipynb`` notebook.

    Args:
        path: Directory to scan.

    Returns:
        A dict keyed by entry name. A notebook maps its file name without the
        ``.ipynb`` extension to ``None``; a sub-directory maps its full name
        to the dict produced by scanning that directory.
    """
    entries = {}
    with os.scandir(path) as it:
        for entry in it:
            name = entry.name

            # Skip if the entry is a hidden entry (.*) or a jupyter-book entry (_*)
            if name.startswith(('.', '_')):
                continue

            if entry.is_file():
                # Strip only the final extension so that names containing
                # extra dots (e.g. "1.2_intro.ipynb") keep their whole stem.
                stem, ext = os.path.splitext(name)
                if ext == '.ipynb':
                    entries[stem] = None
            elif entry.is_dir():
                # Key the folder by its real name: the key is joined back into
                # a path later, so truncating it at a dot would break the path.
                entries[name] = get_files(entry.path)
            # Anything else (e.g. a broken symlink) is neither a notebook nor
            # a folder that can be scanned, so it is ignored.

    return entries

def files_to_yaml(dirname, files):
    """Turn the nested dict built by ``get_files`` into a list of toc entries.

    Args:
        dirname: Path prefix joined in front of every name in ``files``.
        files: Dict mapping a name to ``None`` (a notebook) or to a nested
            dict of the same shape (a folder).

    Returns:
        A list of dicts in alphabetical order of name. Every dict carries
        ``file`` and ``title``; a folder additionally carries ``sections`` and
        points ``file`` at the ``index`` inside it. Names whose part before
        the first dot is ``index`` are skipped, and so are folders that
        produce no sections.
    """
    toc_entries = []

    # Names are unique dict keys, so sorting them gives the alphabetical order
    for name in sorted(files):
        children = files[name]

        # The index notebook is referenced by its folder entry, never listed itself
        if name.partition('.')[0] == 'index':
            continue

        target = os.path.join(dirname, name)

        if children is None:
            # Plain notebook
            node = {'file': target}
        else:
            # Folder: build its sections first and drop it when there are none
            nested = files_to_yaml(target, children)
            if not nested:
                continue
            node = {'sections': nested, 'file': os.path.join(target, 'index')}

        # A leading "<digits>_" prefix is numbering and is not shown in the title
        prefix, separator, remainder = name.partition('_')
        label = remainder if (separator and prefix.isdigit()) else name
        node['title'] = label.replace('_', ' ')

        toc_entries.append(node)

    return toc_entries

# Skeleton of the table of contents; one part is appended per chapter below
toc_yaml = {'format': 'jb-book', 'root': 'root', 'parts': []}

for chapter in CHAPTERS:
    # Each chapter folder becomes a captioned part whose chapters are the
    # notebooks and folders found inside it
    toc_yaml['parts'].append({
        'caption': chapter,
        'numbered': 2,
        'chapters': files_to_yaml(chapter, get_files(chapter)),
    })

# Overwrite _toc.yml with the freshly generated structure
with open('_toc.yml', 'w') as toc_file:
    yaml.dump(toc_yaml, toc_file)

## Change every notebook's metadata to remove hidden cells

In [3]:
import glob
import nbformat

def change_metadata(path):
    """Sync the removal tags of every cell in a notebook with its hidden flags.

    For each cell the existing hide-*/remove-* tags are discarded and then
    re-derived from the cell's ``jupyter`` metadata: source and outputs both
    hidden gives ``remove-cell``, only the source hidden gives
    ``remove-input`` and only the outputs hidden gives ``remove-output``.
    All other tags are kept. The notebook is written back to ``path`` only
    when at least one cell's tag set changed.

    Args:
        path: Path of the notebook to read and, if needed, rewrite in place.
    """
    visibility_tags = {
        'hide-cell', 'hide-input', 'hide-output',
        'remove-cell', 'remove-input', 'remove-output',
    }

    notebook = nbformat.read(path, nbformat.NO_CONVERT)

    changed = False
    for cell in notebook.cells:
        metadata = cell.get('metadata', {})
        tags = set(metadata.get('tags', []))
        jupyter = metadata.get('jupyter', {})

        source_hidden = bool(jupyter.get('source_hidden', False))
        # The notebook format spells this flag "outputs_hidden"; the singular
        # spelling is still honoured in case a notebook carries it.
        outputs_hidden = bool(
            jupyter.get('outputs_hidden', False) or jupyter.get('output_hidden', False)
        )

        new_tags = tags - visibility_tags
        if source_hidden and outputs_hidden:
            new_tags.add('remove-cell')
        elif source_hidden:
            new_tags.add('remove-input')
        elif outputs_hidden:
            new_tags.add('remove-output')

        if new_tags != tags:
            # Sorted so the saved tag order is stable between runs instead of
            # following arbitrary set iteration order.
            cell.setdefault('metadata', {})['tags'] = sorted(new_tags)
            changed = True

    if changed:
        nbformat.write(notebook, path)
        
# Apply the tag sync to every notebook found anywhere under each chapter folder
for chapter in CHAPTERS:
    chapter_pattern = f'./{chapter}/**/*.ipynb'
    for notebook_path in glob.glob(chapter_pattern, recursive=True):
        change_metadata(notebook_path)

## Add the updated and first committed dates to every notebook

In [33]:
import re
import glob
import datetime
import nbformat
from git import Repo

def update_date_ipynb(path):
    """Insert or refresh the date line kept in the first cell of a notebook.

    The date line has the form ``*Updated MM-DD-YYYY (First commited
    MM-DD-YYYY)*``. Nothing happens for a notebook without any commit. If the
    first cell is not such a markdown date line (or the notebook has no cells),
    a new date cell is inserted at the top. Otherwise the existing date cell
    is rewritten with today's date, but only when ``nbdiff -s HEAD`` reports
    something for the notebook.

    Args:
        path: Path of the notebook, relative to the repository in ``./``.

    Raises:
        FileNotFoundError: If the ``nbdiff`` executable cannot be found.
    """
    # Imported here so the function does not rely on another import cell
    import subprocess

    # Get ipynb commit history
    repo = Repo('./')
    commits = list(repo.iter_commits(paths=path))

    # Do nothing if the file is not committed
    if not commits:
        return

    # The last entry of the history is treated as the notebook's first commit
    first_commit_time = commits[-1].committed_datetime.strftime('%m-%d-%Y')

    # Use today's date as the updated date
    updated_time = datetime.datetime.now().strftime('%m-%d-%Y')

    # The spelling "commited" is part of the stored format and of the pattern
    # below; it must stay as is so existing date cells keep being recognised.
    new_source = f'*Updated {updated_time} (First commited {first_commit_time})*'

    notebook = nbformat.read(path, nbformat.NO_CONVERT)

    # Raw strings keep the regex escapes intact without invalid-escape warnings
    date_regex = r'(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])-\d{4}'
    pattern = re.compile(rf'^\*(Updated ){date_regex}( \(First commited ){date_regex}\)\*$')

    # The date cell, when present, is always the first one in the notebook
    first_cell = notebook.cells[0] if notebook.cells else None
    has_date_cell = (
        first_cell is not None
        and first_cell.cell_type == 'markdown'
        and pattern.fullmatch(first_cell.source) is not None
    )

    # Insert a fresh date cell if the notebook does not have one yet
    if not has_date_cell:
        date_cell = {
            'cell_type': 'markdown',
            'metadata': {},
            'source': new_source,
        }
        notebook.cells.insert(0, nbformat.notebooknode.from_dict(date_cell))
        nbformat.write(notebook, path)
        return

    # Passing the arguments as a list keeps a path containing spaces as one
    # argument; stderr is merged into stdout so any output counts as a change.
    completed = subprocess.run(
        ['nbdiff', '-s', 'HEAD', path, '--no-color'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    # Refresh the date only if the sources of the notebook have been changed
    if completed.stdout.splitlines():
        first_cell['source'] = new_source
        nbformat.write(notebook, path)
        
# Refresh the date cell of every notebook found anywhere under each chapter folder
for chapter in CHAPTERS:
    chapter_pattern = f'./{chapter}/**/*.ipynb'
    for notebook_path in glob.glob(chapter_pattern, recursive=True):
        update_date_ipynb(notebook_path)

## Append to conf.py to allow block math

In [7]:
!rm -r _build
!jupyter-book config sphinx .

settings = ['suppress_warnings = ["myst.header"]', 'myst_dmath_double_inline = True']
with open('conf.py', 'a') as file:
    file.write('\n# Belows are more customized settings')
    for setting in settings:
        file.write(f'\n{setting}')
    
!sphinx-build . ./_build/html/ -b html
!rm conf.py

[32mWrote conf.py to /home/l1qiao/Projects/joeyonng-notebook[0m
[01mRunning Sphinx v4.5.0[39;49;00m
[01mmaking output directory... [39;49;00mdone
[etoc] Changing master_doc to 'root'
[etoc] Excluded 26 extra file(s) not in toc
checking bibtex cache... out of date
parsing bibtex file /home/l1qiao/Projects/joeyonng-notebook/references.bib... parsed 5 entries
[01mmyst v0.15.2:[39;49;00m MdParserConfig(renderer='sphinx', commonmark_only=False, enable_extensions=['colon_fence', 'dollarmath', 'linkify', 'substitution', 'tasklist'], dmath_allow_labels=True, dmath_allow_space=True, dmath_allow_digits=True, dmath_double_inline=True, update_mathjax=True, mathjax_classes='tex2jax_process|mathjax_process|math|output_area', disable_syntax=[], url_schemes=['mailto', 'http', 'https'], heading_anchors=None, heading_slug_func=None, html_meta=[], footnote_transition=True, substitutions=[], sub_delimiters=['{', '}'], words_per_minute=200)
[01mbuilding [mo]: [39;49;00mtargets for 0 po files that

## Push the built HTML pages to the gh-pages branch on GitHub

In [46]:
# Publish the contents of _build/html with ghp-import.
# If the push prompts for a password, run this command in a terminal instead.
!ghp-import -n -p -f _build/html

Enumerating objects: 27, done.
Counting objects: 100% (27/27), done.
Delta compression using up to 64 threads
Compressing objects: 100% (15/15), done.
Writing objects: 100% (15/15), 8.06 KiB | 2.69 MiB/s, done.
Total 15 (delta 10), reused 0 (delta 0)
remote: Resolving deltas: 100% (10/10), completed with 10 local objects.[K
To https://github.com/Joeyonng/joeyonng-notebook.git
   eeb06f0..517c444  gh-pages -> gh-pages
