In [1]:
import nbformat
import glob, os, re

def load_notebook(file_path):
    try:
        name = os.path.splitext(os.path.basename(file_path))[0]
        nb = nbformat.read(file_path, as_version=4)
        return name, nb
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

file_paths = glob.glob("../notebooks/*.ipynb")
raw_notebooks = {}
for file_path in file_paths:
    name, nb = load_notebook(file_path)
    if nb is not None:
        raw_notebooks[name] = nb
print(f"Successfully loaded {len(raw_notebooks)} notebooks out of {len(file_paths)} found.")

# unravel files
notebooks = {}
for name, nb in raw_notebooks.items():
    notebooks[name] = []
    for cell in nb.cells:
        notebooks[name] += [line for line in cell.source.splitlines() if line.strip()]

Successfully loaded 21 notebooks out of 21 found.


In [2]:
includes = ["polar_verity_sense", "fitbit_charge_6", "abbott_freestyle_libre_1", "biostrap_evo", "polar_h10",
            "withings_sleep", "fitbit_sense", "withings_body_plus", "fitbit_charge_4", "dexcom_g6_pro",
            "whoop_strap_4", "coros_pace_2", "oura_ring_gen_3", "garmin_fenix_7s", "withings_scanwatch"]
specials = ["cronometer", "polar_vantage", "strava", "my_fitness_pal"]
exclude = ["Wearipedia_Extension_Guide", "Multi_Person_Wearable_Data_Collection_Chrome_Extension_for_Fitbit"]
headers = [
    "Setup",
    "Authentication and Authorization",
    "Data Extraction",
    "Data Exporting",
    "Adherence",
    "Visualization",
    "Advanced Visualization",
    "Outlier Detection and Data Cleaning",
    "Statistical Data Analysis"
]
headers.reverse()
print("Total notebooks: ", len(includes)+len(specials))

Total notebooks:  19


In [3]:
notebook_headers = {}
for name, lines in notebooks.items():
    #print(name)
    if name in exclude:
        #print("exclude!")
        continue
    keys = headers.copy()
    if name in specials:
        #print("special!")
        keys[0] = "Outlier Detection and Data Cleaning"
        keys[1] = "Statistical Data Analysis"
        #print(keys)
    sections = {}
    current_section = "Introduction"  # Everything before the first numbered header
    sections[current_section] = 0
    cur_key = keys.pop()
    for line in lines:
        if not keys: # handle when we are at last key
            sections[current_section] += 1
        pattern = rf"#\s\d.*{cur_key}"
        if re.search(pattern, line):
            #print(line)
            current_section = cur_key
            cur_key = None if not keys else keys.pop()
            sections[current_section] = 0
        else:
            sections[current_section] += 1
    notebook_headers[name] = sections

In [4]:
from collections import defaultdict

# Summing all values across notebooks
summed_headers = defaultdict(int)
for notebook in notebook_headers.values():
    for key, value in notebook.items():
        summed_headers[key] += value

# Convert back to a regular dictionary
summed_headers = dict(summed_headers)
total_lines = sum([value for key, value in summed_headers.items()])
total_notebooks = len(notebook_headers)
print(total_lines, total_notebooks)

15011 19


In [5]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np

labels = list(summed_headers.keys())
sizes = list(summed_headers.values())

# Custom colors for each section
colors = [
    "#ff9999", "#66b3ff", "#99ff99", "#ffcc99", "#c2c2f0",
    "#ffb3e6", "#c4e17f", "#76D7C4", "#F7DC6F", "#D98880"
]

# Set up custom global fonts
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['font.size'] = 14

# Create the figure and axis
fig, ax = plt.subplots(figsize=(8, 8))

# Create the pie chart:
# - startangle=90 ensures the first slice ("Introduction") begins at 12 o’clock.
# - counterclock=False makes the wedges drawn in a clockwise order.
wedges, texts, autotexts = ax.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,        # Start at 12 o’clock.
    counterclock=False,   # Draw in a clockwise direction.
    textprops={'color': "black", 'fontsize': 12, 'fontweight': 'bold'}
)

# Ensure the pie chart is drawn as a circle.
ax.axis('equal')

# Add a custom title with styling.
ax.set_title(
    f"Section Distribution across {total_notebooks} notebooks totalling {total_lines} lines of content",
    fontsize=20,
    fontweight='bold',
    fontfamily='DejaVu Sans',
    color='#2C3E50',
    pad=20
)

# Draw a firm black line from the center to the 12 o’clock position.
start_angle_rad = np.deg2rad(90)  # 90° in radians corresponds to 12 o'clock.
x_end = np.cos(start_angle_rad)
y_end = np.sin(start_angle_rad)
ax.plot([0, x_end], [0, y_end], color='black', lw=3, zorder=10)

# Place a legend outside the pie chart.
#ax.legend(
#    wedges,
#    labels,
#    title="Sections",
#    loc="center left",
#    bbox_to_anchor=(1, 0, 0.5, 1),
#    prop={'size': 12, 'weight': 'bold', 'family': 'DejaVu Sans'}
#)

# Save the figure as a PDF file.
pdf_filename = "piechart.pdf"
with PdfPages(pdf_filename) as pdf:
    pdf.savefig(fig, bbox_inches='tight')
    plt.close()

print(f"Pie chart has been successfully saved as '{pdf_filename}'.")

Pie chart has been successfully saved as 'piechart.pdf'.
