# Generate an overview notebook for measurement files

This notebook walks through the data directory and generates a second notebook that contains plotting code for each measurement file found, together with an overview table of the parameters read from the measurement metadata Excel file.

*Author: Ingo Breßler (ingo.bressler@bam.de)*  
*Last modified: 2025-06-05*

In [1]:
from pathlib import Path
# Root folder that holds one subfolder per measurement session.
datadir = Path("data")
# Excel sheet with the metadata for each sample measured.
samplesfile = datadir.joinpath("samples.xlsx")
# Only files with this suffix are treated as measurement data.
dataExtension = ".pdh"
# Substring a data file name must contain: data files unsmeared by SAXSquant.
dataFilter = "[10]"
# File name (without the .ipynb suffix) of the notebook to be generated.
outNotebookName = "SAXS Overview Notebook"

In [None]:
import pandas as pd
import itables
# Presumably switches DataFrame display to interactive itables tables
# (see the itables documentation).
itables.init_notebook_mode()
# skiprows=1 drops the first spreadsheet row, so the column names are read from
# the second row. processSubfolder() below relies on the columns "SAXS ID" and
# "sample" being present in this table.
samples = pd.read_excel(samplesfile, skiprows=1)
# Last expression of the cell: display the metadata table.
samples

In [None]:
import re, nbformat
from nbformat import v4 as nbf
def processSubfolder(datadir):
    """Build the notebook cells for all matching data files of one subfolder.

    A file is considered a data file if its suffix equals the module-level
    ``dataExtension`` and its name contains the module-level ``dataFilter``.
    The sample ID (``S`` or ``s`` followed by digits) is extracted from the
    file name and looked up in the ``"SAXS ID"`` column of the module-level
    ``samples`` table.

    Parameters
    ----------
    datadir : pathlib.Path
        The subfolder to scan (not recursive).

    Returns
    -------
    list
        A level-2 heading cell with the folder name, followed by one markdown
        cell (sample title and metadata table) and one code cell (``plotfile``
        call) per data file, ordered by file name.
    """
    print("Entering", datadir)
    cells = [nbf.new_markdown_cell(f"## {datadir.name}")]
    # iterdir() yields entries in arbitrary order; sort them so that the
    # generated notebook is reproducible across runs and platforms.
    for entry in sorted(datadir.iterdir()):
        if (not entry.is_file()
            or entry.suffix != dataExtension
            or dataFilter not in entry.name):
            continue
        idMatch = re.search('[sS][0-9]+', entry.stem)
        if idMatch is None:
            # Without a sample ID the file cannot be related to the metadata.
            print("  Skipping", entry, "- no sample ID found in file name")
            continue
        sampleId = idMatch.group()
        print("  Creating cells for", entry)
        sample = samples[samples["SAXS ID"] == sampleId]
        if sample.empty:
            # Still plot the data, but make the missing metadata visible
            # instead of failing with an IndexError.
            print("  No metadata found for sample ID", sampleId)
            title = sampleId
            description = "*No entry for this sample ID in the metadata table.*"
        else:
            sampleName = sample["sample"].values[0]
            title = f"{sampleName} ({sampleId})"
            description = sample.to_markdown()
        cells.append(nbf.new_markdown_cell(f"### {title}\n\n{description}"))
        # repr() produces valid Python string literals even if the sample name
        # contains quotes or backslashes; as_posix() keeps the generated
        # notebook usable on a different operating system.
        cells.append(nbf.new_code_cell(
            f"plotfile({entry.as_posix()!r}, {title!r});"))
    return cells

def processDatadir(datadir):
    """Generate the overview notebook for all subfolders of *datadir*.

    The generated notebook starts with a configuration cell, a cell loading
    and showing the sample metadata table and a cell defining the ``plotfile``
    helper. These are followed by the cells created by ``processSubfolder()``
    for every direct subfolder of *datadir*, ordered by folder name.

    The configuration cell reflects the values in use when this function is
    called: the *datadir* argument and the module-level ``samplesfile``,
    ``dataExtension``, ``dataFilter`` and ``outNotebookName``.

    Parameters
    ----------
    datadir : pathlib.Path or str
        Folder containing one subfolder per measurement session.

    Returns
    -------
    None
        The notebook is written to ``f"{outNotebookName}.ipynb"`` relative to
        the current working directory.
    """
    datadir = Path(datadir)
    cells = [nbf.new_markdown_cell("# Data Overview")]
    # Write the configuration actually in use into the generated notebook,
    # instead of hard-coded literals which silently get out of sync once the
    # settings of this notebook are changed.
    code = f"""
from pathlib import Path
datadir = Path({datadir.as_posix()!r})
samplesfile = Path({Path(samplesfile).as_posix()!r})
dataExtension = {dataExtension!r}
dataFilter = {dataFilter!r}
outNotebookName = {outNotebookName!r}
"""
    cells.append(nbf.new_code_cell(code.strip()))
    code = """
import pandas as pd
import itables
itables.init_notebook_mode()
samples = pd.read_excel(samplesfile, skiprows=1)
samples
"""
    cells.append(nbf.new_code_cell(code.strip()))
    # Plain string on purpose (no format()/f-string): the braces of the LaTeX
    # exponent are written to the generated notebook exactly as they appear.
    code = """
# some general imports
from jupyter_analysis_tools import readdata
import matplotlib.pyplot as plt
def plotfile(filename, label):
    df,_ = readdata(filename,
        #q_range=(0.1, 1.3)
    )
    df['e'] = df['e'].clip(lower=0) # prevent negative errorbar
    df.plot("q", "I", yerr="e", logx=True, logy=True, label=label, grid=True, figsize=(10,5),
            xlabel = r'$q$ (nm$^{-1}$)', ylabel = 'Intensity', ecolor = 'lightgray');
"""
    cells.append(nbf.new_code_cell(code.strip()))
    # iterdir() yields entries in arbitrary order; sort for a reproducible
    # sequence of sections in the generated notebook.
    for entry in sorted(datadir.iterdir()):
        if not entry.is_dir():
            continue
        cells += processSubfolder(entry)
    nb = nbf.new_notebook()
    nb["cells"] = cells
    nbformat.write(nb, f"{outNotebookName}.ipynb")

In [49]:
# Scan all subfolders of the data directory and write the overview notebook.
processDatadir(datadir)

Entering data\2024-04-15
  Creating cells for data\2024-04-15\S16312[10].pdh
  Creating cells for data\2024-04-15\S16344[10].pdh
  Creating cells for data\2024-04-15\S16346[10].pdh
  Creating cells for data\2024-04-15\S16347[10].pdh
Entering data\2024-10-02
  Creating cells for data\2024-10-02\S16439[10].pdh
  Creating cells for data\2024-10-02\S16440[10].pdh
  Creating cells for data\2024-10-02\S16441[10].pdh
  Creating cells for data\2024-10-02\S16442[10].pdh
  Creating cells for data\2024-10-02\S16443[10].pdh
Entering data\2024-11-20
  Creating cells for data\2024-11-20\S17105[10].pdh
  Creating cells for data\2024-11-20\S17106[10].pdh
  Creating cells for data\2024-11-20\S17108[10].pdh
  Creating cells for data\2024-11-20\S17109[10].pdh
  Creating cells for data\2024-11-20\S17110[10].pdh
  Creating cells for data\2024-11-20\S17111[10].pdh
  Creating cells for data\2024-11-20\S17112[10].pdh
  Creating cells for data\2024-11-20\S17113[10].pdh
  Creating cells for data\2024-11-20\S171