## ScanDatabase builder for fast query

Build a comprehensive, fast, queryable Parquet 'scans database'.

See the API documentation for more details.

In [6]:
from datetime import date
from geecs_data_utils import ScanPaths
from geecs_data_utils.scans_database.builder import ScanDatabaseBuilder

# --- Inputs ---------------------------------------------------------------
experiment = "Undulator"
data_root = ScanPaths.paths_config.base_path
# The parquet dataset is written under the experiment's folder in the data root.
output_path = data_root / experiment / "scan_database_parquet"
# Start and end dates of the scans to include in the initial build.
date_range = (date(2022, 1, 3), date(2025, 8, 5))

# --- Initial build --------------------------------------------------------
# mode="overwrite" is used for the first build; mode="append" is the other
# mode used in this notebook (for adding new scans afterwards).
ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=date_range,
    buffer_size=50,
    max_scans=1_000_000,
    mode="overwrite",
)

After the database is initially built, new scan entries are easily added using `mode='append'` and `date_range=None`.

In [7]:
# Incremental update: with no explicit date range, the builder picks up from
# the last date in _update_log.json and continues through today.
ScanDatabaseBuilder.stream_to_parquet(
    mode="append",
    date_range=None,  # None -> auto-resume via the sidecar log
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    buffer_size=50,
)

Filter the scans database using many flexible criteria.

In [5]:
from datetime import date

# ScanPaths is imported here as well as in the builder cell so that this query
# cell runs on a fresh kernel without first executing the database build.
from geecs_data_utils import ScanPaths
from geecs_data_utils.scans_database.database import ScanDatabase

data_root = ScanPaths.paths_config.base_path
experiment = "Undulator"
output_path = data_root / experiment / "scan_database_parquet"

# Initialize the ScanDatabase object from the parquet dataset location
db = ScanDatabase(output_path)

# Apply filters

# Filter by date first, for a fast initial filter
db.date_range(date(2025, 8, 5), date(2025, 8, 8))

# Filter by scan parameter (case insensitive): use the alias, the variable
# name, or any part of either
db.filter_scan_parameter_contains("shotnumber")

# Filter by ECS live dump entries: (device_like, variable_like, target value, tolerance)
db.filter_ecs_value_within("hexapod", "y", target=18.5, tol=0.5)

# Use predefined named filters, autoloaded based on the experiment name.
# Named filters can have various date-range validities.
db.apply("PMQ_inserted")

# Convert the filtered result to a DataFrame
df = db.to_df()

print("rows:", len(df))
df.head(5)

[INFO] Loaded 1/1 filters from undulator.yml
rows: 10


Unnamed: 0,day,number,experiment,scalar_data_file,tdms_file,non_scalar_devices,scan_parameter,start,end,step_size,...,scan_mode,scan_description,background,scan_metadata_raw_fields,ecs_dump,has_analysis_dir,notes,year,month,__ecs
3,5,2,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,"[UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte...",Shotnumber,0,1,1,...,noscan,. scanning Shotnumber. None,0,"{""Scan No"": ""2"", ""ScanStartInfo"": "". scanning ...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
4,5,3,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,"[UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte...",Shotnumber,0,1,1,...,noscan,no scan for EMP diagnostics. scanning Shotnumb...,0,"{""Scan No"": ""3"", ""ScanStartInfo"": ""no scan for...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
6,5,5,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,"[UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte...",Shotnumber,0,1,1,...,noscan,no scan. scanning Shotnumber. None,0,"{""Scan No"": ""5"", ""ScanStartInfo"": ""no scan. sc...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
7,5,6,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,"[UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte...",Shotnumber,0,1,1,...,noscan,no scan. scanning Shotnumber. None,0,"{""Scan No"": ""6"", ""ScanStartInfo"": ""no scan. sc...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
8,5,7,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S...,"[UC_ALineEBeam3, U_BCaveICT, U_RTA4000, Z_Test...",Shotnumber,0,1,1,...,noscan,no scan opn bhaci bam. scanning Shotnumber. None,0,"{""Scan No"": ""7"", ""ScanStartInfo"": ""no scan opn...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
