# Examples of interfacing with geecs data

## Note:
`geecs_data_utils` makes use of `geecs_paths_config`, which is loaded in the background. It looks for a specific config file that holds the user's desired defaults. If this config file doesn't exist, some basic attributes need to be set manually.

In [1]:
from geecs_data_utils import ScanData, ScanTag

# If no config file exists, set the base path to the experiment data as below.
# The assumption is that data is stored like <base_path>/<experiment name>.
# NOTE(review): `ScanPaths` and `Path` are not imported in this cell, so the line
# below needs those imports before it can be uncommented. Later cells read
# `ScanData.paths_config.base_path` -- confirm which class the override belongs on.
# ScanPaths.paths_config.base_path = Path('Z:/data')

# Create a unique identifier for a scan
tag = ScanTag(year=2025, month=8, day=7, number=5, experiment="Undulator")

# Create the ScanData object for that scan tag
sd = ScanData(tag=tag)

## Many available methods for interfacing

### Load the sFile as a pandas dataframe

In [2]:
# Load the scan's sFile into a dataframe and preview the first five rows
df = sd.get_sfile_data()
df.head(5)

Unnamed: 0,Elapsed Time,Bin #,scan,U_BCaveICT Python Results.ChA Alias:U_BCaveICT Charge pC,U_BCaveICT Python Results.ChB Alias:U_UndulatorExit_ICT Charge pC,U_BCaveICT acq_timestamp,UC_ALineEBeam3 acq_timestamp,Z_Test_Scope_2 acq_timestamp,Z_Test_Scope acq_timestamp,Shotnumber
0,3.0,1,5,124.321792,0.690896,3837450000.0,3837450000.0,3837450000.0,3837450000.0,1
1,4.0,1,5,131.95042,0.446552,3837450000.0,3837450000.0,3837450000.0,3837450000.0,2
2,5.0,1,5,135.621877,0.555596,3837450000.0,3837450000.0,3837450000.0,3837450000.0,3
3,6.0,1,5,96.865247,0.645041,3837450000.0,3837450000.0,3837450000.0,3837450000.0,4
4,7.0,1,5,85.046595,0.576937,3837450000.0,3837450000.0,3837450000.0,3837450000.0,5


### Create a dataframe of shot numbers and file paths for devices, restricted to shot numbers where all devices were saved

In [3]:
# Make a list of (<device>, <file_tail>) tuples. Note, file names are typically:
# Scan<scan_number>DeviceName<shotnumber><extra><extension>. Here file_tail represents
# everything after <shotnumber>. For example, for a magspec-type device it could be
# ('magspec', 'interpSpec.txt').

dev_list = [
    ("Z_Test_Scope", ".dat"),
    ("Z_Test_Scope_2", ".dat"),
    ("UC_ALineEBeam3", ".png"),
]
# Build the dataframe of shot numbers and per-device file paths, then preview it
shots = sd.get_common_shot_dataframe(dev_list)
shots.head(5)

Unnamed: 0,shot_number,Z_Test_Scope,Z_Test_Scope_2,UC_ALineEBeam3
0,1,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...
1,2,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...
2,3,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...
3,4,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...
4,5,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...,/Volumes/hdna2/data/Undulator/Y2025/08-Aug/25_...


## Build a comprehensive, fast, queryable Parquet 'scans database'
See the documentation for more details.

In [None]:
from datetime import date
from geecs_data_utils.scans_database.builder import ScanDatabaseBuilder

# Inputs
data_root = ScanData.paths_config.base_path
experiment = "Undulator"
# The database is written inside the experiment's folder under the data root
output_path = data_root / experiment / "scan_database_parquet"
# Presumably (start date, end date) of the scans to include -- confirm in the builder docs
date_range = (date(2022, 1, 3), date(2025, 8, 5))

ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=date_range,
    buffer_size=50,
    max_scans=1000000,
    mode="overwrite",  # or 'append' to add new scans to an already-built database
)

### After the database is initially built, new scan entries are easily added using mode='append' and date_range=None

In [None]:
# Relies on data_root, experiment, output_path and ScanDatabaseBuilder from the
# database-building cell; run that cell (or redefine those names) first.
# Picks up from the last date in _update_log.json through today
ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=None,  # <- auto-resume via sidecar
    buffer_size=50,
    mode="append",
)

### Filter scans database based on many flexible criteria

In [10]:
from geecs_data_utils.scans_database.database import ScanDatabase
from datetime import date

data_root = ScanData.paths_config.base_path
experiment = "Undulator"
output_path = data_root / experiment / "scan_database_parquet"

# Initialize the ScanDatabase object
db = ScanDatabase(output_path)

# Apply filters

# Filter by date first, for a fast initial filter
db.date_range(date(2025, 8, 5), date(2025, 8, 8))

# Can filter by scan parameter, case insensitive. Use the alias, var name or any part
db.filter_scan_parameter_contains("shotnumber")

# Filter by ecs live dump entries: (device_like, variable_like, target value, tolerance)
db.filter_ecs_value_within("hexapod", "y", target=18.5, tol=0.5)

# Use predefined named filters, autoloaded based on experiment name. Can have various date range validities
db.apply("PMQ_inserted")

# Convert the result to a dataframe
df = db.to_df()

print("rows:", len(df))
df.head(5)

[INFO] Loaded 1/1 filters from undulator.yml
rows: 10


Unnamed: 0,day,number,experiment,scalar_data_file,tdms_file,non_scalar_devices,scan_parameter,start,end,step_size,...,scan_mode,scan_description,background,scan_metadata_raw_fields,ecs_dump,has_analysis_dir,notes,year,month,__ecs
0,7,1,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,[Z_Test_Scope],Shotnumber,0,1,1,...,noscan,. scanning Shotnumber. None,0,"{""Scan No"": ""1"", ""ScanStartInfo"": "". scanning ...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
1,7,2,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,"[Z_Test_Scope, Z_Test_Scope_2]",Shotnumber,0,1,1,...,noscan,. scanning Shotnumber. None,0,"{""Scan No"": ""2"", ""ScanStartInfo"": "". scanning ...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
2,7,3,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,"[Z_Test_Scope, Z_Test_Scope_2]",Shotnumber,0,1,1,...,noscan,. scanning Shotnumber. None,0,"{""Scan No"": ""3"", ""ScanStartInfo"": "". scanning ...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
3,7,4,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,"[Z_Test_Scope, Z_Test_Scope_2]",Shotnumber,0,1,1,...,noscan,. scanning Shotnumber. None,0,"{""Scan No"": ""4"", ""ScanStartInfo"": "". scanning ...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
4,7,5,Undulator,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,Z:\data\Undulator\Y2025\08-Aug\25_0807\scans\S...,"[UC_ALineEBeam3, U_BCaveICT, Z_Test_Scope, Z_T...",Shotnumber,0,1,1,...,noscan,statistics scan on BAM. scanning Shotnumber. None,0,"{""Scan No"": ""5"", ""ScanStartInfo"": ""statistics ...","{""experiment_name"": ""Undulator"", ""devices"": [{...",1,,2025,8,"{'experiment_name': 'Undulator', 'devices': [{..."
