# Examples of interfacing with geecs data

## Note:
geecs_data_utils makes use of geecs_paths_config, which is loaded in the background. It looks for a specific config file that holds the user's desired defaults. If this config file doesn't exist, some basic attributes need to be set manually.

In [None]:
from geecs_data_utils import ScanData, ScanTag

# Without a config file, point the package at the experiment data root by hand.
# Data is assumed to be stored as <base_path>/<experiment name>. Uncommenting the
# line below also requires ScanPaths and pathlib.Path to be imported first.
# ScanPaths.paths_config.base_path = Path('Z:/data')

# A ScanTag is the unique identifier for a scan, built here from the experiment
# name, the date, and a number.
tag = ScanTag(
    experiment="Undulator",
    year=2025,
    month=8,
    day=7,
    number=5,
)

# Wrap the tag in a ScanData object; the following cells use `sd`.
sd = ScanData(tag=tag)

## Many available methods for interfacing

### Load the sFile as a pandas dataframe

In [None]:
# Read the scan's sFile into a dataframe and preview its first five rows.
df = sd.get_sfile_data()
df.head(n=5)

### Create a dataframe of shot numbers and file paths for a list of devices, restricted to the shots for which all devices saved a file

In [None]:
# Build a list of (<device>, <file_tail>) tuples. File names are typically
# Scan<scan_number>DeviceName<shotnumber><extra><extension>, and file_tail is
# everything after <shotnumber>. For example, a magspec-type device could use
# ('magspec', 'interpSpec.txt').

# Device names and their file tails are kept in parallel, position by position.
devices = ["Z_Test_Scope", "Z_Test_Scope_2", "UC_ALineEBeam3"]
file_tails = [".dat", ".dat", ".png"]
dev_list = list(zip(devices, file_tails))

shots = sd.get_common_shot_dataframe(dev_list)
shots.head(5)

## Build a comprehensive, fast, queryable Parquet 'scans database'
See the documentation for more details.

In [None]:
from datetime import date
from geecs_data_utils.scans_database.builder import ScanDatabaseBuilder

# Inputs: which experiment to index, where its data lives, where the Parquet
# database is written, and the (start, end) dates of the scans to include.
experiment = "Undulator"
data_root = ScanData.paths_config.base_path
date_range = (date(2022, 1, 3), date(2025, 8, 5))
output_path = data_root / experiment / "scan_database_parquet"

# Initial build of the database.
ScanDatabaseBuilder.stream_to_parquet(
    experiment=experiment,
    data_root=data_root,
    output_path=output_path,
    date_range=date_range,
    mode="overwrite",  # use "append" to add new scans to an existing database
    buffer_size=50,
    max_scans=1_000_000,
)

### After the database is initially built, new scan entries are easily added using mode='append' and date_range=None

In [None]:
# Incremental update: with date_range=None the builder resumes from the last
# date recorded in _update_log.json and runs through today.
ScanDatabaseBuilder.stream_to_parquet(
    experiment=experiment,
    data_root=data_root,
    output_path=output_path,
    mode="append",
    date_range=None,  # auto-resume via the sidecar log
    buffer_size=50,
)

### Filter scans database based on many flexible criteria

In [None]:
from geecs_data_utils.scans_database.database import ScanDatabase
from datetime import date

# Same inputs as used when building the database.
experiment = "Undulator"
data_root = ScanData.paths_config.base_path
output_path = data_root / experiment / "scan_database_parquet"

# Initialize the ScanDatabase object from the Parquet output location.
db = ScanDatabase(output_path)

# Apply filters one after another.

# Date range goes first: it is the fast initial filter.
db.date_range(date(2025, 8, 5), date(2025, 8, 8))

# Scan-parameter matching is case insensitive; an alias, a variable name, or
# any part of either can be used.
db.filter_scan_parameter_contains("shotnumber")

# ECS live dump entries: (device_like, variable_like, target value, tolerance).
db.filter_ecs_value_within("hexapod", "y", target=18.5, tol=0.5)

# Named filters are autoloaded based on the experiment name and can have
# various date-range validities.
db.apply("PMQ_inserted")

# Convert the filtered result to a dataframe, then report its size and preview it.
df = db.to_df()

print("rows:", len(df))
df.head(5)