In [1]:
from pathlib import Path
import pandas as pd

file_map = {
    '2020': 'Spring2020_Merged_goodRuns_nudgesDefined.csv',
    '2023': 'Spring2023_Merged_goodRuns_nudgesDefined.csv',
    '2025': 'Spring2025_Merged_goodRuns_nudgesDefined.csv',
}

# Candidate base folders to locate the CSVs. Extend if your data live elsewhere.
cwd = Path.cwd().resolve()
base_candidates = [cwd, cwd.parent, cwd.parent.parent, Path('/w/halld-scshelf2101/home/gjwei/AIOP/sept2025/AIOP-Photon/jupyter_notebooks'), Path('/w/halld-scshelf2101/home/gjwei/AIOP/sept2025/AIOP-Photon')]

def find_csv(name: str) -> Path:
    for base in base_candidates:
        for candidate in (base / name, base / 'jupyter_notebooks' / name):
            if candidate.exists():
                return candidate
    raise FileNotFoundError(f'Could not find {name} in any of {base_candidates}')

dataframes: dict[str, pd.DataFrame] = {}
missing: list[str] = []
for period, fname in file_map.items():
    try:
        csv_path = find_csv(fname)
        df = pd.read_csv(csv_path, low_memory=False)
        dataframes[period] = df
    except FileNotFoundError:
        missing.append(fname)

dataframes_keys = sorted(dataframes.keys())
print('Loaded periods:', dataframes_keys)
if missing:
    print('Missing files (add paths to base_candidates if needed):', missing)

Loaded periods: ['2020', '2023', '2025']


In [None]:
plane_map = {0: 'RETRACTED/AMORPHOUS', 1: '0/90', 2: '45/135'}

# Orientation templates pulled from livingston_simulation/livingston_sim.py
ORIENTATION_TEMPLATES = {
    '2020': {
        '0/90 PERP': {'pitch': 0.39, 'yaw': 1.4},
        '0/90 PARA': {'pitch': -0.73, 'yaw': 2.4},
        '45/135 PERP': {'pitch': 0.39, 'yaw': 0.73},
        '45/135 PARA': {'pitch': 1.81, 'yaw': 0.84},
    },
    '2023': {
        '0/90 PERP': {'pitch': -0.66, 'yaw': 0.17},
        '0/90 PARA': {'pitch': 0.33, 'yaw': 1.28},
        '45/135 PERP': {'pitch': -1.75, 'yaw': 0.96},
        '45/135 PARA': {'pitch': -0.28, 'yaw': 1.06},
    },
    '2025': {
        '0/90 PERP': {'pitch': 1.68, 'yaw': 1.52},
        '0/90 PARA': {'pitch': 0.59, 'yaw': 1.94},
        '45/135 PERP': {'pitch': 0.46, 'yaw': 1.94},
        '45/135 PARA': {'pitch': 0.46, 'yaw': 0.49},
    },
}

orientation_to_mode = {
    '0/90 PERP': 1,
    '0/90 PARA': 2,
    '45/135 PERP': 3,
    '45/135 PARA': 4,
}

def default_energy(period: str):
    if period == '2020':
        return 8620.0
    if period == '2023':
        return 8615.0
    return 8900.0

def choose_pitch_yaw(row):
    # Prefer RBV (actual) if present, else setpoint.
    pitch = row.get('GONI:PITCH.RBV')
    if pd.isna(pitch):
        pitch = row.get('GONI:PITCH')
    yaw = row.get('GONI:YAW.RBV')
    if pd.isna(yaw):
        yaw = row.get('GONI:YAW')
    return pitch, yaw

def infer_orientation(row, period: str) -> str:
    plane_val = row.get('CBREM:PLANE')
    base_plane = plane_map.get(plane_val, 'UNKNOWN')
    pitch, yaw = choose_pitch_yaw(row)
    templates = ORIENTATION_TEMPLATES.get(period, {})

    # If we lack pitch/yaw data, fall back to plane-only labels.
    if pd.isna(pitch) or pd.isna(yaw) or not templates:
        if base_plane in ('0/90', '45/135'):
            return f"{base_plane} UNKNOWN"
        return base_plane

    # Compute L1 distance to each template and choose the closest.
    best_label, best_dist = None, float('inf')
    for label, vals in templates.items():
        dist = abs(pitch - vals['pitch']) + abs(yaw - vals['yaw'])
        if dist < best_dist:
            best_dist = dist
            best_label = label

    # If still nothing, fall back to plane label.
    if best_label:
        return best_label
    if base_plane in ('0/90', '45/135'):
        return f"{base_plane} UNKNOWN"
    return base_plane

def summarize_by_orientation(df: pd.DataFrame, period: str) -> pd.DataFrame:
    df = df.copy()
    df['orientation'] = df.apply(lambda r: infer_orientation(r, period), axis=1)

    summary = (
        df.groupby(['orientation', 'RunNumber'])['TotalNudges_thisRun']
        .max()
        .reset_index()
    )

    # Earliest/latest nudge times per run/orientation (only where nudges occurred and timestamp is valid)
    nudges = df[(df['NudgeOccurred'] == True) & (~df['TimeStamp'].isna())]
    nudge_times = (
        nudges.groupby(['orientation', 'RunNumber'])['TimeStamp']
        .agg(
            earliest_nudge='min',
            latest_nudge='max',
            n_nudges_window='count',
        )
        .reset_index()
    )
    nudge_times['nudge_window'] = nudge_times['latest_nudge'] - nudge_times['earliest_nudge']

    summary = summary.merge(nudge_times, on=['orientation', 'RunNumber'], how='left')

    # Drop amorphous/retracted/unknown, keep runs with nudges, drop rows without valid nudge times, and exclude large nudge-window counts.
    summary = summary[summary['TotalNudges_thisRun'] > 0]
    summary = summary[~summary['orientation'].str.contains('RETRACTED|AMORPHOUS|UNKNOWN', case=False, na=False)]
    summary = summary[summary['earliest_nudge'].notna() & summary['latest_nudge'].notna()]
    summary = summary[summary['n_nudges_window'] <= 10]
    summary = summary[summary['nudge_window'] >= 100]
    return summary

def build_configs(table: pd.DataFrame, period: str):
    cfgs = []
    base_peak = default_energy(period)
    for _, row in table.iterrows():
        label = row['orientation']
        mode = orientation_to_mode.get(label)
        if mode is None:
            continue
        run_no = int(row['RunNumber'])
        earliest = int(row['earliest_nudge'])
        latest = int(row['latest_nudge'])
        time_window = (max(0, earliest - 20), latest + 50)
        cfgs.append({
            'run_number': run_no,
            'orientation_mode': mode,
            'run_period': period,
            'input_energy': base_peak,
            'base_peak': base_peak,
            'time_window': time_window,
            'energy_window': (15, 40),
        })
    return cfgs

top5_by_orientation: dict[str, pd.DataFrame] = {}
for period, df in dataframes.items():
    summary = summarize_by_orientation(df, period)
    top_rows = (
        summary.sort_values(['orientation', 'TotalNudges_thisRun'], ascending=[True, False])
        .groupby('orientation')
        .head(5)
    )
    top5_by_orientation[period] = top_rows

all_configs: list[dict] = []
for period, table in top5_by_orientation.items():
    print(f"Period {period} — top runs by nudges")
    display(table[['orientation', 'RunNumber', 'TotalNudges_thisRun', 'n_nudges_window', 'earliest_nudge', 'latest_nudge', 'nudge_window']])
    cfgs = build_configs(table, period)
    all_configs.extend(cfgs)

print("\nCopy/paste these into run_configs in simple_sim_runs_top_only:")
for cfg in all_configs:
    print(cfg)

Period 2020 — top runs by nudges


Unnamed: 0,orientation,RunNumber,TotalNudges_thisRun,n_nudges_window,earliest_nudge,latest_nudge,nudge_window
293,0/90 PARA,73078,9,9.0,689.0,829.0,140.0
316,0/90 PARA,73151,9,9.0,639.0,946.0,307.0
258,0/90 PARA,73015,8,8.0,3.0,115.0,112.0
305,0/90 PARA,73130,8,8.0,4462.0,4576.0,114.0
352,0/90 PARA,73263,8,8.0,456.0,589.0,133.0
366,0/90 PERP,72119,14,1.0,210.0,210.0,0.0
526,0/90 PERP,72730,8,1.0,302.0,302.0,0.0
548,0/90 PERP,72780,7,7.0,1347.0,1431.0,84.0
362,0/90 PERP,72115,6,6.0,1971.0,2042.0,71.0
397,0/90 PERP,72257,6,6.0,82.0,187.0,105.0


Period 2023 — top runs by nudges


Unnamed: 0,orientation,RunNumber,TotalNudges_thisRun,n_nudges_window,earliest_nudge,latest_nudge,nudge_window
165,0/90 PARA,121029,6,1.0,283.0,283.0,0.0
2,0/90 PARA,120313,3,3.0,2522.0,2601.0,79.0
37,0/90 PARA,120404,1,1.0,819.0,819.0,0.0
104,0/90 PARA,120722,1,1.0,105.0,105.0,0.0
136,0/90 PARA,120843,1,1.0,722.0,722.0,0.0
331,0/90 PERP,120740,24,1.0,234.0,234.0,0.0
335,0/90 PERP,120753,9,1.0,205.0,205.0,0.0
338,0/90 PERP,120768,8,1.0,322.0,322.0,0.0
230,0/90 PERP,120420,5,5.0,246.0,677.0,431.0
210,0/90 PERP,120315,1,1.0,153.0,153.0,0.0


Period 2025 — top runs by nudges


Unnamed: 0,orientation,RunNumber,TotalNudges_thisRun,n_nudges_window,earliest_nudge,latest_nudge,nudge_window
0,0/90 PARA,131407,33,3.0,369.0,1007.0,638.0
172,0/90 PARA,131816,9,9.0,340.0,914.0,574.0
17,0/90 PARA,131491,8,8.0,4646.0,5079.0,433.0
157,0/90 PARA,131741,7,7.0,2720.0,2768.0,48.0
270,0/90 PARA,132112,7,1.0,209.0,209.0,0.0
293,0/90 PERP,131488,61,7.0,2311.0,2379.0,68.0
288,0/90 PERP,131443,9,9.0,570.0,728.0,158.0
323,0/90 PERP,131709,9,9.0,234.0,361.0,127.0
287,0/90 PERP,131422,8,1.0,3295.0,3295.0,0.0
313,0/90 PERP,131634,8,8.0,275.0,363.0,88.0
