# Norcia 2016 Phase 1: Define Time Windows

This notebook builds labeled time windows (pre-event, background, post-event) for the Norcia 2016 mainshocks. It loads the filtered Norcia metadata parquet from Phase 0 and exports `windows.csv` for downstream feature aggregation.

**Expected input:** `norcia_events.parquet` (or `norcia_2016_metadata.parquet` if Phase 0 has not been run).


In [None]:
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd


## Configuration

In [None]:
# Input/output files. The paths are relative, so they resolve against the
# current working directory.
# Preferred input; the load cell falls back to ALT_DATA_PATH when it is missing.
DATA_PATH = Path("norcia_events.parquet")
ALT_DATA_PATH = Path("norcia_2016_metadata.parquet")
# Destination of the exported window table.
OUTPUT_WINDOWS = Path("windows.csv")

# Length, in hours, of every window built in this notebook.
WINDOW_HOURS = 1
# Each pre-event window ends this many hours before its mainshock.
HORIZONS_HOURS = [2, 6, 12, 24]
# Target number of background windows per pre-event window that shares the
# same start hour of day.
BACKGROUND_MULTIPLIER = 5
# Minimum gap, in hours, between a background candidate's start time and the
# nearest M >= 4 event.
QUIET_BUFFER_HOURS = 48

# Mainshocks used as window anchors. "time" carries an explicit +00:00 offset.
# Only "name" and "time" are read by the cells visible in this notebook;
# "magnitude" is not used by them.
MAINSHOCKS = [
    {"name": "Amatrice", "time": "2016-08-24 01:36:00+00:00", "magnitude": 6.0},
    {"name": "Visso", "time": "2016-10-26 19:18:00+00:00", "magnitude": 5.9},
    {"name": "Norcia", "time": "2016-10-30 06:40:00+00:00", "magnitude": 6.5},
]


## Load filtered Norcia metadata

In [None]:
# Default to the fallback file and switch to the preferred Phase 0 output
# when that one is present.
data_path = ALT_DATA_PATH
if DATA_PATH.exists():
    data_path = DATA_PATH

# Neither file is available: stop early with an actionable message.
if not data_path.exists():
    missing_message = (
        f"{DATA_PATH} or {ALT_DATA_PATH} not found. Run Phase 0 or add norcia_2016_metadata.parquet."
    )
    raise FileNotFoundError(missing_message)

norcia = pd.read_parquet(data_path)

# Parse origin times as timezone-aware UTC timestamps; values that cannot be
# parsed become NaT instead of raising.
norcia["source_origin_time"] = pd.to_datetime(
    norcia["source_origin_time"],
    utc=True,
    errors="coerce",
)

norcia.head()


## Build pre-event windows

In [None]:
@dataclass
class Window:
    """A labeled time interval used for downstream feature aggregation.

    Instances are created by the pre-event, post-event and background
    builders in this notebook and later flattened into the exported table.
    """

    # Unique label, e.g. "pre_<name>_<HH>h", "post_<name>_<NN>" or
    # "bg_<HH>_<NNN>" depending on which builder produced the window.
    window_id: str
    # Interval bounds.
    start_time: pd.Timestamp
    end_time: pd.Timestamp
    # One of "pre_event", "post_event" or "background".
    window_type: str
    # Hours between the window end and the mainshock for pre-event windows;
    # None for post-event and background windows.
    hours_before_mainshock: float | None
    # Name of the anchoring mainshock; None for background windows.
    mainshock_name: str | None


def build_pre_event_windows(mainshocks, horizons_hours, window_hours):
    """Create one pre-event window per (mainshock, horizon) pair.

    Each window ends ``horizon`` hours before the mainshock time and starts
    ``window_hours`` earlier than that.

    Parameters
    ----------
    mainshocks : iterable of mappings with "name" and "time" entries.
    horizons_hours : iterable of integer hour offsets before the mainshock.
    window_hours : window length in hours.

    Returns
    -------
    list of Window
        Ordered by mainshock, then by horizon, in input order.
    """
    collected = []
    for shock in mainshocks:
        mainshock_time = pd.Timestamp(shock["time"])
        for lead_hours in horizons_hours:
            # Work backwards from the mainshock: first the end, then the start.
            window_end = mainshock_time - pd.Timedelta(hours=lead_hours)
            window_start = window_end - pd.Timedelta(hours=window_hours)
            fields = {
                "window_id": f"pre_{shock['name'].lower()}_{lead_hours:02d}h",
                "start_time": window_start,
                "end_time": window_end,
                "window_type": "pre_event",
                "hours_before_mainshock": float(lead_hours),
                "mainshock_name": shock["name"],
            }
            collected.append(Window(**fields))
    return collected

# Pre-event windows for every configured mainshock and horizon.
pre_windows = build_pre_event_windows(MAINSHOCKS, HORIZONS_HOURS, WINDOW_HOURS)
# Notebook preview of the first three windows.
pre_windows[:3]


## Build post-event windows

In [None]:
def build_post_event_windows(mainshocks, window_hours, n_windows=2):
    """Create consecutive post-event windows starting at each mainshock.

    For every mainshock, ``n_windows`` back-to-back windows of length
    ``window_hours`` are produced; the first one starts at the mainshock time.

    Parameters
    ----------
    mainshocks : iterable of mappings with "name" and "time" entries.
    window_hours : window length in hours.
    n_windows : number of windows per mainshock (default 2).

    Returns
    -------
    list of Window
        Ordered by mainshock, then by position after the mainshock.
    """
    collected = []
    for shock in mainshocks:
        mainshock_time = pd.Timestamp(shock["time"])
        # `ordinal` is the 1-based position that appears in the window id.
        for ordinal in range(1, n_windows + 1):
            offset = pd.Timedelta(hours=(ordinal - 1) * window_hours)
            window_start = mainshock_time + offset
            window_end = window_start + pd.Timedelta(hours=window_hours)
            fields = {
                "window_id": f"post_{shock['name'].lower()}_{ordinal:02d}",
                "start_time": window_start,
                "end_time": window_end,
                "window_type": "post_event",
                "hours_before_mainshock": None,
                "mainshock_name": shock["name"],
            }
            collected.append(Window(**fields))
    return collected

# Post-event windows for every configured mainshock (default count per shock).
post_windows = build_post_event_windows(MAINSHOCKS, WINDOW_HOURS)
# Notebook display of the full list.
post_windows


## Identify quiet background candidates

In [None]:
def build_candidate_windows(start, end, window_hours):
    """Tile the span from ``start`` to ``end`` with back-to-back windows.

    Window start times run from ``start`` up to and including ``end`` in
    steps of ``window_hours``. Every window ends ``window_hours`` after it
    starts, so the last window may extend past ``end``.

    Parameters
    ----------
    start, end : timestamps bounding the range of window start times.
    window_hours : window length (and step) in hours; may be fractional.

    Returns
    -------
    pandas.DataFrame
        Columns ``start_time`` and ``end_time``, both in UTC.
    """
    # Pass the step as a Timedelta rather than an f"{window_hours}H" alias
    # string: the uppercase "H" alias is deprecated in recent pandas, and a
    # Timedelta also handles fractional window lengths without string parsing.
    step = pd.Timedelta(hours=window_hours)
    starts = pd.date_range(start=start, end=end, freq=step, tz="UTC")
    return pd.DataFrame(
        {
            "start_time": starts,
            "end_time": starts + step,
        }
    )

# Candidate start times span whole days around the first and last event.
origin_times = norcia["source_origin_time"]
catalog_start = origin_times.min().floor("D")
catalog_end = origin_times.max().ceil("D")

candidates = build_candidate_windows(catalog_start, catalog_end, WINDOW_HOURS)

# Origin times of M >= 4 events, sorted ascending. Missing magnitudes are
# treated as -inf so they never pass the threshold; missing times are dropped.
is_m4_or_larger = norcia["source_magnitude"].fillna(-np.inf) >= 4
m4_events = norcia.loc[is_m4_or_larger, "source_origin_time"].dropna().sort_values()

m4_events.head()


## Filter candidates by quiet buffer and overlap

In [None]:
def is_quiet_window(start_time, event_times, buffer_hours):
    """Tell whether a window start is far enough from every listed event.

    Only the events immediately before and at/after ``start_time`` are
    inspected, located by binary search, so ``event_times`` must be sorted in
    ascending order.

    Parameters
    ----------
    start_time : timezone-aware pandas Timestamp of the window start.
    event_times : sorted pandas Series of event timestamps.
    buffer_hours : minimum allowed gap in hours.

    Returns
    -------
    bool
        True when there are no events, or when the closest neighbouring event
        is at least ``buffer_hours`` away from ``start_time``.
    """
    if event_times.empty:
        return True

    sorted_events = event_times.values
    insert_at = sorted_events.searchsorted(start_time.to_datetime64())

    # Keep whichever of the two neighbouring positions actually exist.
    neighbours = [
        sorted_events[position]
        for position in (insert_at - 1, insert_at)
        if 0 <= position < len(sorted_events)
    ]
    if not neighbours:
        return True

    gaps = abs(start_time - pd.to_datetime(neighbours, utc=True))
    return min(gaps) >= pd.Timedelta(hours=buffer_hours)

# Windows that background candidates must not overlap.
pre_post = pre_windows + post_windows

def overlaps_existing(start_time, end_time, windows):
    """Return True when the given interval intersects any listed window.

    Intervals that merely touch (one ends exactly where the other starts)
    do not count as overlapping.

    Parameters
    ----------
    start_time, end_time : bounds of the interval to test.
    windows : iterable of objects exposing ``start_time`` and ``end_time``.
    """
    return any(
        start_time < existing.end_time and existing.start_time < end_time
        for existing in windows
    )

# Keep a candidate only when it is far enough from every M >= 4 event and
# does not intersect any pre- or post-event window.
quiet_mask = [
    is_quiet_window(candidate.start_time, m4_events, QUIET_BUFFER_HOURS)
    and not overlaps_existing(candidate.start_time, candidate.end_time, pre_post)
    for candidate in candidates.itertuples(index=False)
]

quiet_candidates = candidates.loc[quiet_mask].copy()
quiet_candidates.head()


## Sample background windows to match time-of-day

In [None]:
# Hour of day at which each pre-event window starts; background windows are
# sampled so that their start hours follow the same distribution.
pre_hours = [w.start_time.hour for w in pre_windows]
pre_hours_series = pd.Series(pre_hours)

background_windows = []
# One seeded generator shared by every draw below. Passing the bare integer
# seed to each `sample` call would restart the same random stream for every
# hour and correlate the picks across hours; the shared generator keeps the
# run reproducible while making the per-hour draws independent.
random_state = np.random.default_rng(42)

for hour in pre_hours_series.unique():
    # Aim for BACKGROUND_MULTIPLIER background windows per pre-event window
    # that starts in this hour.
    target_count = int((pre_hours_series == hour).sum() * BACKGROUND_MULTIPLIER)
    candidates_for_hour = quiet_candidates[quiet_candidates["start_time"].dt.hour == hour]
    if candidates_for_hour.empty:
        # No quiet candidate starts in this hour, so it gets no background windows.
        continue
    # Take fewer windows than targeted when the candidate pool is too small.
    sample_count = min(target_count, len(candidates_for_hour))
    sampled = candidates_for_hour.sample(n=sample_count, random_state=random_state)
    for idx, row in enumerate(sampled.itertuples(index=False), start=1):
        window_id = f"bg_{hour:02d}_{idx:03d}"
        background_windows.append(
            Window(
                window_id=window_id,
                start_time=row.start_time,
                end_time=row.end_time,
                window_type="background",
                hours_before_mainshock=None,
                mainshock_name=None,
            )
        )

len(background_windows)


## Assemble and export windows

In [None]:
# Concatenate the three window families in a fixed order.
all_windows = [*pre_windows, *post_windows, *background_windows]

# One row per window, ordered chronologically (ties broken by window type),
# with a fresh 0-based index.
windows_df = (
    pd.DataFrame(
        [
            dict(
                window_id=w.window_id,
                start_time=w.start_time,
                end_time=w.end_time,
                window_type=w.window_type,
                hours_before_mainshock=w.hours_before_mainshock,
                mainshock_name=w.mainshock_name,
            )
            for w in all_windows
        ]
    )
    .sort_values(["start_time", "window_type"])
    .reset_index(drop=True)
)

# Write the table without the index column.
windows_df.to_csv(OUTPUT_WINDOWS, index=False)
windows_df.head()
