In [None]:
import pandas as pd
import numpy as np

# ---- Tunable configuration for Model A (constant-velocity "surprise" trigger) ----

# Input predictors file. NOTE: absolute, machine-specific path; edit before running elsewhere.
CSV_PATH = r"C:\Hackathon\gunshot\modelA_predictors.csv"

# Time step between rows for the same phone (seconds).
DT_DEFAULT_S = 2.5

# How strict is "surprised"? 0.99 = 99th-percentile residual per phone (robust baseline).
RESIDUAL_PERCENTILE = 0.99

# Frame-level trigger rule: at least this fraction of people must be "surprised".
MIN_FRACTION_SURPRISED = 0.35       # start here, tune later

# Persistence: how many ticks in a row must the condition hold?
#   1 -> a single tick is enough (current setting, i.e. no persistence requirement)
#   2 -> two ticks in a row (2 ticks x 2.5 s = 5 s)
PERSIST_TICKS = 1

# Fail fast on mistyped tunables instead of producing silently meaningless triggers.
assert DT_DEFAULT_S > 0, "DT_DEFAULT_S must be a positive number of seconds"
assert 0.0 <= RESIDUAL_PERCENTILE <= 1.0, "RESIDUAL_PERCENTILE is a quantile in [0, 1]"
assert 0.0 < MIN_FRACTION_SURPRISED <= 1.0, "MIN_FRACTION_SURPRISED is a fraction in (0, 1]"
assert isinstance(PERSIST_TICKS, int) and PERSIST_TICKS >= 1, "PERSIST_TICKS must be an integer >= 1"


In [23]:
# Read the predictor table and order it by phone, then by time, so that the
# successor of a row within one phone is simply the row that follows it.
df = pd.read_csv(CSV_PATH)
df = df.sort_values(["phone_id", "t"]).reset_index(drop=True)

# Pull the following row's position up onto the current row, phone by phone.
# shift(-1) inside a group leaves NaN on that phone's last row.
by_phone = df.groupby("phone_id")
df["x_next_m"] = by_phone["x_t_m"].shift(-1)
df["y_next_m"] = by_phone["y_t_m"].shift(-1)

# Seconds until the NEXT row of the same phone (covers missing ticks).
# Anything that is not a strictly positive gap (NaN on a phone's last row,
# zero or negative gaps) is replaced by DT_DEFAULT_S.
df["dt_s"] = by_phone["t"].shift(-1) - df["t"]
df["dt_s"] = df["dt_s"].where(df["dt_s"] > 0, DT_DEFAULT_S)

# Keep only rows that have a next position to compare against
# (this removes, at least, the final row of every phone).
has_next = df["x_next_m"].notna() & df["y_next_m"].notna()
df = df[has_next].reset_index(drop=True)

print("Rows with next-step available:", df.shape[0])
df.head()


Rows with next-step available: 749835


Unnamed: 0,phone_id,t,x_t_m,y_t_m,vx_t_mps,vy_t_mps,x_next_m,y_next_m,dt_s
0,100000000000008,0.0,-20.470981,-19.655965,0.0,0.0,-16.038257,-15.4588,2.5
1,100000000000008,2.5,-16.038257,-15.4588,1.773089,1.678866,-11.480153,-11.398173,2.5
2,100000000000008,5.0,-11.480153,-11.398173,1.823242,1.624251,-6.944687,-7.312014,2.5
3,100000000000008,7.5,-6.944687,-7.312014,1.814186,1.634463,-2.360462,-3.280249,2.5
4,100000000000008,10.0,-2.360462,-3.280249,1.83369,1.612706,2.047011,0.943557,2.5


In [24]:
# Constant-velocity forecast of the next position:
#   x_pred(t + dt) = x(t) + vx(t) * dt      (and likewise for y)
step_s = df["dt_s"]
df["x_pred_next_m"] = df["x_t_m"].add(df["vx_t_mps"].mul(step_s))
df["y_pred_next_m"] = df["y_t_m"].add(df["vy_t_mps"].mul(step_s))

# Residual (meters): Euclidean distance between where the phone actually went
# and where the constant-velocity forecast said it would be.
dx_err = df["x_next_m"].sub(df["x_pred_next_m"])
dy_err = df["y_next_m"].sub(df["y_pred_next_m"])
df["residual_m"] = np.sqrt(dx_err.pow(2) + dy_err.pow(2))

df.loc[:, ["phone_id", "t", "residual_m"]].head()

Unnamed: 0,phone_id,t,residual_m
0,100000000000008,0.0,6.104526
1,100000000000008,2.5,0.185373
2,100000000000008,5.0,0.034123
3,100000000000008,7.5,0.073048
4,100000000000008,10.0,0.261


In [25]:
# Per-phone "surprise" threshold: the RESIDUAL_PERCENTILE quantile of that phone's own
# residuals, so noisier and cleaner devices are each judged against their own baseline.
per_phone_thr = df.groupby("phone_id")["residual_m"].quantile(RESIDUAL_PERCENTILE)

# Attach the threshold to every row with a lookup on phone_id instead of a merge.
# Assigning the column overwrites any earlier value, so this cell can be re-run safely;
# a merge would add suffixed duplicate columns on a second run and the lookup of
# "residual_thr_m" below would then fail.
df["residual_thr_m"] = df["phone_id"].map(per_phone_thr)

# Flag rows where the residual is strictly above that phone's threshold (1 = flagged).
# NOTE(review): a phone whose threshold is exactly 0.0 flags every non-zero residual,
# however tiny - confirm this is intended or add an absolute floor.
# NOTE(review): the threshold comes from the same rows it is applied to, so a phone with
# few rows presumably always has its largest residual flagged; that would explain an
# overall flag rate well above 1 - RESIDUAL_PERCENTILE. Verify against the data.
df["residual_flag"] = (df["residual_m"] > df["residual_thr_m"]).astype(int)

print("Sample thresholds (meters):")
print(per_phone_thr.sort_values().head())
print("\nResidual_flag rate overall:", df["residual_flag"].mean().round(4))


Sample thresholds (meters):
phone_id
100000000578714    0.0
100000000578475    0.0
100000000350148    0.0
100000000350763    0.0
100000000577617    0.0
Name: residual_m, dtype: float64

Residual_flag rate overall: 0.0776


In [26]:
# Per-tick head counts: rows present at each time ("active") and how many of
# those rows carry the surprise flag ("surprised").
per_t = df.groupby("t", as_index=False).agg(
    active=("phone_id", "count"),
    surprised=("residual_flag", "sum"),
)

# Share of active phones that are surprised at each tick (scale-free).
per_t["frac_surprised"] = per_t["surprised"].div(per_t["active"])

# Persistence rule: the share must stay at or above MIN_FRACTION_SURPRISED for
# PERSIST_TICKS rows in a row, i.e. the rolling minimum over that many rows must
# clear the bar. Windows that are not yet full give NaN, which compares as False.
# NOTE(review): the window counts rows of per_t, not elapsed time - a tick with no
# rows at all would be skipped silently; confirm the time grid has no holes.
per_t = per_t.sort_values("t").reset_index(drop=True)
roll_min = (
    per_t["frac_surprised"]
    .rolling(window=PERSIST_TICKS, min_periods=PERSIST_TICKS)
    .min()
)
per_t["triggerA"] = roll_min.ge(MIN_FRACTION_SURPRISED).astype(int)

per_t.head(10)


Unnamed: 0,t,active,surprised,frac_surprised,triggerA
0,0.0,50,48,0.96,1
1,2.5,50,1,0.02,0
2,5.0,50,0,0.0,0
3,7.5,50,0,0.0,0
4,10.0,50,0,0.0,0
5,12.5,50,0,0.0,0
6,15.0,50,0,0.0,0
7,17.5,44,0,0.0,0
8,20.0,34,0,0.0,0
9,22.5,32,6,0.1875,0


In [27]:
# Export 1: per-row residuals, thresholds and flags (for debugging / plots).
residual_cols = ["phone_id", "t", "residual_m", "residual_thr_m", "residual_flag"]
df_out = df[residual_cols].copy()
df_out.to_csv(r"C:\Hackathon\gunshot\modelA_residuals_per_row.csv", index=False)

# Export 2: per-tick Trigger A table, without the row labelled 0 (the earliest tick,
# since per_t is sorted by t and re-indexed).
# NOTE(review): presumably this hides the start-up artifact visible in the sample
# output (velocities are 0 at t=0, so almost every phone is "surprised") - confirm.
trigger_cols = ["t", "active", "surprised", "frac_surprised", "triggerA"]
per_t_out = per_t[trigger_cols].drop(index=0)
per_t_out.to_csv(r"C:\Hackathon\gunshot\modelA_triggerA_by_time.csv", index=False)

print("Wrote:")
print(" - modelA_residuals_per_row.csv  (", df_out.shape, ")")
print(" - modelA_triggerA_by_time.csv   (", per_t_out.shape, ")")

# Report the time ranges where Trigger A is ON, merged into contiguous segments.
on = per_t_out.loc[per_t_out["triggerA"] == 1, "t"].values
if len(on) > 0:
    # Most common spacing between ticks (taken from the full per_t table, not the
    # exported one); two ON ticks further apart than this start a new segment.
    tick = per_t["t"].diff().dropna().mode().iloc[0]
    gap_after = np.flatnonzero(np.diff(on) > tick + 1e-6)   # positions followed by a gap
    first_idx = np.concatenate(([0], gap_after + 1))        # first ON tick of each segment
    last_idx = np.concatenate((gap_after, [len(on) - 1]))   # last ON tick of each segment
    blocks = list(zip(on[first_idx], on[last_idx]))
    print("\nTrigger A active intervals (t_start → t_end):")
    for a, b in blocks:
        dur = b - a + tick   # a segment of k ticks spans k * tick seconds
        print(f"  {a:.1f}s → {b:.1f}s   (duration ≈ {dur:.1f}s)")
else:
    print("\nNo Trigger A intervals with current settings.")


Wrote:
 - modelA_residuals_per_row.csv  ( (749835, 5) )
 - modelA_triggerA_by_time.csv   ( (17279, 5) )

Trigger A active intervals (t_start → t_end):
  25142.5s → 25142.5s   (duration ≈ 2.5s)
  25202.5s → 25202.5s   (duration ≈ 2.5s)
  25232.5s → 25232.5s   (duration ≈ 2.5s)
