In [None]:
import os, json, math
from pathlib import Path

import pandas as pd
import numpy as np

# Input CSV location. The default is a machine-specific absolute path, so it can
# be overridden by setting the GUNSHOT_INPUT_CSV environment variable before the
# kernel starts; when the variable is unset the original path is used unchanged.
DEFAULT_INPUT_CSV = r"C:\Hackathon\gunshot\gunshot_anomaly.csv"
INPUT_CSV = os.environ.get("GUNSHOT_INPUT_CSV", DEFAULT_INPUT_CSV)
print("Using input CSV:", INPUT_CSV)

# Output folders (relative to the current working directory)
OUT_DIR = Path("out")
FRAMES_DIR = OUT_DIR / "frames"
# exist_ok=True keeps the cell safe to re-run
OUT_DIR.mkdir(parents=True, exist_ok=True)
FRAMES_DIR.mkdir(parents=True, exist_ok=True)


Using input CSV: C:\Hackathon\gunshot\gunshot_case1_sim.csv


In [None]:

# Load the raw points from the CSV chosen in the configuration cell
df = pd.read_csv(INPUT_CSV)
print("Loaded columns:", list(df.columns))

# Normalized header -> original header, used for case-insensitive column lookup.
# Headers are stripped as well as lower-cased so that a file written as
# "phone_id, t, lat, lon" (space after each comma) is still recognized, and
# str() keeps non-string headers from crashing the normalization.
lower_map = {str(c).strip().lower(): c for c in df.columns}

def col(name_candidates):
    """Return the CSV's actual column name for the first matching candidate.

    Candidates are tried in the order given, so earlier entries take
    precedence. The lookup goes through the module-level ``lower_map``
    (lower-cased header -> original header); each string candidate is
    lower-cased here as well, so callers do not have to pre-normalize it.

    Args:
        name_candidates: iterable of acceptable column names, most
            preferred first.

    Returns:
        The original header from the CSV, or ``None`` when no candidate
        is present.
    """
    for cand in name_candidates:
        # Keys of lower_map are lower-case, so an upper/mixed-case candidate
        # could never match without this normalization.
        key = cand.lower() if isinstance(cand, str) else cand
        if key in lower_map:
            return lower_map[key]
    return None

# Accept either the new names or the old ones and convert to the new standard names.
# Candidates are listed most-preferred first; col() returns None when none is present.
phone_col = col(["phone_id", "imei"])
time_col  = col(["t", "timestamp"])
lat_col   = col(["lat"])
lon_col   = col(["lon", "lng", "long"])

# Collect a human-readable label for every required column that was not found
required_missing = [n for n,v in {
    "phone_id/imei": phone_col,
    "t/timestamp": time_col,
    "lat": lat_col,
    "lon/lng": lon_col
}.items() if v is None]

if required_missing:
    raise ValueError(f"Missing required columns in CSV: {required_missing}")

# Rename to standard names we will use everywhere
df = df.rename(columns={
    phone_col: "phone_id",
    time_col:  "t",
    lat_col:   "lat",
    lon_col:   "lon"
})

# Keep only what we need (and in the right order).
# .copy() makes this an independent frame: without it, the column assignments
# below can emit SettingWithCopyWarning when the CSV carries extra columns.
df = df[["phone_id", "t", "lat", "lon"]].copy()

# A missing phone_id would be turned into the literal string "nan" by astype(str)
# and then be counted as a real device, so drop those rows before casting.
missing_id = df["phone_id"].isna()
if missing_id.any():
    print("WARNING: Found rows with missing phone_id; dropping:", int(missing_id.sum()))
    df = df[~missing_id].copy()

# Coerce types (very important for mapping); unparseable values become NaN
df["phone_id"] = df["phone_id"].astype(str)
df["t"] = pd.to_numeric(df["t"], errors="coerce")
df["lat"] = pd.to_numeric(df["lat"], errors="coerce")
df["lon"] = pd.to_numeric(df["lon"], errors="coerce")

# Drop rows that failed to parse
before = len(df)
df = df.dropna(subset=["t", "lat", "lon"])
after = len(df)
print(f"Dropped {before - after} rows with invalid t/lat/lon")

# Basic WGS84 range checks
bad_lat = ~df["lat"].between(-90, 90)
bad_lon = ~df["lon"].between(-180, 180)
bad = df[bad_lat | bad_lon]
if len(bad):
    print("WARNING: Found rows outside lat/lon range; dropping:", len(bad))
    df = df[~(bad_lat | bad_lon)]

# Sort and reset index
df = df.sort_values(["t", "phone_id"]).reset_index(drop=True)
print(df.head())
print("Rows:", len(df), "Devices:", df["phone_id"].nunique(), "Unique times:", df["t"].nunique())


Loaded columns: ['phone_id', 't', 'lat', 'lon']
Dropped 0 rows with invalid t/lat/lon
          phone_id    t        lat         lon
0  356000000000000  0.0  38.540894 -121.749281
1  356000000000018  0.0  38.540984 -121.749490
2  356000000000026  0.0  38.541188 -121.749226
3  356000000000034  0.0  38.540922 -121.749136
4  356000000000042  0.0  38.540896 -121.749406
Rows: 1100 Devices: 100 Unique times: 11


In [None]:
# Round lat/lon to 6 decimals (~0.11 m) — nice balance of precision vs. filesize.
# This must happen BEFORE de-duplication: rows that differ only beyond the 6th
# decimal become identical after rounding and would otherwise survive as
# duplicates in every exported file.
df["lat"] = df["lat"].round(6)
df["lon"] = df["lon"].round(6)

# Remove exact duplicates if any (same device, time and rounded position)
before = len(df)
df = df.drop_duplicates(subset=["phone_id", "t", "lat", "lon"])
print("Dropped duplicate rows:", before - len(df))

# Summary by time (helps you check expectations):
# n_points counts rows, n_devices counts distinct devices; they differ when a
# device reports more than one position for the same timestamp.
summary = df.groupby("t").agg(
    n_points=("phone_id", "count"),
    n_devices=("phone_id", "nunique")
).reset_index()
print("Per-time summary (first 10 rows):")
print(summary.head(10))


Dropped duplicate rows: 0
Per-time summary (first 10 rows):
      t  n_points  n_devices
0   0.0       100        100
1   2.5       100        100
2   5.0       100        100
3   7.5       100        100
4  10.0       100        100
5  12.5       100        100
6  15.0       100        100
7  17.5       100        100
8  20.0       100        100
9  22.5       100        100


In [None]:
def meters_per_degree(lat_deg: float):
    """Approximate length in meters of one degree of latitude and longitude.

    Both lengths are evaluated from short cosine series in the latitude.

    Args:
        lat_deg: latitude in degrees at which to evaluate the scale.

    Returns:
        A ``(m_per_deg_lat, m_per_deg_lon)`` tuple of floats.
    """
    phi = math.radians(lat_deg)

    # Cosine harmonics used by the two series (even ones for latitude,
    # odd ones for longitude)
    cos_1phi = math.cos(phi)
    cos_2phi = math.cos(2*phi)
    cos_3phi = math.cos(3*phi)
    cos_4phi = math.cos(4*phi)
    cos_5phi = math.cos(5*phi)
    cos_6phi = math.cos(6*phi)

    north_south = 111132.92 - 559.82*cos_2phi + 1.175*cos_4phi - 0.0023*cos_6phi
    east_west = 111412.84*cos_1phi - 93.5*cos_3phi + 0.118*cos_5phi
    return north_south, east_west

# Bounding box of all points, in degrees
lat_values = df["lat"]
lon_values = df["lon"]
lat_min, lat_max = lat_values.min(), lat_values.max()
lon_min, lon_max = lon_values.min(), lon_values.max()

# The median position is used as the map center
lat_ctr = float(lat_values.median())
lon_ctr = float(lon_values.median())

# Degree -> meter scale factors, evaluated once at the center latitude
m_per_deg_lat, m_per_deg_lon = meters_per_degree(lat_ctr)

# Convert the bounding-box spans from degrees to meters
height_m = (lat_max - lat_min) * m_per_deg_lat
width_m  = (lon_max - lon_min) * m_per_deg_lon

print(f"Map center (lat, lon): ({lat_ctr:.6f}, {lon_ctr:.6f})")
print(f"BBox lat: [{lat_min:.6f}, {lat_max:.6f}]  lon: [{lon_min:.6f}, {lon_max:.6f}]")
print(f"Approx extent: width ≈ {width_m:.1f} m, height ≈ {height_m:.1f} m")

# Zoom recommendation: meters per pixel at zoom z ~ 156543.03392 * cos(lat) / 2^z,
# solved for z given the extent that has to fit into the map width.
def recommend_zoom(width_m, lat_deg, pixels=900):
    """Suggest a map zoom level at which ``width_m`` meters span ``pixels`` pixels.

    Args:
        width_m: extent to show, in meters.
        lat_deg: latitude in degrees at which the map scale is evaluated.
        pixels: width of the map in pixels (default 900).

    Returns:
        The zoom rounded to one decimal and clamped to the range 1..22,
        or 18 when ``width_m`` is zero or negative.
    """
    # Degenerate extent (e.g. a single point): fall back to a fixed zoom
    if width_m <= 0:
        return 18

    target_resolution = width_m / pixels
    resolution_at_z0 = 156543.03392 * math.cos(math.radians(lat_deg))
    # resolution halves with every zoom step, hence the base-2 logarithm
    zoom = round(math.log2(resolution_at_z0 / target_resolution), 1)

    # Clamp to [1, 22]
    capped = zoom if zoom < 22 else 22
    return capped if capped > 1 else 1

# Use the longer side of the bounding box as the extent to fit
longest_side_m = max(width_m, height_m)
zoom_guess = recommend_zoom(longest_side_m, lat_ctr, pixels=900)
print(f"Suggested default zoom for a 900px map: z ≈ {zoom_guess}")


Map center (lat, lon): (38.541094, -121.749366)
BBox lat: [38.540548, 38.541553]  lon: [-121.750067, -121.748593]
Approx extent: width ≈ 128.5 m, height ≈ 111.6 m
Suggested default zoom for a 900px map: z ≈ 19.7


In [None]:
# Persist the cleaned points under OUT_DIR, without the pandas index column
export_columns = ["phone_id", "t", "lat", "lon"]
clean_csv_path = OUT_DIR / "gunshot_clean.csv"
cleaned_points = df[export_columns]
cleaned_points.to_csv(clean_csv_path, index=False)
print("Saved cleaned CSV:", clean_csv_path.resolve())


Saved cleaned CSV: C:\Hackathon\gunshot\out\gunshot_clean.csv


In [None]:
# One GeoJSON Point feature per row; coordinates are written as [lon, lat]
features = [
    {
        "type": "Feature",
        "geometry": {"type": "Point", "coordinates": [float(row.lon), float(row.lat)]},
        "properties": {"phone_id": str(row.phone_id), "t": float(row.t)},
    }
    for row in df.itertuples(index=False)
]

geojson_all = {"type": "FeatureCollection", "features": features}

# Compact separators keep the file small
all_path = OUT_DIR / "gunshot_points_all.geojson"
with open(all_path, "w", encoding="utf-8") as out_file:
    json.dump(geojson_all, out_file, ensure_ascii=False, separators=(",", ":"))
print("Saved all-points GeoJSON:", all_path.resolve(), "(features:", len(features), ")")


Saved all-points GeoJSON: C:\Hackathon\gunshot\out\gunshot_points_all.geojson (features: 1100 )


In [None]:
times = sorted(df["t"].unique().tolist())
print("Unique timestamps:", times[:5], "... total:", len(times))

# One GeoJSON file per timestamp. groupby splits the frame in a single pass
# (instead of re-scanning every row with a boolean mask for each timestamp);
# sort=True yields the groups in ascending t, i.e. the same order as `times`,
# so frame indices are unchanged.
index_rows = []
for idx, (tval, df_t) in enumerate(df.groupby("t", sort=True)):
    feats = [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [float(r.lon), float(r.lat)]},
            "properties": {"phone_id": str(r.phone_id), "t": float(r.t)}
        }
        for r in df_t.itertuples(index=False)
    ]
    frame = {"type": "FeatureCollection", "features": feats}
    frame_name = f"frame_{idx:03d}.geojson"
    frame_path = FRAMES_DIR / frame_name
    with open(frame_path, "w", encoding="utf-8") as f:
        json.dump(frame, f, ensure_ascii=False, separators=(",", ":"))
    # Only the bare file name is stored in the index entry
    index_rows.append({"index": idx, "t": float(tval), "file": str(frame_path.name)})

# Write a tiny index so your web app can load frames easily
index_path = OUT_DIR / "frames_index.json"
with open(index_path, "w", encoding="utf-8") as f:
    json.dump(index_rows, f, ensure_ascii=False, indent=2)

print(f"Wrote {len(times)} frame files to:", FRAMES_DIR.resolve())
print("Frame index:", index_path.resolve())


Unique timestamps: [0.0, 2.5, 5.0, 7.5, 10.0] ... total: 11
Wrote 11 frame files to: C:\Hackathon\gunshot\out\frames
Frame index: C:\Hackathon\gunshot\out\frames_index.json
