In [None]:
!pip install transformers emoji pandas tqdm

In [None]:
import pandas as pd
import numpy as np
import re
import emoji
from transformers import pipeline
from tqdm import tqdm

classifier = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1", device=0)

# zones setup
TOKYO_ZONES = {
    "KYOJIMA":   {"Coords": (35.7140, 139.8230), "Material": "Wooden",   "Base_Risk": 9.0},
    "KINSHICHO": {"Coords": (35.6962, 139.8144), "Material": "Concrete", "Base_Risk": 7.0},
    "SKYTREE":   {"Coords": (35.7100, 139.8107), "Material": "Steel",    "Base_Risk": 5.0},
    "KAMEIDO":   {"Coords": (35.6983, 139.8262), "Material": "Concrete", "Base_Risk": 4.0},
    "PARK":      {"Coords": (35.6896, 139.8166), "Material": "Open",     "Base_Risk": 2.0},
}
HQ_COORDS = (35.7100, 139.8100)

LABELS = ["Critical Life Threat", "Fire or Flames", "Building Collapse", "Explosion or Gas Leak", "Road Blocked or Inaccessible", "Safe or Controlled"]

VETO_KEYWORDS = ["TRAPPED", "CRUSHED", "PINNED", "FIRE", "EXPLOSION", "COLLAPSE", "BLEEDING", "UNCONSCIOUS", "SOS", "TSUNAMI", "GAS LEAK", "GAS ODOR"]

def find_nearest_zone(lat, long):
    min_dist = float('inf')
    best_zone = "UNKNOWN"

    for name, data in TOKYO_ZONES.items():
        dist = np.sqrt((data["Coords"][0] - lat)**2 + (data["Coords"][1] - long)**2)
        dist_km = dist * 111.0

        if name == "SKYTREE" and dist_km > 0.3:
            continue

        if dist < min_dist:
            min_dist = dist
            best_zone = name
    return best_zone

def analyze_alert(row):
    text = str(row['Raw_Message_Text'])
    battery = float(row.get('Battery_Pct', 100))
    lat, long = row['GPS_Lat'], row['GPS_Long']

    # nlp
    text_clean = emoji.demojize(text, delimiters=(" ", " "))
    result = classifier(text_clean, LABELS, multi_label=True)
    scores = dict(zip(result['labels'], result['scores']))

    # calc g1 severity
    raw_sev = (scores["Critical Life Threat"] * 100) + (scores["Fire or Flames"] * 80) + (scores["Explosion or Gas Leak"] * 100)
    g1_sev = min(raw_sev / 10, 25.0)

    # calc g2 panic
    caps_ratio = sum(1 for c in text if c.isupper()) / len(text) if len(text) > 0 else 0
    g2_pan = min((caps_ratio * 0.5) + (text.count("!") * 0.1), 1.0)

    # g3 proximity
    dist_deg = np.sqrt((HQ_COORDS[0] - lat)**2 + (HQ_COORDS[1] - long)**2)
    dist_km = dist_deg * 111.0
    g3_prox = max(10.0 - (dist_km * 2), 0.0)

    # g4 zone risk
    zone_name = find_nearest_zone(lat, long)
    zone_info = TOKYO_ZONES.get(zone_name, {"Material": "Concrete", "Base_Risk": 1})
    g4_vuln = zone_info["Base_Risk"]

    # g5 battery
    g5_bat = 100.0 - battery

    # g6 access
    g6_acc = scores["Road Blocked or Inaccessible"] * 10.0

    # g7 veto logic
    sum_danger = scores["Building Collapse"] + scores["Explosion or Gas Leak"] + scores["Fire or Flames"]

    ai_veto = (
        scores["Building Collapse"] > 0.7 or
        scores["Explosion or Gas Leak"] > 0.7 or
        scores["Critical Life Threat"] > 0.7 or
        sum_danger > 1.2 or
        (zone_info["Material"] == "Wooden" and scores["Fire or Flames"] > 0.85)
    )

    # keywords check
    keyword_match = any(k in text.upper() for k in VETO_KEYWORDS)
    is_unsafe = scores["Safe or Controlled"] < 0.5
    word_veto = keyword_match and is_unsafe

    # sensor hardware check
    sensor_veto = "SMOKE_COMPOSITION" in text.upper() or "ACCELERATION_SPIKE" in text.upper()

    g7_veto = True if (ai_veto or word_veto or sensor_veto) else False

    address_id = f"{round(lat, 4)}_{round(long, 4)}"

    return pd.Series([
        g1_sev, g2_pan, g3_prox, g4_vuln, g5_bat, g6_acc, g7_veto, zone_name, address_id, text
    ], index=['g1_Sev', 'g2_Pan', 'g3_Prox', 'g4_Vuln', 'g5_Bat', 'g6_Acc', 'g7_Veto', 'Zone', 'Address', 'Message'])

# running the script
df = pd.read_csv('data.txt')
tqdm.pandas()

df_processed = df.progress_apply(analyze_alert, axis=1)
df_clean = df_processed.dropna()

df_clean = df_clean.sort_values(by=['g7_Veto', 'g1_Sev'], ascending=False)
df_clean.to_csv('processed_data.csv', index=False)

In [3]:
import pandas as pd
import numpy as np

df = pd.read_csv('processed_data.csv')

# convert everything to float just to be safe
cols_to_fix = ['g1_Sev', 'g2_Pan', 'g3_Prox', 'g4_Vuln', 'g5_Bat', 'g6_Acc', 'g7_Veto']
for col in cols_to_fix:
    if col in df.columns:
        df[col] = df[col].astype(float)

# invert minimize criteria so everything is maximization (to be compatible with ELECTRE III)
df['g3_Prox_Inv'] = 10.0 - df['g3_Prox']
df['g5_Bat_Inv'] = 100.0 - df['g5_Bat']

# 2. electre configuration
# thresholds: q=indifference, p=preference, v=veto
CRITERIA = [
    # col, w, q, p, v
    ('g1_Sev',      9.0, 2.0,  5.0,  15.0), # life threat
    ('g4_Vuln',     8.0, 0.0,  2.0,  8.0),  # zone risk
    ('g7_Veto',     10.0,0.0,  0.5,  0.9),  # veto flag
    ('g3_Prox_Inv', 5.0, 1.0,  3.0,  100.0),# distance
    ('g5_Bat_Inv',  3.0, 10.0, 30.0, 100.0),# battery
    ('g2_Pan',      2.0, 2.0,  4.0,  100.0),# panic
    ('g6_Acc',      2.0, 2.0,  5.0,  100.0) # access
]

w_total = sum(c[1] for c in CRITERIA)


def run_electre_iii(df):
    n = len(df)
    matrix_credibility = np.zeros((n, n))

    # speed up by using dict of numpy arrays
    data_matrix = {col: df[col].values for col, _, _, _, _ in CRITERIA}

    # compare every pair
    for i in range(n):
        for k in range(n):
            if i == k: continue

            c_global = 0.0
            veto_active = False

            for col, w, q, p, v in CRITERIA:
                val_a = data_matrix[col][i]
                val_b = data_matrix[col][k]
                diff = val_a - val_b

                # concordance check
                if diff >= -q:
                    c_local = 1.0
                elif diff < -p:
                    c_local = 0.0
                else:
                    c_local = (p + diff) / (p - q)

                c_global += c_local * w

                # discordance / veto check
                if diff < -v:
                    veto_active = True
                    break

            if veto_active:
                matrix_credibility[i][k] = 0.0
            else:
                matrix_credibility[i][k] = c_global / w_total

    return matrix_credibility

cred_matrix = run_electre_iii(df)

# 4. net flow ranking
flow_plus = np.sum(cred_matrix, axis=1)
flow_minus = np.sum(cred_matrix, axis=0)
net_flow = flow_plus - flow_minus

df['Net_Flow_Score'] = net_flow

# calc global rank (BEFORE sorting buckets)
df['Global_Rank'] = df['Net_Flow_Score'].rank(ascending=False, method='min')

# 5. sorting & output
# sort by zone first for buckets, then score
df_sorted = df.sort_values(by=['Zone', 'Net_Flow_Score'], ascending=[True, False])

# print table headers
print(f"{'Rank':<6} {'NetFlow':<8} {'Status':<12} {'Veto?':<6} {'Zone':<12} {'Message'}")

current_zone = ""

for _, row in df_sorted.iterrows():
    if row['Zone'] != current_zone:
        print(f"\n>>> SECTOR: {row['Zone']}")
        current_zone = row['Zone']

    status = "Standard"
    if row['Net_Flow_Score'] > 2.0: status = "!!! EXTREME"
    elif row['Net_Flow_Score'] > 0.5: status = "CRITICAL"

    msg = str(row['Message'])[:50] + "..." if len(str(row['Message'])) > 50 else str(row['Message'])

    print(f"{int(row['Global_Rank']):<6} {row['Net_Flow_Score']:<8.2f} {status:<12} {int(row['g7_Veto']):<6} {row['Zone']:<12} {msg}")

Rank   NetFlow  Status       Veto?  Zone         Message

>>> SECTOR: KAMEIDO
24     15.66    !!! EXTREME  1      KAMEIDO      Requesting ambulance suspected arm fracture bleedi...
30     -17.86   Standard     0      KAMEIDO      ERR_DATA_PACKET_LOSS_THERMAL_ALARM_WEST_WING
31     -18.96   Standard     0      KAMEIDO      Traffic signals down vehicles colliding at interse...
33     -20.22   Standard     0      KAMEIDO      PROLONGED_DISTRESS_NOISE_DETECTED_SECTOR_4
35     -21.40   Standard     0      KAMEIDO      power gone building dark people yelling

>>> SECTOR: KINSHICHO
2      31.26    !!! EXTREME  1      KINSHICHO    FIRE FIRE KINSHICHO STATION HELP US
4      28.30    !!! EXTREME  1      KINSHICHO    fire spreading fast wind strong help
6      26.37    !!! EXTREME  1      KINSHICHO    ACTIVE_FIRE_REPORTED_CONBINI_FLAMES_VISIBLE
7      24.70    !!! EXTREME  1      KINSHICHO    PLEASE SOMEONE GET ME OUT IM TRAPPED IN THE BASEME...
8      24.33    !!! EXTREME  1      KINSHICHO    Fi