# Station Segmentation
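Classifies each start station as **Confirmed Behavioral Anchor**, **High-Potential Emerging**, or **Inconsistent / Noise**. The label is determined by the station's *density score* (the share of its rows with a `Strong Mirror` verdict: at least 0.60, at least 0.30, or below 0.30, respectively). The *consistency score* (the station's mean `routine_score`) is reported alongside the label and is used only to order the output.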

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Processed-data folder, relative to the notebook's working directory.
DATA_DIR = Path("../data/processed")
input_path = DATA_DIR / "refined_behavioral_scores.csv"
output_path = DATA_DIR / "station_behavior_segments.csv"

# Columns the scoring cell reads from the input file.
REQUIRED_COLUMNS = ("start_station_name", "mirror_verdict", "routine_score")

# Fail fast and report the resolved location: DATA_DIR is relative, so a wrong
# working directory is a likely cause of a missing file.
if not input_path.exists():
    raise FileNotFoundError(
        f"\u274c refined_behavioral_scores.csv not found at {input_path.resolve()}"
    )

df = pd.read_csv(input_path)

# Surface a schema problem here rather than as a bare KeyError in a later cell.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"{input_path.name} is missing required columns: {missing_columns}"
    )

print(f"Loaded {len(df):,} rows.")

Loaded 3,489 rows.


In [3]:
# 1. Density Score: share of each station's rows whose verdict is "Strong Mirror".
# The grouped mean of the boolean mask equals matches / rows, so the built-in
# aggregation replaces the per-group Python lambda with identical values.
density_df = (
    df["mirror_verdict"]
    .eq("Strong Mirror")
    .groupby(df["start_station_name"])
    .mean()
    .reset_index(name="density_score")
)

# 2. Consistency Score: mean routine_score per station.
consistency_df = (
    df.groupby("start_station_name")["routine_score"]
    .mean()
    .reset_index(name="consistency_score")
)

# Both frames hold exactly one row per station; validate= makes pandas raise
# if that ever stops being true instead of silently duplicating rows.
final_df = density_df.merge(
    consistency_df, on="start_station_name", validate="one_to_one"
)
print(f"Unique stations: {len(final_df)}")

Unique stations: 500


In [4]:
# --- TIGHTENED LOGIC ---
def classify_station(density, anchor_threshold=0.60, emerging_threshold=0.30):
    """Map a density score to a station segment label.

    Args:
        density: Density score to classify. The notebook passes each value of
            the ``density_score`` column.
        anchor_threshold: Minimum density for "Confirmed Behavioral Anchor".
            Defaults to 0.60 (a Strong Mirror at least 60% of the time).
        emerging_threshold: Minimum density for "High-Potential Emerging".
            Defaults to 0.30.

    Returns:
        "Confirmed Behavioral Anchor", "High-Potential Emerging", or
        "Inconsistent / Noise". A NaN density fails both comparisons and is
        therefore labelled "Inconsistent / Noise".

    Raises:
        ValueError: If ``emerging_threshold`` exceeds ``anchor_threshold``,
            which would make the middle tier unreachable.
    """
    if emerging_threshold > anchor_threshold:
        raise ValueError(
            "emerging_threshold must not exceed anchor_threshold "
            f"(got {emerging_threshold} > {anchor_threshold})"
        )

    if density >= anchor_threshold:
        return "Confirmed Behavioral Anchor"
    if density >= emerging_threshold:
        return "High-Potential Emerging"
    return "Inconsistent / Noise"

# Label every station from its density score, then rank by consistency
# (highest first).
segmented = final_df.assign(
    final_status=final_df["density_score"].map(classify_station)
)
final_df = segmented.sort_values(by="consistency_score", ascending=False)

# Persist the ranked segments without the positional index.
final_df.to_csv(output_path, index=False)

# Report where the file went and how many stations landed in each segment.
divider = "-" * 50
print(divider)
print(f"\u2705 SUCCESS: Segments saved to {output_path}")
print("\nPortfolio Distribution:")
status_counts = final_df["final_status"].value_counts()
print(status_counts)

--------------------------------------------------
✅ SUCCESS: Segments saved to ..\data\processed\station_behavior_segments.csv

Portfolio Distribution:
final_status
Inconsistent / Noise           491
High-Potential Emerging          5
Confirmed Behavioral Anchor      4
Name: count, dtype: int64
