# 5. Station Level Segmentation
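This notebook applies the 'Density-Dominant Model' to segment stations into three categories based on each station's density score (the fraction of its months classified as Strong Mirror): Confirmed Behavioral Anchor (density ≥ 0.40), High-Potential Emerging (0.20 ≤ density < 0.40), or Inconsistent / Noise (density < 0.20). The average Routine Score (RS) is used only to rank stations within a category.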

In [1]:
import pandas as pd
from pathlib import Path

print("Starting Step 4: Final Station Segmentation (Density-Dominant Model)")

# ------------------------------------------
# Locate the project folders and data files
# ------------------------------------------
# The project root is taken to be the parent of the current working directory.
SCRIPT_DIR = Path().resolve()
PROJECT_ROOT = SCRIPT_DIR.parent

# The input scores and the output segments share one folder.
processed_dir = PROJECT_ROOT.joinpath("Data", "Processed Datasets")
input_path = processed_dir / "refined_behavioral_scores.csv"
output_path = processed_dir / "station_behavior_segments.csv"

Starting Step 4: Final Station Segmentation (Density-Dominant Model)


In [2]:
# ==========================================
# LOAD DATA
# ==========================================
# Fail fast: if the input file is missing, `df` would never be defined and
# the following cells would stop with an unrelated NameError instead.
if not input_path.exists():
    raise FileNotFoundError(f"Error: {input_path} not found.")

df = pd.read_csv(input_path)

# Columns read by the density-score and consistency-score cells below.
required_columns = {"start_station_name", "mirror_verdict", "RS"}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{input_path.name} is missing required columns: {sorted(missing_columns)}"
    )

print(f"Loaded {len(df):,} station-month records.")

Loaded 3,602 station-month records.


In [3]:
# ==========================================
# 1. DENSITY SCORE
# Share of months classified as Strong Mirror
# ==========================================
# Flag every row whose verdict is "Strong Mirror"; the per-station mean of
# that boolean flag is the fraction (0.0 - 1.0) of the station's rows that
# are Strong Mirror. This is a vectorised equivalent of sum / len per group.
is_strong_mirror = df["mirror_verdict"].eq("Strong Mirror")

density_df = (
    is_strong_mirror.groupby(df["start_station_name"])
    .mean()
    .reset_index(name="density_score")
)

print("Density score calculated.")

Density score calculated.


In [4]:
# ==========================================
# 2. CONSISTENCY SCORE (Ranking only)
# Average Routine Score (RS) across months
# ==========================================
# Mean RS per station, indexed by station name.
mean_rs_by_station = df.groupby("start_station_name")["RS"].mean()

# Back to a flat two-column frame: station name + consistency score.
consistency_df = mean_rs_by_station.rename("consistency_score").reset_index()

print("Consistency score calculated.")

Consistency score calculated.


In [5]:
# ==========================================
# 3. MERGE METRICS
# ==========================================
# Put the density and consistency scores side by side, matched on station name.
final_df = pd.merge(
    density_df,
    consistency_df,
    on="start_station_name",
    how="inner",
)

In [6]:
# ==========================================
# 4️. FINAL CLASSIFICATION (Density-Dominant)
# ==========================================
# Minimum density score required for each of the two upper segments.
ANCHOR_DENSITY_THRESHOLD = 0.40
EMERGING_DENSITY_THRESHOLD = 0.20


def classify(
    density: float,
    anchor_threshold: float = ANCHOR_DENSITY_THRESHOLD,
    emerging_threshold: float = EMERGING_DENSITY_THRESHOLD,
) -> str:
    """Map a station's density score to its segment label.

    Args:
        density: Density score of the station.
        anchor_threshold: Inclusive lower bound for
            "Confirmed Behavioral Anchor". Defaults to 0.40.
        emerging_threshold: Inclusive lower bound for
            "High-Potential Emerging". Defaults to 0.20.

    Returns:
        "Confirmed Behavioral Anchor" when ``density >= anchor_threshold``,
        "High-Potential Emerging" when ``density >= emerging_threshold``,
        otherwise "Inconsistent / Noise".
    """
    if density >= anchor_threshold:
        return "Confirmed Behavioral Anchor"
    if density >= emerging_threshold:
        return "High-Potential Emerging"
    # Also reached for NaN, because every comparison with NaN is False.
    return "Inconsistent / Noise"

# Label every station from its density score alone.
final_df["final_status"] = final_df["density_score"].map(classify)

In [7]:
# ==========================================
# SAVE OUTPUT
# ==========================================
# Make sure the destination folder exists, then write the segments.
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path, index=False)

print("-" * 50)
print(f"Success. Final segments saved to:\n{output_path}")

# Number of stations in each segment.
status_counts = final_df["final_status"].value_counts()
print("\nFinal Portfolio Distribution:")
print(status_counts)

# The ten confirmed anchors with the highest consistency score.
is_anchor = final_df["final_status"] == "Confirmed Behavioral Anchor"
top_anchors = (
    final_df.loc[is_anchor]
    .sort_values("consistency_score", ascending=False)
    .head(10)
)
print("\nTop Anchors (Sorted by Consistency):")
print(top_anchors)

--------------------------------------------------
Success. Final segments saved to:
C:\Users\Siddharth\Desktop\DIVVY PROJECT\Data\Processed Datasets\station_behavior_segments.csv

Final Portfolio Distribution:
final_status
Inconsistent / Noise           481
Confirmed Behavioral Anchor     18
High-Potential Emerging         12
Name: count, dtype: int64

Top Anchors (Sorted by Consistency):
                      start_station_name  density_score  consistency_score  \
464     Wacker Dr & Washington St Corral            1.0           0.616675   
195        Franklin St & Adams St Corral            1.0           0.601600   
259    LaSalle St & Washington St Corral            1.0           0.563158   
338                            NAVY PIER            1.0           0.556589   
479        Wells St & Randolph St Corral            1.0           0.521154   
255       LaSalle St & Calhoun Pl Corral            1.0           0.520388   
200              Franklin St & Monroe St            1.0      