In [2]:
import pandas as pd

# Location of the raw movement-edge table (absolute path on the authoring machine).
raw_edges_csv = 'D:\\Projects\\Human-Behavior-Drift-Detection-System\\data\\raw\\movement_edges.csv'

# Load every edge record into the working DataFrame used by the cells below.
df = pd.read_csv(raw_edges_csv)

In [3]:
# Flow ratio per edge: current flow expressed as a fraction of the edge's capacity.
flow_ratio = df["Current_Flow"].div(df["Flow_Capacity"])

# Attach the ratio as a new "Flow_Ratio" column (assign returns a new frame).
df = df.assign(Flow_Ratio=flow_ratio)
df.head()

Unnamed: 0,Event_ID,Source_Seat,Target_Seat,Path_Type,Distance,Flow_Capacity,Current_Flow,Congestion_Level,Flow_Ratio
0,201,C09,C03,Ramp,9.0,247,151,0.61,0.611336
1,201,C02,C42,Stairs,6.4,142,97,0.68,0.683099
2,201,C18,C10,Corridor,26.8,174,120,0.69,0.689655
3,201,C29,C18,Ramp,26.9,293,126,0.43,0.430034
4,201,C05,C02,Ramp,18.2,233,217,0.93,0.93133


In [4]:
# Avg congestion per Event_ID: mean of Congestion_Level over each event's edges.
congestion_by_event = df.groupby("Event_ID")["Congestion_Level"]
avg_congestion = congestion_by_event.mean().rename("Avg_Congestion")
avg_congestion.head()

Event_ID
201    0.614915
202    0.665169
203    0.644153
204    0.646807
205    0.652941
Name: Avg_Congestion, dtype: float64

In [5]:
# High risk % per Event_ID: percentage of an event's edges whose
# Congestion_Level is strictly above the high-risk threshold.
HIGH_RISK_THRESHOLD = 0.8  # congestion level above which an edge counts as high risk

# Vectorised instead of a per-group Python lambda: flag every edge once, then
# average the boolean flags within each event (the mean of True/False values is
# the fraction that are True) and scale that fraction to a percentage.
high_risk_pct = (
    df["Congestion_Level"]
      .gt(HIGH_RISK_THRESHOLD)
      .groupby(df["Event_ID"])
      .mean()
      .mul(100)
      .rename("High_Risk_Pct")
)
high_risk_pct.head()

Event_ID
201    24.576271
202    33.898305
203    27.118644
204    29.411765
205    26.890756
Name: High_Risk_Pct, dtype: float64

In [10]:
# Path preference per Event_ID: percentage of each event's edges by Path_Type.
# normalize="index" makes every row (one event) sum to 1 before scaling to 100.
path_pref = (
    pd.crosstab(df["Event_ID"], df["Path_Type"], normalize="index")
      .mul(100)
      .add_prefix("Path_Pref_")
      # Name the row index and clear the "Path_Type" label from the column axis.
      .rename_axis(index="Event_ID", columns=None)
)
path_pref.head()

Unnamed: 0_level_0,Path_Pref_Corridor,Path_Pref_Ramp,Path_Pref_Stairs
Event_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
201,29.661017,34.745763,35.59322
202,33.050847,32.20339,34.745763
203,35.59322,29.661017,34.745763
204,33.613445,32.773109,33.613445
205,29.411765,30.252101,40.336134


In [7]:
# Network stress per Event_ID: total of the per-edge Flow_Ratio values in each event.
flow_ratio_by_event = df.groupby("Event_ID")["Flow_Ratio"]
network_stress = flow_ratio_by_event.sum().rename("Network_Stress")
network_stress.head()

Event_ID
201    72.623211
202    78.513276
203    75.943769
204    76.971164
205    77.641544
Name: Network_Stress, dtype: float64

In [11]:
# Combine into behavior vectors: align the per-event features side by side.
feature_blocks = [avg_congestion, high_risk_pct, network_stress, path_pref]

behavior_vectors = (
    pd.concat(feature_blocks, axis=1)
      .rename_axis("Event_ID")  # label the index so reset_index yields an "Event_ID" column
      .reset_index()
)

behavior_vectors

Unnamed: 0,Event_ID,Avg_Congestion,High_Risk_Pct,Network_Stress,Path_Pref_Corridor,Path_Pref_Ramp,Path_Pref_Stairs
0,201,0.614915,24.576271,72.623211,29.661017,34.745763,35.59322
1,202,0.665169,33.898305,78.513276,33.050847,32.20339,34.745763
2,203,0.644153,27.118644,75.943769,35.59322,29.661017,34.745763
3,204,0.646807,29.411765,76.971164,33.613445,32.773109,33.613445
4,205,0.652941,26.890756,77.641544,29.411765,30.252101,40.336134
5,206,0.616807,25.210084,73.384496,31.092437,31.932773,36.97479
6,207,0.668889,32.478632,78.211693,36.752137,29.91453,33.333333
7,208,0.636496,28.205128,74.497276,32.478632,35.897436,31.623932
8,209,0.639828,26.724138,74.193096,29.310345,38.793103,31.896552
9,210,0.602393,23.076923,70.496418,27.350427,32.478632,40.17094


In [12]:
# Export behavior vectors to CSV
import os

output_path = "D:/Projects/Human-Behavior-Drift-Detection-System/data/processed/behavior_vectors.csv"

# Create the destination folder first; exist_ok=True makes this a no-op if it is already there.
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True)

# Write without the positional index; Event_ID is already a regular column.
behavior_vectors.to_csv(output_path, index=False)
output_path

'D:/Projects/Human-Behavior-Drift-Detection-System/data/processed/behavior_vectors.csv'