In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [2]:
df = pd.read_csv('combined_traj_data.csv')

In [3]:
# Feature engineering
df['speed'] = np.sqrt(df['vx']**2 + df['vy']**2)
df['heading'] = np.arctan2(df['vy'], df['vx'])
df['accel'] = df['speed'].diff() / df['t'].diff()
df['turn_rate'] = df['heading'].diff() / df['t'].diff()
df['distance'] = np.sqrt(df['x'].diff()**2 + df['y'].diff()**2)

features = df[['speed', 'heading', 'accel', 'turn_rate', 'distance']]

In [None]:
features['accel'].fillna(0, inplace=True)
features['turn_rate'].fillna(0, inplace=True)
features['distance'].fillna(0, inplace=True)

In [5]:
features.head()

Unnamed: 0,speed,heading,accel,turn_rate,distance
0,14.1,1.186824,0.0,0.0,0.0
1,14.0,1.186824,-0.001587,-2.819614e-17,0.00441
2,14.1,1.186824,0.004545,8.074349000000001e-17,0.00171
3,14.0,1.186824,-0.003448,-6.125368e-17,0.001645
4,14.0,1.186824,0.0,0.0,0.005133


In [6]:
from sklearn.neighbors import LocalOutlierFactor

lof = LocalOutlierFactor()
y_pr = lof.fit_predict(features)

In [7]:
inlier_indices = y_pr == 1
features_no_outliers = features[inlier_indices]

In [8]:
features_no_outliers = features_no_outliers.reset_index(drop=True)

In [9]:
features_no_outliers.head()

Unnamed: 0,speed,heading,accel,turn_rate,distance
0,14.1,1.186824,0.004545,8.074349000000001e-17,0.00171
1,14.1,1.186824,0.000327,5.805088e-18,0.021769
2,14.1,1.186824,0.0,0.0,0.000922
3,14.1,1.186824,0.0,0.0,0.000428
4,14.1,1.186824,0.0,0.0,0.001717


In [None]:
# Cluster trajectories
kmeans = KMeans(n_clusters=5)
features_no_outliers['cluster'] = kmeans.fit_predict(features_no_outliers)

cluster_list = features_no_outliers['cluster'].tolist()
normal_cluster = max(set(cluster_list), key=cluster_list.count)

In [11]:
features_no_outliers['cluster'] = np.where(features_no_outliers['cluster'] == normal_cluster, 1, 0)

In [12]:
features_no_outliers.head()

Unnamed: 0,speed,heading,accel,turn_rate,distance,cluster
0,14.1,1.186824,0.004545,8.074349000000001e-17,0.00171,0
1,14.1,1.186824,0.000327,5.805088e-18,0.021769,0
2,14.1,1.186824,0.0,0.0,0.000922,0
3,14.1,1.186824,0.0,0.0,0.000428,0
4,14.1,1.186824,0.0,0.0,0.001717,0


In [13]:
normal_features = features_no_outliers[['speed', 'heading', 'accel', 'turn_rate', 'distance']]

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(normal_features, features_no_outliers['cluster'], test_size=0.33, random_state=42)

In [15]:
from xgboost import XGBClassifier

model = XGBClassifier(n_estimators=50,learning_rate=0.02,max_depth=5)
model.fit(X_train, y_train)

In [16]:
from sklearn.metrics import accuracy_score
# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9960870296350912
