In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [2]:
# Load the combined trajectory table (path is relative to the working directory).
csv_path = 'combined_traj_data.csv'
df = pd.read_csv(csv_path)

In [3]:
# Feature engineering
# Per-row kinematic features derived from velocity (vx, vy), position (x, y) and time (t).
df['speed'] = np.sqrt(df['vx']**2 + df['vy']**2)
df['heading'] = np.arctan2(df['vy'], df['vx'])

# Row-to-row differences must not straddle two different trajectories. When the table
# has a `traj_number` column (the synthetic validation frame built later in this
# notebook does), difference within each trajectory; otherwise fall back to
# whole-frame differences exactly as before.
# NOTE(review): the CSV schema is not visible from the notebook — confirm the column name.
if 'traj_number' in df.columns:
    by_traj = df.groupby('traj_number')
else:
    by_traj = df

dt = by_traj['t'].diff()
df['accel'] = by_traj['speed'].diff() / dt
# NOTE(review): heading comes from arctan2, so a raw difference can jump by about
# 2*pi when the heading crosses +/-pi; this is not unwrapped here.
df['turn_rate'] = by_traj['heading'].diff() / dt
df['distance'] = np.sqrt(by_traj['x'].diff()**2 + by_traj['y'].diff()**2)

# Independent copy: later cells modify `features` and must neither alias `df` nor
# trigger SettingWithCopyWarning.
features = df[['speed', 'heading', 'accel', 'turn_rate', 'distance']].copy()

In [None]:
# Rows without a predecessor get NaN from .diff(); use 0 for those.
# Assign the filled frame back instead of calling fillna(inplace=True) on a column
# selection: the in-place form works on a temporary Series, which pandas warns about
# and which leaves `features` untouched when Copy-on-Write is enabled.
features = features.fillna({'accel': 0, 'turn_rate': 0, 'distance': 0})

In [5]:
from sklearn.neighbors import LocalOutlierFactor

# Label every row of `features` with a Local Outlier Factor detector left at its
# default settings; `y_pr` holds one label per row.
lof = LocalOutlierFactor()
y_pr = lof.fit_predict(X=features)

In [6]:
# Keep only the rows whose LOF label is 1.
inlier_indices = y_pr == 1
# .copy() so the label column added later is written to an independent frame.
features_no_outliers = df.loc[inlier_indices].copy()
# `df` still contains the NaNs produced by .diff() — only `features` had them filled —
# and KMeans rejects NaN input. Take the engineered columns from the filled frame.
features_no_outliers[list(features.columns)] = features.loc[inlier_indices]
features_no_outliers = features_no_outliers.reset_index(drop=True)
# NOTE(review): this frame keeps every column of `df`, so the KMeans cell clusters on
# the raw columns as well as the engineered ones — confirm that is intended.

In [None]:
# Cluster the inlier rows into 5 groups and treat the most populated cluster as "normal".
# A fixed random_state keeps the cluster ids, and therefore `normal_cluster`, reproducible.
kmeans = KMeans(n_clusters=5, random_state=42)

# Drop a label column left by an earlier run of this cell so that re-running it does
# not feed the previous labels back in as a clustering feature.
kmeans_input = features_no_outliers.drop(columns=['cluster_kmeans'], errors='ignore')
features_no_outliers['cluster_kmeans'] = kmeans.fit_predict(kmeans_input)

cluster_list = features_no_outliers['cluster_kmeans'].tolist()
# Most frequent cluster id, counted once instead of calling list.count per candidate.
normal_cluster = int(features_no_outliers['cluster_kmeans'].value_counts().idxmax())

In [None]:
# Collapse the KMeans ids into a binary label: 1 for rows in `normal_cluster`, 0 otherwise.
# NOTE(review): the raw ids are overwritten, so running this cell a second time compares
# the 0/1 labels against `normal_cluster` and produces different labels.
in_normal_cluster = features_no_outliers['cluster_kmeans'] == normal_cluster
features_no_outliers['cluster_kmeans'] = np.where(in_normal_cluster, 1, 0)

In [None]:
# Classifier inputs: the five engineered columns for every inlier row (both label values).
feature_columns = ['speed', 'heading', 'accel', 'turn_rate', 'distance']
normal_features = features_no_outliers[feature_columns]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for testing; the fixed random_state keeps the split repeatable.
binary_labels = features_no_outliers['cluster_kmeans']
X_train, X_test, y_train, y_test = train_test_split(
    normal_features,
    binary_labels,
    test_size=0.33,
    random_state=42,
)

In [None]:
from xgboost import XGBClassifier

# Gradient-boosted classifier fitted on the training split to predict the binary
# KMeans-derived label.
xgb_params = {'n_estimators': 50, 'learning_rate': 0.02, 'max_depth': 5}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score
# Evaluate model
# Accuracy of the classifier on the held-out test split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

In [None]:
import numpy as np

# Synthetic validation trajectories. Each trajectory is a (traj_len, 6) array whose
# columns are x, y, vx, vy, t, traj_number. A dedicated seeded generator makes the data
# identical on every run instead of depending on the global NumPy random state.
SYNTH_SEED = 42
rng = np.random.default_rng(SYNTH_SEED)

# Generate 15 normal trajectories
num_traj = 15
traj_len = 50
normal_trajectories = []
for i in range(num_traj):
    x = rng.uniform(low=-5.0, high=5.0, size=traj_len)
    y = rng.uniform(low=40.0, high=60.0, size=traj_len)
    vx = rng.normal(loc=10, scale=2, size=traj_len)
    vy = rng.normal(loc=10, scale=2, size=traj_len)
    t = np.arange(traj_len)
    traj_num = np.full(traj_len, i+1)  # 1-based trajectory id repeated on every row
    trajectory = np.column_stack((x, y, vx, vy, t, traj_num))
    normal_trajectories.append(trajectory)

num_loc_anom = 7
num_vel_anom = 8
# Every anomalous trajectory is derived from a distinct normal one (location anomalies
# use the first `num_loc_anom`, velocity anomalies the following `num_vel_anom`), so
# fail early with a clear message instead of an IndexError inside the loops.
if num_loc_anom + num_vel_anom > num_traj:
    raise ValueError("num_loc_anom + num_vel_anom must not exceed num_traj")

# Generate location-based anomalies
# One row (index drawn from 10..39) of a copied normal trajectory gets its position
# replaced by a point drawn from [10, 15) x [10, 15).
loc_anom_traj = []
for i in range(num_loc_anom):
    t = rng.integers(10, 40)
    x = rng.uniform(low=10, high=15)
    y = rng.uniform(low=10, high=15)
    anomaly_traj = normal_trajectories[i].copy()  # copy: keep the normal trajectory intact
    anomaly_traj[t,0] = x
    anomaly_traj[t,1] = y
    loc_anom_traj.append(anomaly_traj)

# Generate velocity-based anomalies
# One row (index drawn from 10..39) of a copied normal trajectory gets its velocity
# replaced by values drawn from N(40, 5).
vel_anom_traj = []
for i in range(num_vel_anom):
    t = rng.integers(10, 40)
    vx = rng.normal(loc=40, scale=5)
    vy = rng.normal(loc=40, scale=5)
    anomaly_traj = normal_trajectories[i+num_loc_anom].copy()
    anomaly_traj[t,2] = vx
    anomaly_traj[t,3] = vy
    vel_anom_traj.append(anomaly_traj)

abnormal_trajectories = loc_anom_traj + vel_anom_traj

In [None]:
import pandas as pd

# List of column names
cols = ['x', 'y', 'vx', 'vy', 't', 'traj_number']

# Build one frame per trajectory and concatenate them in a single call.
# Growing a frame with pd.concat inside a loop copies all earlier rows on every
# iteration, and seeding it with an empty frame puts object-dtype columns into the
# concat. ignore_index=True gives the result a unique 0..n-1 index instead of
# repeating each trajectory's own 0..len-1 index.
# Note: this rebinds `df`, which held the CSV data earlier in the notebook.
traj_frames = [pd.DataFrame(traj, columns=cols) for traj in abnormal_trajectories]
if traj_frames:
    df = pd.concat(traj_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the old "empty frame" result instead.
    df = pd.DataFrame(columns=cols)


In [None]:
# reset_index returns a new frame; assign it back so `df` really gets a unique
# 0..n-1 index (the bare call only displayed a re-indexed copy).
df = df.reset_index(drop=True)
df.head()

In [None]:
# Feature engineering
# Per-row kinematic features derived from velocity (vx, vy), position (x, y) and time (t).
df['speed'] = np.sqrt(df['vx']**2 + df['vy']**2)
df['heading'] = np.arctan2(df['vy'], df['vx'])

# `df` stacks several trajectories, identified by `traj_number`. Difference within each
# trajectory so that the first row of one trajectory is not compared with the last row
# of the previous one (where the time step would also be negative). Those first rows
# get NaN here, like the very first row of the frame.
per_traj = df.groupby('traj_number')
dt = per_traj['t'].diff()
df['accel'] = per_traj['speed'].diff() / dt
df['turn_rate'] = per_traj['heading'].diff() / dt
df['distance'] = np.sqrt(per_traj['x'].diff()**2 + per_traj['y'].diff()**2)

In [None]:
# Rows without a predecessor get NaN from .diff(); use 0 for those.
# Write the filled columns back explicitly: fillna(inplace=True) on `df[col]` acts on a
# temporary Series, which pandas warns about and which leaves `df` unchanged when
# Copy-on-Write is enabled.
fill_cols = ['accel', 'turn_rate', 'distance']
df[fill_cols] = df[fill_cols].fillna(0)

In [None]:
# Displays the training-stage `features` frame with a fresh 0..n-1 index.
# NOTE(review): the result is not assigned back, so `features` is unchanged, and none
# of the cells visible below read `features` — this cell looks like a leftover.
features_preview = features.reset_index(drop=True)
features_preview

In [None]:
# Assign each validation row to one of the clusters fitted earlier.
# Pass exactly the columns KMeans was fitted on, in the fitted order, rather than the
# whole frame: `df` gains a `cluster_kmeans` column below, so predicting on all of
# `df` fails as soon as this cell is run a second time.
fitted_columns = getattr(kmeans, 'feature_names_in_', None)
# feature_names_in_ only exists when the model was fitted on a frame with string
# column names; without it, fall back to the whole frame as before.
kmeans_input = df if fitted_columns is None else df[list(fitted_columns)]
df['cluster_kmeans'] = kmeans.predict(kmeans_input)

In [None]:
# Collapse the predicted cluster ids into a binary label: 1 for `normal_cluster`, 0 otherwise.
# NOTE(review): the raw ids are overwritten, so running this cell twice changes the labels.
is_normal_row = df['cluster_kmeans'] == normal_cluster
df['cluster_kmeans'] = np.where(is_normal_row, 1, 0)

In [None]:
# Preview the first five rows of the labelled validation frame.
df.head(n=5)

In [None]:
# Validation inputs: the same five engineered columns the classifier was trained on.
val_feature_columns = ['speed', 'heading', 'accel', 'turn_rate', 'distance']
X_val = df[val_feature_columns]

In [None]:
# Validation targets: the binary label derived from the KMeans prediction above.
# NOTE(review): these are not the injected-anomaly flags from the generator cell, so
# the score below measures agreement with KMeans — confirm that is the intent.
Y_val = df.loc[:, 'cluster_kmeans']

In [None]:
from sklearn.metrics import accuracy_score
# Evaluate model
# Accuracy of the classifier on the synthetic validation frame.
y_pred_val = model.predict(X_val)
accuracy = accuracy_score(y_true=Y_val, y_pred=y_pred_val)
print(f"Accuracy: {accuracy}")