# In [None]:  (notebook cell marker left over from the Jupyter/Colab export)
import os

import joblib
import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler

# ==== CONFIG ====
WINDOW_SIZE = 10  # rows per sliding window (also the rolling-mean span below)
CSV_PATH = '/content/drive/MyDrive/thesis/combined_data_Auto_pilot.csv'

# The CSV lives on Google Drive, so the drive must be mounted before reading
drive.mount('/content/drive', force_remount=True)

# ==== LOAD DATA ====
data = pd.read_csv(CSV_PATH)

# ==== STANDARDIZE COLUMN NAMES ====
# Map latitude/longitude onto the gps_x/gps_y names used below, but only
# when both source columns are present
if {'latitude', 'longitude'}.issubset(data.columns):
    data.rename(columns={'latitude': 'gps_x', 'longitude': 'gps_y'}, inplace=True)

# ==== GPS-FREE ESTIMATE HANDLING ====
# Keep an existing gps_free_* column as-is; otherwise substitute a rolling
# mean of the matching GPS coordinate as a proxy estimate
for gps_col, est_col in (('gps_x', 'gps_free_x'), ('gps_y', 'gps_free_y')):
    if est_col not in data.columns:
        data[est_col] = data[gps_col].rolling(WINDOW_SIZE, min_periods=1).mean()

# ==== DIFFERENTIAL FEATURES ====
# Per-axis offset between the reported GPS value and the GPS-free estimate
for diff_col, gps_col, est_col in (
    ('diff_x', 'gps_x', 'gps_free_x'),
    ('diff_y', 'gps_y', 'gps_free_y'),
):
    data[diff_col] = data[gps_col] - data[est_col]

# ==== SENSOR FEATURES ====
# Optional sensor channels; only the ones actually present in the CSV are used
sensor_cols = ['accel_x','accel_y','accel_z','gyro_x','gyro_y','gyro_z',
               'speed','steering_angle','throttle','brake']
avail_sensors = [c for c in sensor_cols if c in data.columns]
if avail_sensors:
    print(f"Including sensor features: {avail_sensors}")

# ==== FEATURE MATRIX ====
feat_cols = ['diff_x','diff_y'] + avail_sensors
# ffill() alone cannot fill NaNs that precede a column's first valid value,
# and LocalOutlierFactor rejects NaN input; bfill() closes that leading gap.
features = data[feat_cols].ffill().bfill().values

# ==== LABELS ====
# Binary ground truth: 1 where the label reads 'spoofed' (case-insensitive), else 0
y_true = data['label'].map(lambda raw: int(str(raw).lower() == 'spoofed')).values

# ==== TRAIN-TEST SPLIT ====
# Train on normal rows only; evaluate on every row (normal rows are in both sets)
normal_mask = (y_true == 0)
X_train = features[normal_mask]
X_test, y_test = features, y_true

# ==== STANDARDIZATION ====
# Scaling statistics are learned from the normal rows and applied to all rows
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# ==== SLIDING WINDOW ====
def sliding_window(X, w):
    """Stack flattened windows of ``w`` consecutive rows of ``X``.

    Window ``k`` is ``X[k:k + w]`` flattened to 1-D. The last possible window
    (the final ``w`` rows) is not emitted, so the result has ``len(X) - w``
    rows and row ``k`` lines up with index ``k + w`` of ``X``.

    Args:
        X: Array-like whose first axis is the sample axis.
        w: Window length in rows; must be at least 1.

    Returns:
        2-D array of shape ``(max(len(X) - w, 0), w * values_per_row)``.
        When ``X`` has no more than ``w`` rows the result is empty but still
        2-D, so the feature dimension is preserved for downstream code.

    Raises:
        ValueError: If ``w`` is smaller than 1.
    """
    if w < 1:
        raise ValueError(f"window size must be >= 1, got {w}")
    X = np.asarray(X)
    n_windows = len(X) - w
    if n_windows <= 0:
        # Too few rows for a single window: keep the column count so callers
        # still see a well-formed (0, w * d) matrix instead of shape (0,).
        row_width = int(np.prod(X.shape[1:], dtype=int))
        return np.empty((0, w * row_width), dtype=X.dtype)
    return np.stack([X[start:start + w].ravel() for start in range(n_windows)])

# Window row k covers input rows [k, k + WINDOW_SIZE) and is paired with the
# label of row k + WINDOW_SIZE, i.e. the row immediately after the window.
# NOTE(review): the labelled row itself is not part of its window - confirm intended.
X_train_win, X_test_win = (
    sliding_window(matrix, WINDOW_SIZE) for matrix in (X_train, X_test)
)
y_test_win = y_test[WINDOW_SIZE:]

# ==== LOF MODEL ====
# Tuned for combined features
# novelty=True lets the fitted model score data it was not trained on via predict()
lof = LocalOutlierFactor(
    n_neighbors=20,
    leaf_size=30,
    novelty=True,
    contamination=0.01
)
lof.fit(X_train_win)

# ==== PREDICTION ====
# Remap the model output so that -1 becomes 1 (spoofed) and anything else 0 (normal)
lof_labels = lof.predict(X_test_win)
y_pred = (lof_labels == -1).astype(int)

# ==== EVALUATION ====
print("\n🔍 LOF Anomaly Detection Results:")

# target_names follow the 0/1 label encoding: 0 = Normal, 1 = Spoofed
report = classification_report(
    y_test_win,
    y_pred,
    target_names=['Normal','Spoofed'],
    digits=4,
)
print(report)

cm = confusion_matrix(y_test_win, y_pred)
print("Confusion Matrix:\n", cm)

# ==== SAVE ARTIFACTS ====
out_dir = '/content/drive/MyDrive/thesis/implementation'
# joblib.dump does not create missing folders; make sure the target exists so
# the fitted model is not lost to a FileNotFoundError at the very last step
os.makedirs(out_dir, exist_ok=True)
# Persist the model together with its scaler: new data must be scaled with
# the same statistics before it is windowed and scored
joblib.dump(lof, f"{out_dir}/lof_model_ext.pkl")
joblib.dump(scaler, f"{out_dir}/scaler_ext.pkl")
