# In [None]:
import os
import random
import numpy as np
import pandas as pd
from collections import deque

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.cluster import KMeans

# ==== CONFIG ====
# Paths and hyperparameters used by the data loading, model and training code below.
CSV_PATH = '/content/drive/MyDrive/thesis/combined_data_Auto_pilot.csv'  # input dataset
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when available, else CPU
SENSOR_SIZE = 10                # accel(3), gyro(3), speed, steering, throttle, brake
EPISODES = 100                  # number of training episodes (each is one pass over the data)
GAMMA = 0.95                    # discount factor for the bootstrapped next-state value
LR = 1e-4                       # Adam learning rate
MEMORY_SIZE = 10000             # replay buffer capacity (deque maxlen)
BATCH_SIZE = 64                 # transitions sampled per replay step
SEQ_LENGTH = 5                  # temporal sequence length
EPSILON_START = 1.0             # initial exploration probability
EPSILON_END = 0.1               # lower bound for the exploration probability
EPSILON_DECAY = 0.995           # multiplicative epsilon decay, applied once per episode
TARGET_UPDATE_FREQ = 10         # episodes

def load_data(path):
    """Read the CSV at ``path`` and return its contents as a pandas DataFrame."""
    frame = pd.read_csv(path)
    return frame

# Load the dataset once; the helper functions below read this module-level frame.
data = load_data(CSV_PATH)

# Active-learning pseudo-labeling via clustering GPS coords
def train_kmeans(data):
    """Cluster GPS coordinates into two groups and give each cluster a 0/1 label.

    A 2-cluster KMeans (``random_state=42``) is fitted on the ``latitude`` and
    ``longitude`` columns. Each cluster is then mapped to 1 when the most
    frequent value of the ``label`` column among its rows is ``'spoofed'``,
    and to 0 otherwise.

    Returns:
        (kmeans, mapping): the fitted KMeans model and a dict that maps each
        cluster id to its 0/1 label.
    """
    gps = data[['latitude','longitude']].values
    kmeans = KMeans(n_clusters=2, random_state=42)
    kmeans.fit(gps)
    assignments = kmeans.labels_

    mapping = {}
    for cluster_id in np.unique(assignments):
        member_rows = np.flatnonzero(assignments == cluster_id)
        # mode() can return several values on a tie; the first entry is used.
        dominant = data.iloc[member_rows]['label'].mode()[0]
        mapping[cluster_id] = int(dominant == 'spoofed')
    return kmeans, mapping

# Fit the GPS clustering once up front; pseudo_label() reads both results.
kmeans, cluster_to_label = train_kmeans(data)

def pseudo_label(idx):
    """Return the 0/1 cluster-derived label for the row at position ``idx``.

    The row's ``latitude``/``longitude`` pair is assigned to a cluster by the
    module-level ``kmeans`` model and the cluster id is translated through
    ``cluster_to_label``.
    """
    row = data.iloc[idx]
    # predict() expects a 2-D array, hence the (1, n_features) reshape.
    gps = row[['latitude','longitude']].values.reshape(1, -1)
    cluster_id = kmeans.predict(gps)[0]
    return cluster_to_label[cluster_id]

# Build sequential sensor-state loader

def load_sequence(idx):
    """Build the sensor window that ends at row ``idx``.

    The window covers positions ``idx - SEQ_LENGTH + 1`` through ``idx`` in
    chronological order. Positions below 0 are clamped to row 0, so windows
    near the start of the data repeat the first row.

    Returns:
        A float32 tensor of shape [SEQ_LENGTH, 10], one column per sensor
        channel listed in ``sensor_cols``.
    """
    sensor_cols = ['accel_x','accel_y','accel_z',
                   'gyro_x','gyro_y','gyro_z',
                   'speed','steering_angle','throttle','brake']
    # Oldest row first: step back SEQ_LENGTH-1 rows, then walk forward to idx.
    row_positions = [max(idx - back, 0) for back in range(SEQ_LENGTH - 1, -1, -1)]
    window = [data.iloc[pos][sensor_cols].values.astype(np.float32)
              for pos in row_positions]
    return torch.tensor(np.stack(window, axis=0))  # [SEQ_LENGTH, SENSOR_SIZE]

# BiLSTM-DQN model using only sensor inputs
class BiLSTM_DQN(nn.Module):
    """Q-network: per-timestep sensor embedding -> bidirectional LSTM -> MLP head.

    Args:
        sensor_size: number of input features per timestep.
        action_space: number of discrete actions (width of the Q-value output).
        embed_size: width of the per-timestep embedding (default 64).
        hidden_size: LSTM hidden size per direction (default 128).

    With the default sizes the parameter shapes and state_dict keys are the
    same as the previous hard-coded 64/128/256 layout.
    """

    def __init__(self, sensor_size, action_space, embed_size=64, hidden_size=128):
        super().__init__()
        self.sensor_fc = nn.Sequential(
            nn.Linear(sensor_size, embed_size),
            nn.ReLU()
        )
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True, bidirectional=True)
        # A bidirectional LSTM concatenates both directions: 2 * hidden_size features.
        self.head = nn.Sequential(
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, action_space)
        )

    def forward(self, x):  # x: [B, T, sensor_size]
        """Return Q-values of shape [B, action_space] for a batch of sequences."""
        B, T, _ = x.shape
        # reshape (not view): view raises on non-contiguous inputs such as
        # transposed or permuted batches, while reshape copies when it has to.
        flat = x.reshape(B * T, -1)
        emb = self.sensor_fc(flat)            # [B*T, embed_size]
        seq = emb.view(B, T, -1)              # [B, T, embed_size]
        out, _ = self.lstm(seq)               # [B, T, 2*hidden_size]
        # NOTE(review): at the last timestep the backward half of `out` has only
        # processed the final input. If full backward context is wanted, use the
        # final hidden states (h_n) of both directions instead.
        return self.head(out[:, -1, :])       # [B, action_space]

# Agent and networks
action_space = 3  # 0=query,1=trust,2=replace
# `online` is the network being optimised; `target` starts as an exact copy of it
# and supplies the bootstrapped next-state values during replay.
online = BiLSTM_DQN(SENSOR_SIZE, action_space).to(DEVICE)
target = BiLSTM_DQN(SENSOR_SIZE, action_space).to(DEVICE)
target.load_state_dict(online.state_dict())
optimizer = optim.Adam(online.parameters(), lr=LR)
memory = deque(maxlen=MEMORY_SIZE)  # replay buffer; oldest entries drop out once full

epsilon = EPSILON_START  # current exploration probability for select_action()

def select_action(state):
    """Choose an action for ``state`` using epsilon-greedy with a query fallback.

    With probability ``epsilon`` a uniformly random action is returned.
    Otherwise the online network scores the state: when its two largest
    Q-values are less than 0.1 apart the agent returns action 0 (query),
    else it returns the arg-max action.
    """
    explore = random.random() < epsilon
    if explore:
        return random.randint(0, action_space-1)

    with torch.no_grad():
        batch = state.unsqueeze(0).to(DEVICE)   # add the batch dimension
        q_values = online(batch).cpu().numpy().flatten()

    # np.partition leaves the two largest values in the last two slots in
    # ascending order, so runner_up <= best.
    runner_up, best = np.partition(q_values, -2)[-2:]
    if abs(best - runner_up) < 0.1:
        return 0
    return int(np.argmax(q_values))

loss_fn = nn.MSELoss()

def replay():
    """Run one optimisation step on a random mini-batch from the replay buffer.

    Returns immediately while ``memory`` holds fewer than ``BATCH_SIZE``
    transitions. Otherwise it samples ``BATCH_SIZE`` transitions of the form
    ``(state, action, reward, next_state, done)``, forms the one-step TD target
    ``reward + GAMMA * max_a target(next_state)`` (with the bootstrap term
    zeroed for terminal transitions) and minimises the MSE between that target
    and the online network's Q-value for the action taken.
    """
    if len(memory) < BATCH_SIZE:
        return
    batch = random.sample(memory, BATCH_SIZE)
    states, actions, rewards, next_states, dones = zip(*batch)
    s = torch.stack(states).to(DEVICE)
    n = torch.stack(next_states).to(DEVICE)
    # Explicit dtypes: gather needs int64 indices, and the rewards are Python
    # ints that would otherwise become an int64 tensor relying on implicit
    # promotion inside the target expression.
    a = torch.tensor(actions, dtype=torch.long, device=DEVICE)
    r = torch.tensor(rewards, dtype=torch.float32, device=DEVICE)
    d = torch.tensor(dones, dtype=torch.bool, device=DEVICE)

    # squeeze(1) removes only the gathered column; a bare squeeze() would also
    # drop the batch axis when BATCH_SIZE == 1 and mismatch the target's shape.
    q_vals = online(s).gather(1, a.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        next_q = target(n).max(1)[0]
    # (~d) zeroes the bootstrap term for terminal transitions.
    tgt = r + GAMMA * next_q * (~d)
    loss = loss_fn(q_vals, tgt)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training loop
print("🎮 Training Active DQN with BiLSTM for GPS spoofing...")
# Ground-truth labels are loop-invariant: compare the whole column once
# instead of doing a row lookup on every step of every episode.
is_spoofed = (data['label'] == 'spoofed').to_numpy()
last_idx = len(data) - 2  # final row that still has a successor for next_state
for ep in range(1, EPISODES+1):
    total_reward = 0
    # Each step's next_state is the following step's state, so every window is
    # built once and carried over rather than being rebuilt.
    state = None
    for idx in range(SEQ_LENGTH, last_idx + 1):
        if state is None:
            state = load_sequence(idx)
        next_state = load_sequence(idx+1)
        true_label = int(is_spoofed[idx])
        # Terminal flag depends only on the position in the episode, so a query
        # on the last row is also stored as terminal.
        done = (idx == last_idx)

        action = select_action(state)
        if action == 0:
            # query expert -> pseudo-label
            # NOTE(review): the pseudo-label is computed but never used; a query
            # currently just yields a neutral reward.
            _ = pseudo_label(idx)
            reward = 0
        else:
            correct = (action==2 and true_label==1) or (action==1 and true_label==0)
            reward = 1 if correct else -1
            total_reward += reward

        memory.append((state, action, reward, next_state, done))
        replay()
        state = next_state

    # Decay exploration once per episode, never below EPSILON_END.
    epsilon = max(EPSILON_END, epsilon * EPSILON_DECAY)
    if ep % TARGET_UPDATE_FREQ == 0:
        # Periodically sync the target network with the online network.
        target.load_state_dict(online.state_dict())
    print(f"Episode {ep}/{EPISODES} - Total Reward: {total_reward}")

# Save model
# Only the online network's weights (its state_dict) are written to disk.
output_path = '/content/drive/MyDrive/thesis/dqn_active_bilstm_gps.pth'
torch.save(online.state_dict(), output_path)
print(f"✅ Model saved to {output_path}")
