In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import keras_tuner as kt

In [147]:
# Show every column when a DataFrame is displayed (None disables column truncation).
pd.set_option('display.max_columns', None)

In [148]:
def load_data(file_path):
    """Read the CSV at ``file_path`` and return its contents as a DataFrame.

    Args:
        file_path: Anything ``pd.read_csv`` accepts (path string or file-like object).

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file_path)

In [149]:
# Load the CSV; the path is relative to the notebook's working directory.
raw_data = load_data("Drone_Data/deauth_data_fused.csv")
# Preview the first rows.
raw_data.head()

Unnamed: 0,timestamp,mid,x,y,mpitch,mroll,myaw,z,pitch,roll,yaw,vgx,vgy,vgz,templ,temph,tof,h,bat,baro,flight_time,agx,agy,agz,mpitch.1,mroll.1,myaw.1,timestamp_c,frame.number,frame.len,wlan.ta,wlan.sa,wlan.ra,wlan.da,wlan.bssid,wlan.duration,wlan.frag,wlan.seq,wlan.fc.type,wlan.fc.subtype,wlan.flags,wlan.fcs,wlan.fcs.status,wlan.qos,wlan.qos.priority,wlan.qos.ack,wlan.ccmp.extiv,wlan.wep.key,data.len,radiotap.hdr_length,radiotap.antenna_signal,radiotap.signal_quality,radiotap.channel.flags.ofdm,radiotap.channel.flags.cck,wlan_radio.datarate,wlan_radio.channel,wlan_radio.frequency,wlan_radio.signal_strength (dbm),wlan_radio.Noise level (dbm),wlan_radio.SNR (db),wlan_radio.preamble,class
0,2024-04-24 16:42:32,-1,-100,-100,0,0,0,-100,0,0,-80,0,0,1,64,68,81,30,93,305.34,1,11.0,-12.0,-926.0,0,0,0,1713995000.0,11,124,0,0,2,1,0,48,0,15,2,8,65,3850463446,2,0,0,0,326,0,51,35,-22,78,0,0,144.444,9,2452,-22,0,0,40,0
1,2024-04-24 16:42:32,-1,-100,-100,0,0,0,-100,0,0,-80,0,0,1,64,68,81,30,93,305.34,1,11.0,-12.0,-926.0,0,0,0,1713995000.0,13,121,2,0,1,1,0,44,0,3941,2,8,66,1477863697,2,0,0,0,4,0,51,32,-28,81,1,0,54.0,9,2452,-28,0,0,20,0
2,2024-04-24 16:42:32,-1,-100,-100,0,0,0,-100,0,0,-80,0,0,1,64,68,85,40,93,305.44,1,3.0,-18.0,-934.0,0,0,0,1713995000.0,15,116,1,1,2,0,0,44,0,27,2,8,65,3874222033,2,0,0,0,28,0,46,32,-40,84,1,0,54.0,9,2452,-40,0,0,20,0
3,2024-04-24 16:42:32,-1,-100,-100,0,0,0,-100,0,0,-80,0,0,1,64,68,85,40,93,305.44,1,3.0,-18.0,-934.0,0,0,0,1713995000.0,18,119,2,1,0,0,0,48,0,10,2,8,66,1894789517,2,0,0,0,27,0,46,35,-30,78,0,0,130.0,9,2452,-30,0,0,40,0
4,2024-04-24 16:42:32,-1,-100,-100,0,0,0,-100,0,0,-80,0,0,0,64,68,89,40,93,305.44,1,-6.0,2.0,-945.0,0,0,0,1713995000.0,29,284,1,1,2,0,0,44,0,28,2,8,73,251167194,2,0,0,0,29,0,214,32,-40,78,1,0,48.0,9,2452,-40,0,0,20,0


In [93]:
# Count rows containing at least one NaN: any(axis=1) flags a row when any of its columns is missing.
count_all_na = raw_data.isna().any(axis=1).sum()
print("Number of rows with any NaN values:", count_all_na)

Number of rows with any NaN values: 0


In [157]:
# Find columns that hold a single distinct value across all rows (nunique() == 1).
columns_with_one_value = raw_data.columns[raw_data.nunique() == 1]

print("Columns with only one unique value:")
print(columns_with_one_value)

Columns with only one unique value:
Index(['wlan.frag', 'wlan.qos.ack', 'wlan_radio.Noise level (dbm)',
       'wlan_radio.SNR (db)'],
      dtype='object')


In [158]:
def drop_columns(data):
    """Return a time-ordered copy of ``data`` without the timestamp and single-valued columns.

    The caller's frame is left untouched: the timestamp is parsed on a copy
    rather than written back into ``data``.

    Args:
        data: DataFrame containing a parseable 'timestamp' column plus the
            'wlan_radio.Noise level (dbm)', 'wlan_radio.SNR (db)', 'wlan.frag'
            and 'wlan.qos.ack' columns.

    Returns:
        A new DataFrame sorted by timestamp, with those five columns removed
        and a fresh 0..n-1 index.

    Raises:
        KeyError: If any of the expected columns is missing.
    """
    # assign() builds a new frame, so the input's 'timestamp' column keeps its original dtype.
    ordered = data.assign(timestamp=pd.to_datetime(data['timestamp']))

    # Stable sort: rows sharing a timestamp keep their original relative order,
    # which matters because later steps treat consecutive rows as a sequence.
    ordered = ordered.sort_values(by='timestamp', kind='stable')

    # The timestamp is only needed for ordering; the other four columns are single-valued.
    ordered = ordered.drop(columns=[
        'timestamp',
        'wlan_radio.Noise level (dbm)',
        'wlan_radio.SNR (db)',
        'wlan.frag',
        'wlan.qos.ack',
    ])

    # Discard the pre-sort index labels so positions and labels agree again.
    return ordered.reset_index(drop=True)


In [161]:
physical_columns = [
    'mid', 'x', 'y', 'z', 'pitch', 'roll', 'yaw', 'vgx', 'vgy', 'vgz',
    'templ', 'temph', 'tof', 'h', 'bat', 'baro', 'flight_time', 'agx', 'agy', 'agz'
]

# Multiplier applied to the physical columns.
# NOTE(review): a factor below 1 shrinks these columns rather than emphasising them.
weight_factor = .75  # Adjust this factor based on your needs

# Build the cleaned frame before touching it, so this cell runs on a fresh kernel,
# and scale clean_data's own columns: clean_data is sorted and re-indexed, so
# assigning raw_data's columns into it would pair values with the wrong rows.
clean_data = drop_columns(raw_data)
clean_data[physical_columns] = clean_data[physical_columns] * weight_factor
# NOTE(review): the next cell rebinds clean_data from raw_data, and a positive constant
# multiplier does not change min-max scaled values, so this weighting currently has
# no effect on the model inputs. Apply it after normalization if it is meant to matter.

In [162]:
# Rebuild clean_data from raw_data (any earlier modification of clean_data is discarded),
# then confirm that no single-valued columns remain.
clean_data = drop_columns(raw_data)
columns_with_one_value = clean_data.columns[clean_data.nunique() == 1]

print("Columns with only one unique value:")
print(columns_with_one_value)

Columns with only one unique value:
Index([], dtype='object')


In [163]:
def normalize_data(df, columns = None):
    """Min-max scale columns of ``df`` in place.

    Args:
        df: DataFrame to normalize. It is modified in place.
        columns: Optional iterable of column names to scale. When None, every
            column except 'class' and 'timestamp' is scaled.

    Returns:
        The same ``df`` object, so the call can be chained. Existing callers
        that ignore the return value are unaffected.
    """
    from sklearn.preprocessing import MinMaxScaler

    if columns is None:
        columns_to_normalize = df.columns.difference(['class', 'timestamp'])
    else:
        # Honour the caller's selection instead of silently scaling everything.
        columns_to_normalize = list(columns)

    # Nothing selected: the scaler cannot be fitted on zero columns.
    if len(columns_to_normalize) == 0:
        return df

    # The scaler is fitted on every row passed in; fit on the training split only
    # if held-out statistics must not influence the scaling.
    scaler = MinMaxScaler()
    df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])
    return df

In [164]:
# normalize_data scales clean_data in place; its return value is not used here.
normalize_data(clean_data)
clean_data.head()

Unnamed: 0,mid,x,y,mpitch,mroll,myaw,z,pitch,roll,yaw,vgx,vgy,vgz,templ,temph,tof,h,bat,baro,flight_time,agx,agy,agz,mpitch.1,mroll.1,myaw.1,timestamp_c,frame.number,frame.len,wlan.ta,wlan.sa,wlan.ra,wlan.da,wlan.bssid,wlan.duration,wlan.seq,wlan.fc.type,wlan.fc.subtype,wlan.flags,wlan.fcs,wlan.fcs.status,wlan.qos,wlan.qos.priority,wlan.ccmp.extiv,wlan.wep.key,data.len,radiotap.hdr_length,radiotap.antenna_signal,radiotap.signal_quality,radiotap.channel.flags.ofdm,radiotap.channel.flags.cck,wlan_radio.datarate,wlan_radio.channel,wlan_radio.frequency,wlan_radio.signal_strength (dbm),wlan_radio.preamble,class
0,0.071429,0.374532,0.353357,0.357143,0.498607,0.498607,0.252525,0.692308,0.625,0.212291,0.5,0.5,0.833333,0.272727,0.25,0.258427,0.318182,0.924528,0.817808,0.0,0.643678,0.344916,0.901502,0.357143,0.498607,0.498607,3.7e-05,0.090715,0.054545,0.5,0.5,0.666667,0.0,0.5,0.1375,0.013187,1.0,0.666667,0.663265,0.582928,1.0,0.0,0.0,0.002868,0.0,0.097973,0.869565,0.622222,0.84,1.0,0.0,0.369482,1.0,1.0,0.622222,0.0,1
1,0.071429,0.374532,0.353357,0.357143,0.498607,0.498607,0.252525,0.615385,0.625,0.212291,0.5,0.5,0.777778,0.272727,0.25,0.297753,0.363636,0.924528,0.834247,0.0,0.62069,0.34749,0.90371,0.357143,0.498607,0.498607,8.6e-05,0.096912,0.052121,0.0,0.0,0.666667,0.333333,0.5,0.15,0.161416,1.0,0.666667,0.663265,0.339658,1.0,0.0,0.0,0.051053,0.0,0.086149,1.0,0.777778,0.69,0.0,0.0,1.0,1.0,1.0,0.777778,0.116279,1
2,0.071429,0.374532,0.353357,0.357143,0.498607,0.498607,0.252525,0.615385,0.625,0.212291,0.5,0.5,0.777778,0.272727,0.25,0.297753,0.363636,0.924528,0.834247,0.0,0.62069,0.34749,0.90371,0.357143,0.498607,0.498607,8.2e-05,0.096378,0.049091,1.0,0.5,0.0,0.0,0.5,0.15,0.004151,1.0,0.666667,0.673469,0.488997,1.0,0.0,0.0,0.012829,0.0,0.077703,1.0,0.6,0.66,0.0,0.0,0.899306,1.0,1.0,0.6,0.116279,1
3,0.071429,0.374532,0.353357,0.357143,0.498607,0.498607,0.252525,0.615385,0.625,0.212291,0.5,0.5,0.777778,0.272727,0.25,0.297753,0.363636,0.924528,0.834247,0.0,0.62069,0.34749,0.90371,0.357143,0.498607,0.498607,8.2e-05,0.096057,0.047273,0.5,0.5,0.666667,0.0,0.5,0.1375,0.014896,1.0,0.666667,0.663265,0.697693,1.0,0.0,0.0,0.003233,0.0,0.077703,0.869565,0.622222,0.81,1.0,0.0,0.369482,1.0,1.0,0.622222,0.0,1
4,0.071429,0.374532,0.353357,0.357143,0.498607,0.498607,0.252525,0.615385,0.625,0.212291,0.5,0.5,0.777778,0.272727,0.25,0.297753,0.363636,0.924528,0.834247,0.0,0.62069,0.34749,0.90371,0.357143,0.498607,0.498607,7.1e-05,0.095523,0.150303,1.0,0.5,0.0,0.0,0.5,0.15,0.003907,1.0,0.666667,0.755102,0.353987,1.0,0.0,0.0,0.012776,0.0,0.359797,1.0,0.6,0.6,0.0,0.0,0.808678,1.0,1.0,0.6,0.116279,1


In [173]:
# Number of rows per class label.
print(clean_data['class'].value_counts())

class
1    14493
0     7310
Name: count, dtype: int64


In [174]:
def create_sequences(data, time_steps):
    """Slice a 2-D array into overlapping windows paired with next-row labels.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` with ALL columns, and
    its label is the last column of row ``i + time_steps``.

    Because windows keep every column, the last (label) column is part of
    ``X``. Callers that must not feed past labels to a model have to drop that
    column from ``X`` themselves.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns); the label is the last column.
        time_steps: Positive number of consecutive rows per window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape (n_windows, time_steps, n_columns)
        and ``y`` has shape (n_windows,), with
        ``n_windows = max(n_rows - time_steps, 0)``.

    Raises:
        ValueError: If ``data`` is not 2-D or ``time_steps`` is less than 1.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D, got {data.ndim} dimension(s)")
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")

    n_rows, n_columns = data.shape
    n_windows = max(n_rows - time_steps, 0)

    # Too few rows for a single window: return correctly shaped empty arrays
    # instead of the shapeless (0,) array an empty list would produce.
    if n_windows == 0:
        return (
            np.empty((0, time_steps, n_columns), dtype=data.dtype),
            np.empty((0,), dtype=data.dtype),
        )

    X = np.stack([data[start:start + time_steps] for start in range(n_windows)])
    # The label for window i is the last column of row i + time_steps.
    y = data[time_steps:, -1].copy()
    return X, y

In [175]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.layers import Input
from sklearn.metrics import f1_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils import class_weight

In [176]:
# Display the label column in its current (time-sorted) row order.
clean_data['class']

0        1
1        1
2        1
3        1
4        1
        ..
21798    0
21799    0
21800    0
21801    0
21802    0
Name: class, Length: 21803, dtype: int64

In [177]:
time_steps = 3  # Set your time window, e.g., 10 timesteps

# Keep the label out of the model inputs. create_sequences reads the label from the
# last column but also keeps every column inside each window, so passing 'class'
# (previously twice) let the model read the preceding labels directly as features.
feature_values = clean_data.drop(columns=['class']).to_numpy()
label_values = clean_data['class'].to_numpy().reshape(-1, 1)
X, y = create_sequences(np.hstack([feature_values, label_values]), time_steps)
X = X[:, :, :-1]  # strip the label column from every window

# NOTE(review): 'Unnamed: 0', 'frame.number' and 'timestamp_c' are still features; they
# look like row/time identifiers, so confirm they should be model inputs.
# NOTE(review): consecutive windows share time_steps - 1 rows, so a shuffled split puts
# near-duplicate windows in both train and test; consider a time-based split.

# Split the data into training, validation, and test sets (80% train, 10% validation, 10% test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

In [178]:
def build_initial_model(input_shape):
    """Build and compile the baseline LSTM binary classifier with fixed hyperparameters.

    Args:
        input_shape: Shape of one sample, passed straight to ``Input(shape=...)``.

    Returns:
        A compiled ``Sequential`` model: LSTM(64) -> Dropout(0.3) -> Dense(64, relu)
        -> Dense(1, sigmoid), trained with Adam (lr 1e-3) on binary cross-entropy
        and reporting accuracy.
    """
    layer_stack = [
        Input(shape=input_shape),
        LSTM(units=64, return_sequences=False),  # only the final hidden state is used
        Dropout(rate=0.3),
        Dense(units=64, activation="relu"),
        Dense(1, activation="sigmoid"),  # single probability output
    ]
    baseline = Sequential(layer_stack)

    baseline.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=['accuracy'],
    )
    return baseline

In [179]:
# Build the baseline model for one sample's shape (X_train without its first axis) and print its layers.
LSTM_model = build_initial_model(X_train.shape[1:])
LSTM_model.summary()

In [180]:
def evaluate_model(X_train, y_train, X_test, y_test):
    """Train a fresh baseline model with balanced class weights and report test F1.

    Args:
        X_train: Training samples; ``X_train.shape[1:]`` is used as the model input shape.
        y_train: Training labels, used for fitting and for the class weights.
        X_test: Samples to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        Tuple ``(model, f1)``: the trained model and the F1 score of its
        predictions at a 0.5 threshold.
    """
    # 'balanced' gives each class a weight inversely proportional to its frequency.
    classes = np.unique(y_train)
    weights = class_weight.compute_class_weight(
        'balanced', classes=classes, y=y_train
    )
    # Key the weights by the actual label value rather than by position in `classes`.
    class_weights = {int(label): float(weight) for label, weight in zip(classes, weights)}

    model = build_initial_model(X_train.shape[1:])
    # Pass the weights to fit(); computing them without using them has no effect on training.
    model.fit(
        X_train,
        y_train,
        epochs=10,
        batch_size=32,
        class_weight=class_weights,
        verbose=1,
    )  # Adjust epochs/batch_size as needed

    # Predictions and F1 Score Evaluation
    # ravel() turns the (n, 1) prediction column into the 1-D shape the metric expects.
    y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()  # Threshold 0.5
    f1 = f1_score(y_test, y_pred)
    print(f"F1 Score: {f1}")
    return model, f1

In [181]:
# Train and score on the test split; this rebinds LSTM_model to the newly trained model.
LSTM_model, f1 = evaluate_model(X_train, y_train, X_test, y_test)

Epoch 1/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9662 - loss: 0.0927
Epoch 2/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9997 - loss: 0.0014
Epoch 3/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9999 - loss: 6.7775e-04
Epoch 4/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9999 - loss: 9.2938e-04
Epoch 5/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9998 - loss: 0.0014
Epoch 6/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9998 - loss: 0.0013
Epoch 7/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 1.0000 - loss: 1.0557e-04
Epoch 8/10
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9999 - loss: 1.9434e-04
Epoch 9/10
[1m545/545[

In [182]:
# Score the trained model on the test split again; y_pred is reused by the metrics cell below.
y_pred_probs = LSTM_model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate F1-score
f1 = f1_score(y_test, y_pred)
print("F1-Score:", f1)

[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
F1-Score: 1.0


In [183]:
# Per-class sample counts (index 0 = class 0, index 1 = class 1) in each split.
print(np.bincount(y_train.astype(int)))  # For training labels
print(np.bincount(y_test.astype(int)))  # For test labels

[ 5848 11592]
[ 731 1449]


In [184]:
def build_model(hp):
    """Build and compile an LSTM classifier whose hyperparameters are drawn from ``hp``.

    The input shape is read from the notebook-level ``X_train``, so that
    variable must exist before this builder is called.

    Args:
        hp: Object providing ``Int``, ``Float`` and ``Choice`` samplers
            (presumably a Keras Tuner ``HyperParameters`` instance; confirm at the call site).

    Returns:
        A compiled ``Sequential`` model with tunable LSTM units, dropout rate,
        dense units and learning rate.
    """
    # Sample the search space in a fixed order: units, dropout, dense_units, learning_rate.
    lstm_units = hp.Int("units", min_value=32, max_value=128, step=32)
    dropout_rate = hp.Float("dropout", min_value=0.1, max_value=0.5, step=0.1)
    dense_units = hp.Int("dense_units", min_value=32, max_value=128, step=32)
    learning_rate = hp.Choice("learning_rate", [1e-3, 1e-4])

    tuned = Sequential([
        Input(shape=X_train.shape[1:]),  # one sample: X_train without its first axis
        LSTM(units=lstm_units, return_sequences=False),
        Dropout(rate=dropout_rate),
        Dense(units=dense_units, activation="relu"),
        Dense(1, activation="sigmoid"),
    ])
    tuned.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=['accuracy'],
    )
    return tuned


In [185]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Test-set metrics for the predictions in y_pred. average=None returns one value per
# class, so precision, recall and f1 are arrays (f1 is rebound from the earlier scalar).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average=None, zero_division=0)
recall = recall_score(y_test, y_pred, average=None, zero_division=0)
f1 = f1_score(y_test, y_pred, average=None, zero_division=0)

# Each metric is reported once (the recall line used to be printed twice).
print('Test Set Evaluation:')
print(f'Accuracy: {accuracy:.4f}')
print(f'Recall: {recall}')
print(f'f1: {f1}')
print(f'precision: {precision}')

Test Set Evaluation:
Accuracy: 1.0000
Recall: [1. 1.]
Recall: [1. 1.]
f1: [1. 1.]
precision: [1. 1.]
