 ## Import libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, losses
from tensorflow.keras.callbacks import EarlyStopping,History 
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import os
import pickle
from tqdm import tqdm

2024-06-09 21:10:49.450103: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-09 21:10:49.450192: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-09 21:10:49.573217: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print('Running on TPU ', tf.config.list_logical_devices('TPU'))
except ValueError:
    tpu = None

    
if tpu:
    strategy = tf.distribute.TPUStrategy(tpu)
elif len(tf.config.list_physical_devices('GPU'))>1:
    strategy = tf.distribute.MirroredStrategy()
    print(tf.config.list_physical_devices('GPU'))
else:
    # Default distribution strategy in TensorFlow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)
print(strategy)
print(type(strategy))
strategy

REPLICAS:  1
<tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy object at 0x78581d084cd0>
<class 'tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy'>


<tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy at 0x78581d084cd0>

 ## Read data

In [3]:
csvdata = pd.read_csv('/kaggle/input/awid3-csv-wlan-only/CSV/awid_full.csv', low_memory=False)

In [4]:
bool_cols = ["wlan_rsna_eapol.keydes.data_bool", "data.data_bool"]

for col in bool_cols:
    csvdata[col] = csvdata[col].astype(int)

In [5]:
csvdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15574909 entries, 0 to 15574908
Data columns (total 40 columns):
 #   Column                                   Dtype  
---  ------                                   -----  
 0   frame.len                                float64
 1   frame.number                             float64
 2   frame.time_delta_displayed               float64
 3   frame.time_epoch                         float64
 4   frame.time_relative                      float64
 5   wlan.duration                            float64
 6   wlan.fc.ds                               int64  
 7   wlan.fc.protected                        float64
 8   wlan.fc.type                             float64
 9   wlan.fc.retry                            float64
 10  wlan.fc.subtype                          float64
 11  wlan.fixed.reason_code                   float64
 12  wlan.fixed.timestamp                     float64
 13  wlan.seq                                 float64
 14  wlan_radio.data_

In [6]:
# for col in csvdata.columns:
#     if pd.api.types.is_numeric_dtype(csvdata[col]):
#         info = np.iinfo(csvdata[col].dtype) if 'int' in str(csvdata[col].dtype) else np.finfo(csvdata[col].dtype)
#         if csvdata[col].min() >= info.min and csvdata[col].max() <= info.max:
#             csvdata[col] = pd.to_numeric(csvdata[col], downcast='integer')
#             csvdata[col] = pd.to_numeric(csvdata[col], downcast='float')

In [7]:
# csvdata.info()

In [8]:
sc = StandardScaler()
sc.fit(csvdata.drop(columns=['Label']))

In [9]:
train_data, test_data = train_test_split(csvdata, test_size=0.2, stratify=csvdata['Label'], random_state=42)

In [10]:
label_mapping = {
    "Normal": 0,
    "Kr00k": 1,
    "Evil_Twin": 2,
    "Disas": 3,
    "Krack": 4,
    "Deauth": 5,
    "(Re)Assoc": 6,
    "RogueAP": 7
}

# Prepare data for each class
def prepare_class_data(train_data, label, scaler):
    class_data = train_data[train_data['Label'] == label].drop(columns=['Label'])
    return scaler.transform(class_data)

classes = list(label_mapping.keys())
class_data_dict = {label: prepare_class_data(train_data, label, sc) for label in classes}

In [11]:
for label in classes:
    print(f'{label}: {class_data_dict[label].shape}')

Normal: (12085923, 39)
Kr00k: (153442, 39)
Evil_Twin: (83861, 39)
Disas: (60105, 39)
Krack: (39992, 39)
Deauth: (31154, 39)
(Re)Assoc: (4402, 39)
RogueAP: (1048, 39)


 ## Autoencoder network

In [12]:
def create_autoencoder(input_shape, strategy):
    with strategy.scope():
        model = models.Sequential([
            layers.Input(shape=(input_shape,)),
            layers.Dense(32, activation='relu'),
            layers.Dense(16, activation='relu'),
            layers.Dense(32, activation='relu'),
            layers.Dense(input_shape)
        ])
#         From dnn
        model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='mse', metrics=['accuracy'])
#         model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='mse')
        model.summary()
    return model

In [13]:
input_shape = csvdata.shape[1] - 1
print(strategy)

<tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy object at 0x78581d084cd0>


In [14]:
autoencoders = {label: create_autoencoder(input_shape, strategy) for label in classes}

In [15]:
del csvdata
gc.collect()

13

 ## Training function

In [16]:

def train_autoencoder(label, data, model, earlystop, epochs=10000, batch_size=32768):
    history = model.fit(data, data, callbacks=[earlystop], epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=2)
    model.save(f'autoencoder_{label}.h5')
    return model, history

model_stats = {}
for label in classes:
    print(label)
    earlystop = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 35, verbose = 1, mode='min',restore_best_weights = True)
    _, stats = train_autoencoder(label, class_data_dict[label], autoencoders[label], earlystop=earlystop)
    model_stats[label] = [stats,earlystop.stopped_epoch,earlystop.patience]

Normal
Epoch 1/10000


I0000 00:00:1717967671.164381      68 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1717967673.413706      69 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


296/296 - 9s - 30ms/step - accuracy: 0.0237 - loss: 0.9209 - val_accuracy: 0.0794 - val_loss: 0.8605
Epoch 2/10000
296/296 - 2s - 8ms/step - accuracy: 0.1334 - loss: 0.7373 - val_accuracy: 0.2011 - val_loss: 0.6425
Epoch 3/10000
296/296 - 2s - 8ms/step - accuracy: 0.2505 - loss: 0.5380 - val_accuracy: 0.3210 - val_loss: 0.4687
Epoch 4/10000
296/296 - 2s - 8ms/step - accuracy: 0.3382 - loss: 0.4146 - val_accuracy: 0.3358 - val_loss: 0.3738
Epoch 5/10000
296/296 - 2s - 8ms/step - accuracy: 0.3517 - loss: 0.3408 - val_accuracy: 0.3717 - val_loss: 0.3141
Epoch 6/10000
296/296 - 2s - 7ms/step - accuracy: 0.3882 - loss: 0.2911 - val_accuracy: 0.3973 - val_loss: 0.2741
Epoch 7/10000
296/296 - 2s - 7ms/step - accuracy: 0.4079 - loss: 0.2570 - val_accuracy: 0.4154 - val_loss: 0.2453
Epoch 8/10000
296/296 - 2s - 7ms/step - accuracy: 0.4249 - loss: 0.2302 - val_accuracy: 0.4380 - val_loss: 0.2210
Epoch 9/10000
296/296 - 2s - 7ms/step - accuracy: 0.4658 - loss: 0.2072 - val_accuracy: 0.4885 - val_

W0000 00:00:1717971545.853835      67 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


3/3 - 3s - 989ms/step - accuracy: 0.0033 - loss: 1.1624 - val_accuracy: 0.0022 - val_loss: 1.2849
Epoch 2/10000
3/3 - 0s - 20ms/step - accuracy: 0.0033 - loss: 1.1565 - val_accuracy: 0.0022 - val_loss: 1.2789
Epoch 3/10000
3/3 - 0s - 20ms/step - accuracy: 0.0033 - loss: 1.1509 - val_accuracy: 0.0022 - val_loss: 1.2732
Epoch 4/10000
3/3 - 0s - 20ms/step - accuracy: 0.0033 - loss: 1.1455 - val_accuracy: 0.0022 - val_loss: 1.2676
Epoch 5/10000
3/3 - 0s - 19ms/step - accuracy: 0.0033 - loss: 1.1401 - val_accuracy: 0.0022 - val_loss: 1.2619
Epoch 6/10000
3/3 - 0s - 19ms/step - accuracy: 0.0034 - loss: 1.1347 - val_accuracy: 0.0024 - val_loss: 1.2563
Epoch 7/10000
3/3 - 0s - 21ms/step - accuracy: 0.0034 - loss: 1.1295 - val_accuracy: 0.0026 - val_loss: 1.2508
Epoch 8/10000
3/3 - 0s - 20ms/step - accuracy: 0.0037 - loss: 1.1244 - val_accuracy: 0.0030 - val_loss: 1.2455
Epoch 9/10000
3/3 - 0s - 20ms/step - accuracy: 0.0040 - loss: 1.1195 - val_accuracy: 0.0032 - val_loss: 1.2403
Epoch 10/10000

W0000 00:00:1717973561.430629      69 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


1/1 - 2s - 2s/step - accuracy: 0.0000e+00 - loss: 100.1245 - val_accuracy: 0.0000e+00 - val_loss: 64.1433
Epoch 2/10000
1/1 - 0s - 45ms/step - accuracy: 0.0000e+00 - loss: 99.9942 - val_accuracy: 0.0000e+00 - val_loss: 64.0599
Epoch 3/10000
1/1 - 0s - 42ms/step - accuracy: 0.0000e+00 - loss: 99.8645 - val_accuracy: 0.0000e+00 - val_loss: 63.9769
Epoch 4/10000
1/1 - 0s - 41ms/step - accuracy: 0.0000e+00 - loss: 99.7354 - val_accuracy: 0.0000e+00 - val_loss: 63.8944
Epoch 5/10000
1/1 - 0s - 41ms/step - accuracy: 0.0000e+00 - loss: 99.6071 - val_accuracy: 0.0000e+00 - val_loss: 63.8123
Epoch 6/10000
1/1 - 0s - 41ms/step - accuracy: 0.0000e+00 - loss: 99.4795 - val_accuracy: 0.0000e+00 - val_loss: 63.7307
Epoch 7/10000
1/1 - 0s - 41ms/step - accuracy: 0.0000e+00 - loss: 99.3525 - val_accuracy: 0.0000e+00 - val_loss: 63.6496
Epoch 8/10000
1/1 - 0s - 41ms/step - accuracy: 0.0000e+00 - loss: 99.2263 - val_accuracy: 0.0000e+00 - val_loss: 63.5688
Epoch 9/10000
1/1 - 0s - 40ms/step - accuracy: 

In [17]:
with open('model_stats.pkl', 'wb') as f:
    pickle.dump(model_stats, f)

In [18]:
for label in classes:
    print(label)
    print(f'First epoch val_loss:{model_stats[label][0].history["val_loss"][0]}')
    best_es=model_stats[label][1]-model_stats[label][2]
    if best_es>0:
        print(f'Best epoch {best_es} val_loss:{model_stats[label][0].history["val_loss"][best_es]}')
        if np.argmin(model_stats[label][0].history["val_loss"])==best_es:
            print("Early stop correct")
    else:
        print(f'Best epoch {np.argmin(model_stats[label][0].history["val_loss"])+1} val_loss:{model_stats[label][0].history["val_loss"][np.argmin(model_stats[label][0].history["val_loss"])]}')
        print(f'Last epoch {len(model_stats[label][0].history["val_loss"])} val_loss:{model_stats[label][0].history["val_loss"][-1]}')
        if np.argmin(model_stats[label][0].history["val_loss"])==len(model_stats[label][0].history["val_loss"])-1:
            print("Last best")
#     print(np.argmin(model_stats[label][0].history["val_loss"]))
#     print(model_stats[label][0].history.items())
#     print(len(model_stats[label][0].history["val_loss"]))

Normal
First epoch val_loss:0.860500156879425
Best epoch 1560 val_loss:0.002083671046420932
Early stop correct
Kr00k
First epoch val_loss:0.5193465352058411
Best epoch 4664 val_loss:3.522728366078809e-05
Early stop correct
Evil_Twin
First epoch val_loss:1.2848511934280396
Best epoch 4352 val_loss:0.000929793284740299
Early stop correct
Disas
First epoch val_loss:5.801222801208496
Best epoch 9989 val_loss:4.092991730431095e-05
Last epoch 10000 val_loss:4.130292290938087e-05
Krack
First epoch val_loss:12.89560317993164
Best epoch 9999 val_loss:0.008356815204024315
Last epoch 10000 val_loss:0.008356980979442596
Deauth
First epoch val_loss:0.9351128339767456
Best epoch 10000 val_loss:4.8296711611328647e-05
Last epoch 10000 val_loss:4.8296711611328647e-05
Last best
(Re)Assoc
First epoch val_loss:64.1432876586914
Best epoch 10000 val_loss:0.0006164591177366674
Last epoch 10000 val_loss:0.0006164591177366674
Last best
RogueAP
First epoch val_loss:1.5215479135513306
Best epoch 7556 val_loss:8.