In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, chi2

# Load dataset
data = pd.read_csv('./Book1.csv')

# Preprocess data
features = [
    'frame.interface_id',
    'frame.dlt',
    'frame.offset_shift',
    'frame.time_epoch',
    'frame.time_delta',
    'frame.time_delta_displayed',
    'frame.time_relative',
    'frame.len',
    'frame.cap_len',
    'frame.marked',
    'frame.ignored',
    'radiotap.version',
    'radiotap.pad',
    'radiotap.length',
    'radiotap.present.tsft',
    'radiotap.present.flags',
    'radiotap.present.rate',
    'radiotap.present.channel',
    'radiotap.present.fhss',
    'radiotap.present.dbm_antsignal',
    'radiotap.present.dbm_antnoise',
    'radiotap.present.lock_quality',
    'radiotap.present.tx_attenuation',
    'radiotap.present.db_tx_attenuation',
    'radiotap.present.dbm_tx_power',
    'radiotap.present.antenna',
    'radiotap.present.db_antsignal',
    'radiotap.present.db_antnoise',
    'radiotap.present.rxflags',
    'radiotap.present.xchannel',
    'radiotap.present.mcs',
    'radiotap.present.ampdu',
    'radiotap.present.vht',
    'radiotap.present.reserved',
    'radiotap.present.rtap_ns',
    'radiotap.present.vendor_ns',
    'radiotap.present.ext',
    'radiotap.mactime',
    'radiotap.flags.cfp',
    'radiotap.flags.preamble',
    'radiotap.flags.wep',
    'radiotap.flags.frag',
    'radiotap.flags.fcs',
    'radiotap.flags.datapad',
    'radiotap.flags.badfcs',
    'radiotap.flags.shortgi',
    'radiotap.datarate',
    'radiotap.channel.freq',
    'radiotap.channel.type.turbo',
    'radiotap.channel.type.cck',
    'radiotap.channel.type.ofdm',
    'radiotap.channel.type.2ghz',
    'radiotap.channel.type.5ghz',
    'radiotap.channel.type.passive',
    'radiotap.channel.type.dynamic',
    'radiotap.channel.type.gfsk',
    'radiotap.channel.type.gsm',
    'radiotap.channel.type.sturbo',
    'radiotap.channel.type.half',
    'radiotap.channel.type.quarter',
    'radiotap.dbm_antsignal',
    'radiotap.antenna',
    'radiotap.rxflags.badplcp',
    'wlan.fc.type_subtype',
    'wlan.fc.version',
    'wlan.fc.type',
    'wlan.fc.subtype',
    'wlan.fc.ds',
    'wlan.fc.frag',
    'wlan.fc.retry',
    'wlan.fc.pwrmgt',
    'wlan.fc.moredata',
    'wlan.fc.protected',
    'wlan.fc.order',
    'wlan.duration',
    'wlan.ra',
    'wlan.da',
    'wlan.ta',
    'wlan.sa',
    'wlan.bssid',
    'wlan.frag',
    'wlan.seq',
    'wlan.bar.type',
    'wlan.ba.control.ackpolicy',
    'wlan.ba.control.multitid',
    'wlan.ba.control.cbitmap',
    'wlan.bar.compressed.tidinfo',
    'wlan.ba.bm',
    'wlan.fcs_good',
    'wlan_mgt.fixed.capabilities.ess',
    'wlan_mgt.fixed.capabilities.ibss',
    'wlan_mgt.fixed.capabilities.cfpoll.ap',
    'wlan_mgt.fixed.capabilities.privacy',
    'wlan_mgt.fixed.capabilities.preamble',
    'wlan_mgt.fixed.capabilities.pbcc',
    'wlan_mgt.fixed.capabilities.agility',
    'wlan_mgt.fixed.capabilities.spec_man',
    'wlan_mgt.fixed.capabilities.short_slot_time',
    'wlan_mgt.fixed.capabilities.apsd',
    'wlan_mgt.fixed.capabilities.radio_measurement',
    'wlan_mgt.fixed.capabilities.dsss_ofdm',
    'wlan_mgt.fixed.capabilities.del_blk_ack',
    'wlan_mgt.fixed.capabilities.imm_blk_ack',
    'wlan_mgt.fixed.listen_ival',
    'wlan_mgt.fixed.current_ap',
    'wlan_mgt.fixed.status_code',
    'wlan_mgt.fixed.timestamp',
    'wlan_mgt.fixed.beacon',
    'wlan_mgt.fixed.aid',
    'wlan_mgt.fixed.reason_code',
    'wlan_mgt.fixed.auth.alg',
    'wlan_mgt.fixed.auth_seq',
    'wlan_mgt.fixed.category_code',
    'wlan_mgt.fixed.htact',
    'wlan_mgt.fixed.chanwidth',
    'wlan_mgt.fixed.fragment',
    'wlan_mgt.fixed.sequence',
    'wlan_mgt.tagged.all',
    'wlan_mgt.ssid',
    'wlan_mgt.ds.current_channel',
    'wlan_mgt.tim.dtim_count',
    'wlan_mgt.tim.dtim_period',
    'wlan_mgt.tim.bmapctl.multicast',
    'wlan_mgt.tim.bmapctl.offset',
    'wlan_mgt.country_info.environment',
    'wlan_mgt.rsn.version',
    'wlan_mgt.rsn.gcs.type',
    'wlan_mgt.rsn.pcs.count',
    'wlan_mgt.rsn.akms.count',
    'wlan_mgt.rsn.akms.type',
    'wlan_mgt.rsn.capabilities.preauth',
    'wlan_mgt.rsn.capabilities.no_pairwise',
    'wlan_mgt.rsn.capabilities.ptksa_replay_counter',
    'wlan_mgt.rsn.capabilities.gtksa_replay_counter',
    'wlan_mgt.rsn.capabilities.mfpr',
    'wlan_mgt.rsn.capabilities.mfpc',
    'wlan_mgt.rsn.capabilities.peerkey',
    'wlan_mgt.tcprep.trsmt_pow',
    'wlan_mgt.tcprep.link_mrg',
    'wlan.wep.iv',
    'wlan.wep.key',
    'wlan.wep.icv',
    'wlan.tkip.extiv',
    'wlan.ccmp.extiv',
    'wlan.qos.tid',
    'wlan.qos.priority',
    'wlan.qos.eosp',
    'wlan.qos.ack',
    'wlan.qos.amsdupresent',
    'wlan.qos.buf_state_indicated',
    'wlan.qos.bit4',
    'wlan.qos.txop_dur_req',
    'wlan.qos.buf_state_indicated',
    'data.len',
    'class'
]

# Assuming 'label' is the target column
data['class'] = data['class'].astype(str)
data['label'] = data['class'].apply(lambda x: 0 if x.lower() == 'normal' else 1)
data = data.drop(columns=['class'])
data.to_csv('./Book1.csv', index=False)



In [10]:
data['label'].value_counts(normalize=True)
data.isna().sum()
data.replace({"?": None}, inplace=True)



(70,)

In [20]:
data.isna().sum()



frame.interface_id            0
frame.offset_shift            0
frame.time_epoch              0
frame.time_delta              0
frame.time_delta_displayed    0
                             ..
wlan.seq                      0
wlan.fcs_good                 0
wlan.wep.key                  0
data.len                      0
label                         0
Length: 85, dtype: int64

In [18]:
columns_with_mostly_null_data = data.columns[data.isnull().mean() >= 0.5]
columns_with_mostly_null_data.shape

(0,)

In [19]:

data.drop(columns_with_mostly_null_data, axis=1, inplace=True)
data.shape

(0, 85)

In [21]:
data.dropna(inplace=True)

In [23]:
data.shape

(0, 85)

In [11]:
X = data.drop(columns=['label'])
y = data['label']



# Encode categorical features and normalize numerical features
X = pd.get_dummies(X)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Feature selection
select_k_best = SelectKBest(chi2, k=20)  # Adjust k based on the number of important features you want to keep
X_new = select_k_best.fit_transform(X, y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.3, random_state=42)

KeyboardInterrupt: 