<a href="https://colab.research.google.com/github/SVL98/SVL98/blob/main/bluetooth_Miot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install scapy pandas scikit-learn


Collecting scapy
  Downloading scapy-2.5.0.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scapy
  Building wheel for scapy (setup.py) ... [?25l[?25hdone
  Created wheel for scapy: filename=scapy-2.5.0-py2.py3-none-any.whl size=1444319 sha256=f4e07e69e51445bac7751ff838ec55c50a2b0d9b98283addd8f5a6895d5dd03f
  Stored in directory: /root/.cache/pip/wheels/82/b7/03/8344d8cf6695624746311bc0d389e9d05535ca83c35f90241d
Successfully built scapy
Installing collected packages: scapy
Successfully installed scapy-2.5.0


In [4]:
from scapy.all import rdpcap
from scapy.layers.bluetooth import HCI_Hdr, HCI_Event_Hdr
import pandas as pd

def extract_hci_features(packet):
    features = {
        'length': len(packet),
        'direction': None,
        'event_code': None,
        'opcode': None,
        'status': None,
    }

    if packet.haslayer(HCI_Hdr):
        hci_hdr = packet[HCI_Hdr]
        features['direction'] = hci_hdr.direction

        if packet.haslayer(HCI_Event_Hdr):
            hci_event_hdr = packet[HCI_Event_Hdr]
            features['event_code'] = hci_event_hdr.code
            features['opcode'] = getattr(packet, 'opcode', 0)
            features['status'] = getattr(packet, 'status', -1)

    return features

# Load and extract features from the BENIGN pcap file
packets_benign = rdpcap('/content/Bluetooth_Benign_train.pcap')
features_list_benign = [extract_hci_features(packet) for packet in packets_benign]
df_benign = pd.DataFrame(features_list_benign)
df_benign['direction'].fillna('unknown', inplace=True)
df_benign['opcode'].fillna(0, inplace=True)
df_benign['status'].fillna(-1, inplace=True)
df_benign['label'] = 'BENIGN'

# Load and extract features from the DoS pcap file
packets_dos = rdpcap('/content/Bluetooth_DoS_train.pcap')
features_list_dos = [extract_hci_features(packet) for packet in packets_dos]
df_dos = pd.DataFrame(features_list_dos)
df_dos['direction'].fillna('unknown', inplace=True)
df_dos['opcode'].fillna(0, inplace=True)
df_dos['status'].fillna(-1, inplace=True)
df_dos['label'] = 'DoS'  # Label these packets as 'DoS'

# Merge the two DataFrames
df_merged = pd.concat([df_benign, df_dos], ignore_index=True)

# Display the first few rows of the merged DataFrame
print(df_merged.head())


   length direction  event_code  opcode  status   label
0       8   unknown         NaN     0.0    -1.0  BENIGN
1      11   unknown        14.0  3075.0     0.0  BENIGN
2       8   unknown         NaN     0.0    -1.0  BENIGN
3      18   unknown        14.0  4101.0     0.0  BENIGN
4      15   unknown         NaN     0.0    -1.0  BENIGN


In [15]:
print(df_merged[features].isnull().sum())


length             0
event_code    537205
opcode             0
status             0
dtype: int64


In [7]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Create an imputer for the 'event_code' feature
imputer = SimpleImputer(strategy='most_frequent')  # Using the most frequent value for categorical data

# Impute missing values in the 'event_code' feature
df_merged['event_code'] = imputer.fit_transform(df_merged[['event_code']])

# Now select features and normalize
features = ['length', 'event_code', 'opcode', 'status']
scaler = StandardScaler()
X_train = scaler.fit_transform(df_merged[features])
y_train = df_merged['label']

# Train the Random Forest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [8]:
print(df_merged[features].isnull().sum())


length        0
event_code    0
opcode        0
status        0
dtype: int64


In [9]:
from sklearn.ensemble import RandomForestClassifier

# Train the Random Forest classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [12]:
print(model)


RandomForestClassifier(random_state=42)


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9798994148295275
Classification Report:
               precision    recall  f1-score   support

      BENIGN       0.98      0.90      0.94     43395
         DoS       0.98      1.00      0.99    199782

    accuracy                           0.98    243177
   macro avg       0.98      0.95      0.96    243177
weighted avg       0.98      0.98      0.98    243177



In [1]:
from scapy.all import rdpcap
from scapy.layers.bluetooth import HCI_Hdr, HCI_Event_Hdr
import pandas as pd

def extract_hci_features(packet):
    features = {
        'length': len(packet),
        'direction': None,
        'event_code': None,
        'opcode': None,
        'status': None,
    }

    if packet.haslayer(HCI_Hdr):
        hci_hdr = packet[HCI_Hdr]
        features['direction'] = hci_hdr.direction

        if packet.haslayer(HCI_Event_Hdr):
            hci_event_hdr = packet[HCI_Event_Hdr]
            features['event_code'] = hci_event_hdr.code
            features['opcode'] = getattr(packet, 'opcode', 0)
            features['status'] = getattr(packet, 'status', -1)

    return features

# Load and extract features from the BENIGN pcap file
packets_benign = rdpcap('/content/Bluetooth_Benign_test.pcap')
features_list_benign = [extract_hci_features(packet) for packet in packets_benign]
df_benign = pd.DataFrame(features_list_benign)
df_benign['direction'].fillna('unknown', inplace=True)
df_benign['opcode'].fillna(0, inplace=True)
df_benign['status'].fillna(-1, inplace=True)
df_benign['label'] = 'BENIGN'

# Load and extract features from the DoS pcap file
packets_dos = rdpcap('/content/Bluetooth_DoS_test.pcap')
features_list_dos = [extract_hci_features(packet) for packet in packets_dos]
df_dos = pd.DataFrame(features_list_dos)
df_dos['direction'].fillna('unknown', inplace=True)
df_dos['opcode'].fillna(0, inplace=True)
df_dos['status'].fillna(-1, inplace=True)
df_dos['label'] = 'DoS'  # Label these packets as 'DoS'

# Merge the two DataFrames
df_merged = pd.concat([df_benign, df_dos], ignore_index=True)

# Display the first few rows of the merged DataFrame
print(df_merged.head())


   length direction  event_code  opcode  status   label
0      11   unknown        14.0  3154.0     0.0  BENIGN
1      50   unknown        62.0     0.0    -1.0  BENIGN
2      50   unknown        62.0     0.0    -1.0  BENIGN
3      43   unknown        62.0     0.0    -1.0  BENIGN
4      40   unknown        62.0     0.0    -1.0  BENIGN


In [21]:
# Example feature extraction and imputation
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Ensure no missing values
imputer = SimpleImputer(strategy='most_frequent')
df_merged['event_code'] = imputer.fit_transform(df_merged[['event_code']])
df_merged['opcode'] = imputer.fit_transform(df_merged[['opcode']])
df_merged['status'] = imputer.fit_transform(df_merged[['status']])

# Extract features and scale
features = ['length', 'event_code', 'opcode', 'status']
scaler = StandardScaler()
X_test = scaler.fit_transform(df_merged[features])

y_true = df_merged['label']

# Ensure model is loaded or trained
y_pred = model.predict(X_test)

# Add predictions to DataFrame
df_merged['predictions'] = y_pred


In [13]:


# Display the DataFrame with predictions
print(df_merged.head())

   length direction  event_code  opcode  status   label predictions
0       8   unknown        19.0     0.0    -1.0  BENIGN      BENIGN
1      11   unknown        14.0  3075.0     0.0  BENIGN      BENIGN
2       8   unknown        19.0     0.0    -1.0  BENIGN      BENIGN
3      18   unknown        14.0  4101.0     0.0  BENIGN      BENIGN
4      15   unknown        19.0     0.0    -1.0  BENIGN      BENIGN


In [14]:
print(df_merged['predictions'].value_counts())


predictions
DoS       1015751
BENIGN     200133
Name: count, dtype: int64


In [22]:
from sklearn.metrics import classification_report
print("Accuracy:",accuracy_score(y_true,y_pred))

# Generate a classification report
print(classification_report(df_merged['label'], df_merged['predictions']))


Accuracy: 0.9801971240677565
              precision    recall  f1-score   support

      BENIGN       0.98      0.90      0.94    217493
         DoS       0.98      1.00      0.99    998391

    accuracy                           0.98   1215884
   macro avg       0.98      0.95      0.97   1215884
weighted avg       0.98      0.98      0.98   1215884

