In [1]:
!pip install scapy

Collecting scapy
  Downloading scapy-2.5.0.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scapy
  Building wheel for scapy (setup.py) ... [?25l[?25hdone
  Created wheel for scapy: filename=scapy-2.5.0-py2.py3-none-any.whl size=1444330 sha256=7b790d87609b53d43f98cd6980e3d99d93204f637cd36132064ff530d8e7aa24
  Stored in directory: /root/.cache/pip/wheels/82/b7/03/8344d8cf6695624746311bc0d389e9d05535ca83c35f90241d
Successfully built scapy
Installing collected packages: scapy
Successfully installed scapy-2.5.0


In [20]:
import os
import zipfile
import tempfile
import pandas as pd
from scapy.all import *
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [21]:
# Функция для извлечения признаков из pcap файла
def extract_features(file_path):
    pkts = rdpcap(file_path)

    # Пример: извлекаем количество пакетов, длину каждого пакета и т.д.
    num_packets = len(pkts)
    total_length = sum([len(pkt) for pkt in pkts])

    return {'num_packets': num_packets, 'total_length': total_length}

In [22]:
# Распаковка zip-архива во временную папку
zip_file_path = "/content/captures_IoT_Sentinel.zip"

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    # Создаем временную папку
    with tempfile.TemporaryDirectory() as temp_folder:
        zip_ref.extractall(temp_folder)

        # Загрузка данных из файлов pcap во всех подпапках
        data = []
        labels = []

        for root, dirs, files in os.walk(temp_folder):
            for dir_name in dirs:
                dir_path = os.path.join(root, dir_name)

                # Чтение файла с MAC-адресами устройств
                mac_file_path = os.path.join(dir_path, '_iotdevice-mac.txt')
                if os.path.exists(mac_file_path):
                    with open(mac_file_path, 'r') as mac_file:
                        device_type = mac_file.read().strip()

                    # Итерация по файлам pcap
                    for file in os.listdir(dir_path):
                        if file.endswith(".pcap"):
                            file_path = os.path.join(dir_path, file)
                            features = extract_features(file_path)
                            data.append(features)
                            labels.append(device_type)

In [23]:
# Преобразование данных в DataFrame
df = pd.DataFrame(data)
df['label'] = labels

In [24]:
# Вывод списка всех уникальных устройств
unique_devices = df['label'].unique()
print("Уникальные устройства:", unique_devices)

Уникальные устройства: ['74:da:38:80:79:fc' '6c:72:20:c5:17:5a' '84:18:26:7b:5f:6b'
 '1c:5f:2b:aa:fd:4e' '5c:cf:7f:06:d9:02' '00:17:88:24:76:ff'
 '90:8d:78:a8:e1:43' 'b0:c5:54:1c:71:85' '94:10:3e:41:c2:05'
 '50:c7:bf:00:fc:a3' 'b0:c5:54:25:5b:0e' '74:da:38:4a:76:49'
 '74:da:38:80:7a:08' '5c:cf:7f:07:ae:fb' '00:24:e4:24:80:2a'
 'ac:cf:23:62:3c:6e' '94:10:3e:35:01:c1' '50:c7:bf:00:c7:03'
 '00:1a:22:05:c4:2e' '3c:49:37:03:17:f0' '20:f8:5e:ca:91:52'
 '90:8d:78:a9:3d:6f' '94:10:3e:cd:37:65' '94:10:3e:42:80:69'
 '94:10:3e:34:0c:b5' '74:da:38:23:22:7b' '3c:49:37:03:17:db'
 '90:8d:78:dd:0d:60' '00:1a:22:03:cb:be']


In [25]:
# Разделение данных на обучающий и тестовый наборы
X_train, X_test, y_train, y_test = train_test_split(df.drop('label', axis=1), df['label'], test_size=0.2, random_state=42)

In [26]:
# Нормализация данных
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [27]:
# Подбор оптимальных параметров для модели Random Forest
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_model = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_scaled, y_train)

In [28]:
# Получение оптимальных параметров
best_params = grid_search.best_params_

In [29]:
# Обучение модели Random Forest с оптимальными параметрами
final_model = RandomForestClassifier(**best_params, random_state=42)
final_model.fit(X_train_scaled, y_train)

In [30]:
# Предсказание на тестовом наборе
predictions = final_model.predict(X_test_scaled)

In [33]:
# Оценка точности модели
accuracy = accuracy_score(y_test, predictions)
print(f"Точность: {accuracy}")
print("Лучшие параметры:", best_params)

Точность: 0.6727272727272727
Лучшие параметры: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


In [34]:
# Вывод матрицы ошибок и отчета о классификации
conf_matrix = confusion_matrix(y_test, predictions)
class_report = classification_report(y_test, predictions)

print("Матрица ошибок:")
print(conf_matrix)
print("\nОтчет классификации:")
print(class_report)

Матрица ошибок:
[[ 6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  1  0  0]
 [ 0  1  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  1  0  0]
 [ 0  0  0  2  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  0  0  0  7  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
   0  0  0  0]
 [ 0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  0  0  0  0  0  1  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  0  0  0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [ 0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
