In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Load the combined feature table from CSV.
csv_path = r"D:\Проекты\Дипломаня работа\DoFitN\Code\DoFitN\new_code\data_csv\combined_features.csv"
df = pd.read_csv(csv_path)

# Source and destination columns of the same address family must share one
# vocabulary, so collect the union of both columns before fitting an encoder.
all_macs = pd.concat([df[column] for column in ("src_mac", "dst_mac")]).unique()
all_ips = pd.concat([df[column] for column in ("src_ip", "dst_ip")]).unique()

# One encoder per address family, each fitted on that family's full vocabulary.
le_mac = LabelEncoder()
le_mac.fit(all_macs)
le_ip = LabelEncoder()
le_ip.fit(all_ips)

# Replace every address column with its integer code.
address_encoders = (
    (le_mac, ("src_mac", "dst_mac")),
    (le_ip, ("src_ip", "dst_ip")),
)
for encoder, columns in address_encoders:
    for column in columns:
        df[column] = encoder.transform(df[column])

# Columns fed to the model (all numeric once the addresses are encoded).
features = [
    "src_mac",
    "dst_mac",
    "src_ip",
    "dst_ip",
    "opcode",
    "is_broadcast",
    "duplicates",
    "requests",
    "replies",
    "packet_rate",
    "multiple_macs",
    "request_reply_ratio",
]

# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split happens, so the test rows contribute to the scaling statistics.
scaler = StandardScaler()
scaler.fit(df[features])
df[features] = scaler.transform(df[features])

# Feature matrix and label vector as plain arrays.
X = df[features].values
y = df["label"].values

# Build sliding-window samples for the sequence model (LSTM).
def create_sequences(X, y, seq_length=10):
    """Slice row-ordered data into overlapping fixed-length windows.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of ``X`` and is paired
    with ``y[i + seq_length]``, i.e. the label of the row that immediately
    follows the window. Consecutive windows are shifted by one row.

    NOTE(review): if the intent is to classify the last row *inside* the
    window, the target should be ``y[i + seq_length - 1]`` instead - confirm.

    Args:
        X: array-like of shape (n_rows, ...) - one entry per row/time step.
        y: array-like of length n_rows with one label per row.
        seq_length: number of consecutive rows per window (>= 1).

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays, where ``X_seq`` has shape
        ``(n_windows, seq_length, ...)`` and ``y_seq`` has shape
        ``(n_windows,)``, with ``n_windows = max(n_rows - seq_length, 0)``.
        When there are too few rows the arrays are empty but keep the window
        and feature axes, so downstream ``.shape`` lookups keep working.

    Raises:
        ValueError: if ``seq_length < 1`` or ``X`` and ``y`` differ in length.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same number of rows, got {len(X)} and {len(y)}"
        )

    n_windows = max(len(X) - seq_length, 0)

    # Row indices of every window: start offsets (column vector) plus
    # in-window offsets (row vector) broadcast to (n_windows, seq_length).
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]

    X_seq = X[window_rows]
    y_seq = y[seq_length:seq_length + n_windows]
    return X_seq, y_seq

# Keep handles on the row-level arrays; X / y are rebound to windows below.
X_rows, y_rows = X, y

# Split the ordered rows BEFORE windowing. Windows are built with stride 1, so
# neighbouring windows share all but one row; shuffling windows into train and
# test would place near-duplicates of the training samples into the test set
# and inflate validation metrics. shuffle=False cuts the arrays at one index
# (leading 80% -> train, trailing 20% -> test); stratification is not
# available in that mode.
# NOTE(review): this relies on the rows being in capture order - confirm.
X_rows_train, X_rows_test, y_rows_train, y_rows_test = train_test_split(
    X_rows, y_rows, test_size=0.2, shuffle=False
)

# Window each part separately so no window straddles the train/test boundary.
X_train, y_train = create_sequences(X_rows_train, y_rows_train)
X_test, y_test = create_sequences(X_rows_test, y_rows_test)

# Without stratification a class can be missing from one side of the cut
# (e.g. when the CSV is a concatenation of separate captures); surface that
# loudly instead of silently training/evaluating on a single class.
known_classes = np.unique(y_rows)
for split_name, split_labels in (("train", y_train), ("test", y_test)):
    absent = np.setdiff1d(known_classes, np.unique(split_labels))
    if absent.size:
        print(f"ВНИМАНИЕ: в выборке {split_name} отсутствуют классы {absent.tolist()}")

# X / y keep their previous meaning (all windows over the full dataset) for
# any later code that reads them.
X, y = create_sequences(X_rows, y_rows)

print("Форма X_train:", X_train.shape)
print("Форма X_test:", X_test.shape)


ValueError: y contains previously unseen labels: [np.int64(18)]

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense, Flatten, Concatenate, Dropout
import matplotlib.pyplot as plt

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Input dimensions taken from the training windows: (time steps, features).
seq_length, feature_dim = X_train.shape[1], X_train.shape[2]

# Input layer: one window of `seq_length` steps with `feature_dim` features.
input_layer = Input(shape=(seq_length, feature_dim))

# CNN branch: two 1-D convolutions along the time axis. padding='same' keeps
# the number of time steps unchanged; Flatten turns the result into a vector.
cnn = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(input_layer)
cnn = Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(cnn)
cnn = Flatten()(cnn)

# LSTM branch: the first layer returns the full sequence so the second LSTM
# can consume it; the second layer returns only its final state.
lstm = LSTM(64, return_sequences=True)(input_layer)
lstm = LSTM(32)(lstm)

# Merge both branches and classify.
merged = Concatenate()([cnn, lstm])
merged = Dense(64, activation='relu')(merged)
merged = Dropout(0.3)(merged)
# Single sigmoid unit + binary cross-entropy below.
# NOTE(review): this assumes `label` is a binary 0/1 column - confirm.
output_layer = Dense(1, activation='sigmoid')(merged)

# Assemble and compile the model.
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Print the layer-by-layer structure.
model.summary()

# Train the model.
# NOTE(review): the held-out test split is also used as the validation set,
# so the reported val_* curves are not an independent final estimate.
fit_options = dict(
    epochs=15,
    batch_size=32,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)

# Training curves: accuracy on the left panel, loss on the right panel.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

# (axes, train key, train legend, validation key, validation legend, y label, title)
panels = (
    (ax_accuracy, 'accuracy', 'Train Accuracy', 'val_accuracy', 'Val Accuracy', 'Точность', 'График точности'),
    (ax_loss, 'loss', 'Train Loss', 'val_loss', 'Val Loss', 'Потери', 'График потерь'),
)

for ax, train_key, train_label, val_key, val_label, y_label, title in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Эпохи')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.set_title(title)

plt.show()

# Persist the trained model to a file in the current working directory.
# NOTE(review): the ".h5" suffix selects the HDF5 format, which recent Keras
# releases treat as legacy in favour of the native ".keras" format - consider
# migrating once every consumer of this file has been updated.
model.save("arp_cnn_lstm.h5")
