# LSTM CNN. Пробная модель.

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import (roc_auc_score, f1_score, 
                             confusion_matrix, roc_curve, auc)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, LSTM, Conv1D, 
                                   MaxPooling1D, Flatten, concatenate, Dropout)
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import joblib

data_path = r'D:\Проекты\Дипломаня работа\DoFitN\Code\DoFitN\new_code\data\All_data.csv'
model_dir = r'D:\Проекты\Дипломаня работа\DoFitN\Code\DoFitN\Old_code\main\old_DL\V1\save_model'

df = pd.read_csv(data_path)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Кодирование категориальных переменных
cat_cols = ['sender_mac', 'sender_ip', 'target_mac', 'target_ip']
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Сохранение LabelEncoders
label_encoders_path = os.path.join(model_dir, 'label_encoders.pkl')
joblib.dump(label_encoders, label_encoders_path)

# Нормализация числовых переменных
scaler = StandardScaler()
num_cols = ['timestamp', 'operation']
df[num_cols] = scaler.fit_transform(df[num_cols])

# Сохранение Scaler
scaler_path = os.path.join(model_dir, 'scaler.pkl')
joblib.dump(scaler, scaler_path)

# Создание временных окон
def create_sequences(data, labels, window_size):
    X, y_diff = [], []
    for i in range(window_size, len(data)):
        X.append(data[i - window_size:i])
        if i > 0:
            y_diff.append(labels[i] - labels[i - 1])
    return np.array(X), np.array(y_diff)

window_size = 30  # Размер временного окна
X, y = create_sequences(df.drop('label', axis=1).values, df['label'].values, window_size)

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Создание модели
def create_model(input_shape):
    cnn_input = Input(shape=input_shape)
    cnn = Conv1D(64, 3, activation='relu')(cnn_input)
    cnn = MaxPooling1D(2)(cnn)
    cnn = Flatten()(cnn)
    cnn = Dropout(0.3)(cnn)

    lstm = LSTM(50, return_sequences=True)(cnn_input)
    lstm = LSTM(25)(lstm)
    lstm = Dropout(0.3)(lstm) 

    merged = concatenate([cnn, lstm])

    dense = Dense(20, activation='relu')(merged)
    dense = Dropout(0.3)(dense)  
    output = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=cnn_input, outputs=output)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 
                           tf.keras.metrics.AUC(name='auc')])
    return model

model = create_model((window_size, X_train.shape[2]))
model.summary()

# Обучение модели
early_stop = EarlyStopping(monitor='val_auc', patience=10, mode='max', restore_best_weights=True)
history = model.fit(X_train, y_train,
                    epochs=20,
                    batch_size=16,
                    validation_split=0.8,
                    callbacks=[early_stop])

# Оценка модели
y_pred = model.predict(X_test).ravel()
y_pred_class = (y_pred > 0.5).astype(int)

print(f"AUC: {roc_auc_score(y_test, y_pred):.3f}")
print(f"F1-Score: {f1_score(y_test, y_pred_class):.3f}")

# ROC Curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.2f}')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred_class)
sns.heatmap(cm, annot=True, fmt='d')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Графики обучения
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# Сохранение модели
model_path = os.path.join(model_dir, 'arp_spoofing_detection_model.h5')
model.save(model_path)


Epoch 1/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.4716 - auc: 0.4990 - loss: 4.7822 - val_accuracy: 0.4988 - val_auc: 0.5000 - val_loss: 0.2289
Epoch 2/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.4915 - auc: 0.4981 - loss: -0.4435 - val_accuracy: 0.4988 - val_auc: 0.4999 - val_loss: 0.0736
Epoch 3/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.4831 - auc: 0.4956 - loss: -0.2833 - val_accuracy: 0.4988 - val_auc: 0.4992 - val_loss: 0.0051
Epoch 4/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.4822 - auc: 0.4861 - loss: -0.1835 - val_accuracy: 0.4988 - val_auc: 0.5000 - val_loss: -0.2959
Epoch 5/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.4764 - auc: 0.4955 - loss: -1.4808 - val_accuracy: 0.4988 - val_auc: 0.5000 - val_loss: -0.8915
Epoch 6/20
[1m116/

ValueError: multi_class must be in ('ovo', 'ovr')

In [2]:
from sklearn.metrics import classification_report

y_train_pred = model.predict(X_train).ravel()
y_train_pred_class = (y_train_pred > 0.5).astype(int)

y_test_pred = model.predict(X_test).ravel()
y_test_pred_class = (y_test_pred > 0.5).astype(int)

print("=== TRAIN METRICS ===")
print(f"AUC: {roc_auc_score(y_train, y_train_pred):.3f}")
print(f"F1-Score: {f1_score(y_train, y_train_pred_class):.3f}")
print(classification_report(y_train, y_train_pred_class))

print("\n=== TEST METRICS ===")
print(f"AUC: {roc_auc_score(y_test, y_test_pred):.3f}")
print(f"F1-Score: {f1_score(y_test, y_test_pred_class):.3f}")
print(classification_report(y_test, y_test_pred_class))


[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
=== TRAIN METRICS ===


ValueError: multi_class must be in ('ovo', 'ovr')