In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

In [2]:
train_data = pd.read_csv(r"C:\Users\Almas\Downloads\UNSW_NB15_training-set.csv")
test_data = pd.read_csv(r"C:\Users\Almas\Downloads\UNSW_NB15_testing-set.csv")

In [3]:
encoder = LabelEncoder()
scaler = StandardScaler()

In [4]:
columns_to_encode = ['proto', 'service', 'state', 'attack_cat']
for column in columns_to_encode:
    unique_values_train = train_data[column].unique()
    encoder.fit(unique_values_train)  # Используем только уникальные значения из обучающего набора
    train_data[column] = encoder.transform(train_data[column])
    
    # Проверяем наличие новых значений в тестовом наборе данных
    unique_values_test = test_data[column].unique()
    new_values = set(unique_values_test) - set(unique_values_train)
    if new_values:
        print(f"Новые значения в столбце '{column}' в тестовом наборе данных: {new_values}")
    
    test_data.loc[test_data[column].isin(new_values), column] = unique_values_train[0]  # Заменяем новые значения на первое значение из обучающего набора
    test_data[column] = encoder.transform(test_data[column])

Новые значения в столбце 'state' в тестовом наборе данных: {'CLO', 'ACC'}


In [5]:
X_train = train_data.drop(columns=['id', 'label', 'attack_cat'])
X_test = test_data.drop(columns=['id', 'label', 'attack_cat'])

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

y_train = train_data['label']
y_test = test_data['label']

In [6]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 974us/step - accuracy: 0.8972 - loss: 0.2786 - val_accuracy: 0.9965 - val_loss: 0.0316
Epoch 2/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9262 - loss: 0.1534 - val_accuracy: 0.9956 - val_loss: 0.0239
Epoch 3/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 913us/step - accuracy: 0.9281 - loss: 0.1468 - val_accuracy: 0.9940 - val_loss: 0.0219
Epoch 4/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 893us/step - accuracy: 0.9305 - loss: 0.1409 - val_accuracy: 0.9916 - val_loss: 0.0277
Epoch 5/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 951us/step - accuracy: 0.9304 - loss: 0.1403 - val_accuracy: 0.9836 - val_loss: 0.0378
Epoch 6/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9326 - loss: 0.1359 - val_accuracy: 0.9893 - val_loss: 0.0313
Epoch 7/

<keras.src.callbacks.history.History at 0x1a7f47b1210>

In [8]:
y_pred_probabilities = model.predict(X_test_scaled)
y_pred = np.argmax(y_pred_probabilities, axis=1)

[1m2573/2573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 538us/step


In [9]:
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')

In [10]:
print("Accuracy:", accuracy)
print("Recall:", recall)
print("Precision:", precision)
print("F1 Score:", f1)

Accuracy: 0.8590341543992616
Recall: 0.8590341543992616
Precision: 0.8785557353405549
F1 Score: 0.8551010245127628


# CNN

In [11]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
import numpy as np

In [12]:
train_data = pd.read_csv(r"C:\Users\Almas\Downloads\UNSW_NB15_training-set.csv")
test_data = pd.read_csv(r"C:\Users\Almas\Downloads\UNSW_NB15_testing-set.csv")

In [13]:
encoder = LabelEncoder()
scaler = StandardScaler()
columns_to_encode = ['proto', 'service', 'state', 'attack_cat']
for column in columns_to_encode:
    unique_values_train = train_data[column].unique()
    encoder.fit(unique_values_train)
    train_data[column] = encoder.transform(train_data[column])

    unique_values_test = test_data[column].unique()
    new_values = set(unique_values_test) - set(unique_values_train)
    if new_values:
        print(f"Новые значения в столбце '{column}' в тестовом наборе данных: {new_values}")
    test_data.loc[test_data[column].isin(new_values), column] = unique_values_train[0]
    test_data[column] = encoder.transform(test_data[column])


Новые значения в столбце 'state' в тестовом наборе данных: {'CLO', 'ACC'}


In [14]:
X_train = train_data.drop(columns=['id', 'label', 'attack_cat'])
X_test = test_data.drop(columns=['id', 'label', 'attack_cat'])
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

y_train = train_data['label']
y_test = test_data['label']

In [16]:
X_train_cnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_cnn = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

In [17]:
model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(X_train_cnn.shape[1], X_train_cnn.shape[2])),
    MaxPooling1D(2),
    Conv1D(32, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(10, activation='softmax')
])


  super().__init__(


In [18]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.9021 - loss: 0.2562 - val_accuracy: 0.9966 - val_loss: 0.0219
Epoch 2/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.9263 - loss: 0.1579 - val_accuracy: 0.9904 - val_loss: 0.0277
Epoch 3/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.9288 - loss: 0.1471 - val_accuracy: 0.9961 - val_loss: 0.0223
Epoch 4/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.9307 - loss: 0.1432 - val_accuracy: 0.9873 - val_loss: 0.0313
Epoch 5/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.9318 - loss: 0.1421 - val_accuracy: 0.9881 - val_loss: 0.0331
Epoch 6/10
[1m4384/4384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9319 - loss: 0.1412 - val_accuracy: 0.9851 - val_loss: 0.0358
Epoch 7/10

<keras.src.callbacks.history.History at 0x1a7f4889f10>

In [19]:
y_pred_probabilities = model.predict(X_test_cnn)
y_pred = np.argmax(y_pred_probabilities, axis=1)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
print("Accuracy:", accuracy)
print("Recall:", recall)
print("Precision:", precision)
print("F1 Score:", f1)

[1m2573/2573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
Accuracy: 0.8555725598795122
Recall: 0.8555725598795122
Precision: 0.8803801690954876
F1 Score: 0.8507837219015117
