In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

In [None]:
# CSV files to load and concatenate into one dataset.
# Replace these with your own file paths.
file_paths = [
    'Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
    'Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
    'Friday-WorkingHours-Morning.pcap_ISCX.csv',
    'Monday-WorkingHours.pcap_ISCX.csv',
    'Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
    'Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
    'Tuesday-WorkingHours.pcap_ISCX.csv',
    'Wednesday-workingHours.pcap_ISCX.csv',
]

In [None]:
# Read every CSV and refuse to continue if any file's header differs from the
# header of the first file; then stack all rows under a fresh 0..n-1 index.
dfs = []
for file_path in file_paths:
    df = pd.read_csv(file_path, encoding='latin1')
    # The first file defines the reference header, so it is always accepted.
    header_matches = not dfs or df.columns.tolist() == dfs[0].columns.tolist()
    if not header_matches:
        raise ValueError(f"Заголовки в файле {file_path} не совпадают с заголовками первого файла")
    dfs.append(df)

data = pd.concat(dfs, ignore_index=True)

In [None]:
# Feature columns fed to the model. Several names start with a space; the
# strings are used verbatim as DataFrame column keys, so they must not be trimmed.
selected_features = [
    ' Flow Duration',
    'Total Length of Fwd Packets',
    ' Fwd Packet Length Mean',
    'Flow Bytes/s',
    ' Flow Packets/s',
    ' Fwd IAT Mean',
    ' Bwd IAT Mean',
    ' Average Packet Size',
    ' SYN Flag Count',
    ' Total Backward Packets',
    ' ACK Flag Count',
    ' Packet Length Variance',
    ' Destination Port',
    ' Protocol',
]

In [None]:
# Targets: the ' Label' column (the leading space is part of the column name).
y = data[' Label']

# Model inputs: only the chosen feature columns.
X = data[selected_features]

In [None]:
# Treat +/-inf as missing, then drop every row that has a missing feature value.
# X is rebound to the cleaned result instead of being mutated with
# inplace=True: X was taken as a column selection of `data`, and in-place
# edits on such a selection are what pandas' SettingWithCopyWarning is about.
X = X.replace([np.inf, -np.inf], np.nan).dropna()

# Keep only the labels of the rows that survived, matched by index label.
y = y.loc[X.index]

In [None]:
# Encode the label values as consecutive integer class ids.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Decide which rows go to train/test BEFORE scaling. Splitting row positions
# with the same test_size and random_state selects the split without touching
# the feature values yet.
row_positions = np.arange(len(X))
train_idx, test_idx = train_test_split(row_positions, test_size=0.2, random_state=42)

# Fit the scaler on training rows only. Fitting on the full dataset would let
# test-set means/variances leak into preprocessing and bias the evaluation.
# It is fitted on a DataFrame so later transform() calls on DataFrames with the
# same columns stay consistent.
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx])

# Scale all rows with the train-fitted scaler and append a channel axis:
# (n_samples, n_features) -> (n_samples, n_features, 1), matching the
# input_shape=(n_features, 1) the model is built with.
X_scaled = np.expand_dims(scaler.transform(X), axis=2)

X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]

In [None]:
# Distinct encoded class ids that occur in the training split, and their count.
unique_labels = np.unique(y_train)
n_unique_labels = len(unique_labels)

print("Уникальные метки классов:", unique_labels)
print("Количество:", n_unique_labels)



In [None]:
# The whole network is built in a single statement so that re-running this cell
# always produces a fresh model; calling model.add(...) in a separate cell
# stacks duplicate layers every time that cell is executed again.

# One output unit per class known to the encoder. Counting only the classes
# present in y_train would produce too few units if a rare class happened to be
# missing from the training split, leaving some class ids without an output.
num_classes = len(label_encoder.classes_)

model = Sequential([
    # Each sample is a sequence of n_features steps with a single channel.
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    # Flattens each time step separately, so the (batch, steps, ...) layout
    # that the LSTM needs is preserved.
    TimeDistributed(Flatten()),
    LSTM(50, activation='relu'),
    # Class probabilities.
    Dense(num_classes, activation='softmax'),
])

# Sparse loss: the targets are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

In [None]:
# Train first, then save. Saving before fit() writes the freshly initialised,
# untrained weights to disk.
# The returned History object is kept so the cell does not echo its repr and so
# the per-epoch metrics remain available afterwards.
history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.2)

model.save('modelCNN-more.h5')

In [None]:
# evaluate() returns the loss followed by the compiled metrics
# (only accuracy is compiled into this model).
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Accuracy: {accuracy}')

In [None]:
# Class probabilities for every test sample -> most likely class id per sample.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Pass the full list of class ids explicitly. Without `labels`, the report
# infers the classes from y_test/y_pred, and if any class is missing from both
# the count no longer matches `target_names` and the call fails.
class_ids = np.arange(len(label_encoder.classes_))
print(classification_report(
    y_test,
    y_pred_classes,
    labels=class_ids,
    target_names=label_encoder.classes_,
    zero_division=0,  # classes with no samples/predictions score 0 without a warning
))

In [None]:
def predict_attack_type(file_path, row_number):
    """Predict the traffic label for a single row of a flow CSV.

    Relies on notebook-level objects created by earlier cells:
    ``selected_features`` (feature column names), ``scaler`` (fitted scaler),
    ``model`` (trained network) and ``label_encoder`` (fitted label encoder).

    Parameters
    ----------
    file_path : str
        CSV file that contains the columns listed in ``selected_features``.
    row_number : int
        Zero-based positional index of the row to classify.

    Returns
    -------
    The decoded label predicted for that row.

    Raises
    ------
    ValueError
        If the selected row has NaN or infinite feature values. Such rows were
        dropped before training, and a NaN input would otherwise silently
        produce a meaningless prediction.
    IndexError
        If ``row_number`` is outside the file.
    """
    # Same encoding as the training-time load, so files with non-UTF-8 bytes
    # can be read here as well.
    flows = pd.read_csv(file_path, encoding='latin1')

    # Double brackets keep the row as a one-row DataFrame (2-D), which is what
    # the scaler expects. The notebook-level feature list is used so inference
    # cannot drift from the columns used for training.
    X_sample = flows.iloc[[row_number]][selected_features]

    if not np.isfinite(X_sample.to_numpy(dtype=float)).all():
        raise ValueError(
            f"Row {row_number} of {file_path} has NaN or infinite feature values "
            "and cannot be classified"
        )

    # Scale, then add the channel axis: (1, n_features) -> (1, n_features, 1).
    X_sample_scaled = np.expand_dims(scaler.transform(X_sample), axis=2)

    prediction = model.predict(X_sample_scaled)
    predicted_class = np.argmax(prediction, axis=1)

    return label_encoder.inverse_transform(predicted_class)[0]

In [None]:
# One hand-picked row per traffic class: (note, source file, zero-based row).
# The note is the comment each original cell carried, presumably the label that
# row has in the CSV; it is not verified here.
spot_checks = [
    ('BENIGN', 'Wednesday-workingHours.pcap_ISCX.csv', 6),
    ('DDoS', 'Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv', 127328),
    ('DoS GoldenEye', 'Wednesday-workingHours.pcap_ISCX.csv', 677132),
    ('DoS Hulk', 'Wednesday-workingHours.pcap_ISCX.csv', 297887),
    ('DoS Slowhttptest', 'Wednesday-workingHours.pcap_ISCX.csv', 69282),
    ('DoS slowloris', 'Wednesday-workingHours.pcap_ISCX.csv', 6560),
    ('Heartbleed', 'Wednesday-workingHours.pcap_ISCX.csv', 597827),
    ('PortScan', 'Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv', 154555),
]

# Same calls and same printed lines as the individual cells, in the same order.
for _note, file_path, row_number in spot_checks:
    predicted_label = predict_attack_type(file_path, row_number)
    print(f'Predicted Attack Type: {predicted_label}')

In [None]:
def predict_attack_types(file_path, start_row, end_row):
    """Predict the traffic labels for a contiguous range of rows of a flow CSV.

    Relies on notebook-level objects created by earlier cells:
    ``selected_features`` (feature column names), ``scaler`` (fitted scaler),
    ``model`` (trained network) and ``label_encoder`` (fitted label encoder).

    Parameters
    ----------
    file_path : str
        CSV file that contains the columns listed in ``selected_features``.
    start_row, end_row : int
        Zero-based positional slice bounds; ``end_row`` is exclusive.

    Returns
    -------
    Array of decoded labels, one per selected row, in file order. An empty
    array is returned when the range selects no rows.

    Raises
    ------
    ValueError
        If any selected row has NaN or infinite feature values. Such rows were
        dropped before training, and NaN inputs would otherwise silently
        produce meaningless predictions.
    """
    # Same encoding as the training-time load, so files with non-UTF-8 bytes
    # can be read here as well.
    flows = pd.read_csv(file_path, encoding='latin1')

    # The notebook-level feature list is used so inference cannot drift from
    # the columns used for training.
    X_sample = flows.iloc[start_row:end_row][selected_features]

    # Nothing selected: return early instead of handing zero rows to the scaler.
    if X_sample.empty:
        return np.array([], dtype=label_encoder.classes_.dtype)

    finite_rows = np.isfinite(X_sample.to_numpy(dtype=float)).all(axis=1)
    if not finite_rows.all():
        bad_offsets = np.flatnonzero(~finite_rows)[:10].tolist()
        raise ValueError(
            f"Rows {start_row}:{end_row} of {file_path} contain NaN or infinite "
            f"feature values (first offsets within the range: {bad_offsets})"
        )

    # Scale, then add the channel axis the model was built for:
    # (n_rows, n_features) -> (n_rows, n_features, 1).
    X_sample_scaled = np.expand_dims(scaler.transform(X_sample), axis=2)

    predictions = model.predict(X_sample_scaled)
    predicted_classes = np.argmax(predictions, axis=1)

    return label_encoder.inverse_transform(predicted_classes)

In [None]:
import timeit

In [None]:
num_iterations = 100

file_path = 'Wednesday-workingHours.pcap_ISCX.csv'
row_number = 6

# One already-preprocessed test sample with a leading batch axis.
single_sample = np.array([X_test[0]])

# Warm-up call: the first predict() typically includes one-off setup work,
# which should not be averaged into the per-call figure.
model.predict(single_sample, verbose=0)

# Time the model call alone. Timing predict_attack_type() here would re-read
# the whole CSV on every iteration, so the reported number would mostly measure
# file parsing rather than inference.
total_time = timeit.timeit(
    lambda: model.predict(single_sample, verbose=0),
    number=num_iterations,
)

average_inference_time = total_time / num_iterations
print(f"Average inference time: {average_inference_time} seconds")

# For context: a single end-to-end call, which also loads the CSV and rescales
# the selected row.
end_to_end_time = timeit.timeit(lambda: predict_attack_type(file_path, row_number), number=1)
print(f"End-to-end prediction time (CSV load + preprocessing + inference): {end_to_end_time} seconds")