In [18]:
from scapy.all import *
from collections import Counter
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import pickle
import tensorflow as tf
import joblib
from numpy import sqrt
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [62]:
pcap_file = './data/pcap/attack.pcap'

packets = rdpcap(pcap_file)

In [63]:
packets[0].summary()

'Ether / IP / ICMP 192.168.124.2 > 192.168.124.131 dest-unreach host-unreachable / IPerror / UDPerror / NBNSQueryRequest / Raw'

In [57]:
flows = {}

src = '192.168.124.2'
dst = '192.168.124.131'

start_time, end_time = None, None

for packet in packets:
    if packet.haslayer(IP) and packet.haslayer(TCP):
        # Create flow dựa trên dst IP/ports
        flow_id = (packet[IP].src, packet[IP].dst, packet[TCP].sport, packet[TCP].dport)

        timestamp = packet.time
        if not start_time:
            start_time = timestamp
        end_time = timestamp

        if flow_id not in flows:
            flows[flow_id] = {
                "packets": [],
                "start_time": packet.time,
                "end_time": packet.time,
                "last_packet_time": None,
                "inbound_count": 0,
                "outbound_count": 0,
                "incoming_lengths": [],  # Store lengths of incoming packets
                "outgoing_lengths": [],  # Store lengths of outgoing packets
            }
        
        if packet[IP].src == flow_id[0] and packet[TCP].sport == flow_id[2]:
            flows[flow_id]["outbound_count"] += 1

        if packet[IP].src == flow_id[1] and packet[TCP].sport == flow_id[3]:
            flows[flow_id]["inbound_count"] += 1

        flows[flow_id]["end_time"] = packet.time
        flows[flow_id]["packets"].append(packet)

        if packet[IP].src == flow_id[0] and packet[TCP].sport == flow_id[2]:  # outgoing
            flows[flow_id]["outgoing_lengths"].append(len(packet))
        else:  # incoming
            flows[flow_id]["incoming_lengths"].append(len(packet))

feature_data = []
for flow_id, flow in flows.items():
    # Phân tích flow
    duration = flow["end_time"] - flow["start_time"]
    total_packets = len(flow["packets"])
    total_bytes = sum(len(pkt) for pkt in flow["packets"])
    #byte_rate = total_bytes / duration if duration > 0 else 0

    # Phân tích size packet
    packet_sizes = [len(pkt) for pkt in flow["packets"]]
    min_size = min(packet_sizes) if packet_sizes else 0
    max_size = max(packet_sizes) if packet_sizes else 0
    avg_size = np.mean(packet_sizes) if packet_sizes else 0
    std_size = np.std(packet_sizes) if packet_sizes else 0
    is_ip = 1 if flow["packets"][0].haslayer(IP) else 0
    is_tcp = 1 if flow["packets"][0].haslayer(TCP) else 0
    
    # Initialize counters of TCP flags
    flag_counter = Counter()
    for pkt in flow["packets"]:
        flag_counter += Counter(str(pkt[TCP].flags))

    outbound_packets = flow["outbound_count"]
    rate = srate = outbound_packets / duration if duration > 0 else 0

    inbound_packets = flow["inbound_count"]
    
    #drate = inbound_packets / duration if duration > 0 else 0
    iat = packet.time - flows[flow_id]["last_packet_time"] if flows[flow_id]["last_packet_time"] else 0

    incoming_variance = np.var(flow["incoming_lengths"]) if flow["incoming_lengths"] else 0
    outgoing_variance = np.var(flow["outgoing_lengths"]) if flow["outgoing_lengths"] else 0
    
    radius = sqrt(incoming_variance + outgoing_variance)
    
    covariance = (incoming_variance - outgoing_variance) if (incoming_variance - outgoing_variance) else 0
    
    variance = incoming_variance / outgoing_variance if outgoing_variance > 0 else 0

    # Initialize counters of TCP flags
    flag_counter = Counter()
    for pkt in flow["packets"]:
        flag_counter += Counter(str(pkt[TCP].flags))
    feature_data.append({
        "flow_duration": end_time - start_time,
        "Header_Length": 54,"Protocol Type": 6,"Duration": 64,"Rate": rate,
        "Srate": srate,"Drate": 0,"fin_flag_number": 0,"syn_flag_number": 0,"rst_flag_number": 0,
        "psh_flag_number": 0,"ack_flag_number": 0,"ece_flag_number": 0,
        "cwr_flag_number": 0,
        "ack_count": flag_counter['A'],
        "syn_count": flag_counter['S'],
        "fin_count": flag_counter['F'],
        "urg_count": flag_counter['U'],
        "rst_count": flag_counter['R'],
        #"psh_flag_count": flag_counter['P'],
     
        "HTTP": 0,"HTTPS": 0,"DNS": 0,"Telnet": 0,"SMTP": 0,"SSH": 0,
        "IRC": 0,"TCP": is_tcp,"UDP": 0,"DHCP": 0,"ARP": 0,
        "ICMP": 0,"IPv": is_ip,"LLC": 1,
        "Tot sum": 567,"Min": min_size,"Max": max_size,"AVG": avg_size,
        "Std": std_size,
        "Tot size": 54,
        "IAT": iat,
        "Number": 9.5,
        "Magnitue": 10.3923,
        "Radius": radius,
        "Covariance": covariance,
        "Variance": variance,
        "Weight": 141.55,    
    })

df = pd.DataFrame(feature_data)
df.to_csv('flow_features.csv', index=False)


In [58]:
df

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,AVG,Std,Tot size,IAT,Number,Magnitue,Radius,Covariance,Variance,Weight
0,0.687350,54,6,64,2E+6,2E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
1,0.687350,54,6,64,2E+6,2E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
2,0.687350,54,6,64,0,0,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
3,0.687350,54,6,64,2E+6,2E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
4,0.687350,54,6,64,1E+6,1E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13117,0.687350,54,6,64,1E+6,1E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
13118,0.687350,54,6,64,1E+6,1E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
13119,0.687350,54,6,64,2E+6,2E+6,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
13120,0.687350,54,6,64,27777.77777777777777777777778,27777.77777777777777777777778,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55


In [59]:
df.head()

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,AVG,Std,Tot size,IAT,Number,Magnitue,Radius,Covariance,Variance,Weight
0,0.68735,54,6,64,2000000.0,2000000.0,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
1,0.68735,54,6,64,2000000.0,2000000.0,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
2,0.68735,54,6,64,0.0,0.0,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
3,0.68735,54,6,64,2000000.0,2000000.0,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55
4,0.68735,54,6,64,1000000.0,1000000.0,0,0,0,0,...,60.0,0.0,54,0,9.5,10.3923,0.0,0,0,141.55


In [60]:
model_path = './modelCNN.pkl'
with open(model_path, 'rb') as file:
    model = pickle.load(file)
numpy_array = df.to_numpy().astype(float)
loaded_scaler = joblib.load('scaler_model.joblib')
scaled = loaded_scaler.fit_transform(numpy_array)

label_encoder = joblib.load('label_encoder.joblib')

In [61]:
x_train = scaled[5]
input_data = np.array([x_train])
# Reshape the input data to fit the model's input shape
input_data = input_data.reshape((1, 46, 1))
prediction = model.predict(input_data)
predicted_classes = prediction.argmax(axis=1)
predicted_label = label_encoder.inverse_transform(predicted_classes)
print(f"Type attack : {predicted_label}")

Phát hiện dạng tấn công : ['DDoS-ICMP_Flood']
