In [None]:
pip install scapy

Collecting scapy
  Downloading scapy-2.5.0.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scapy
  Building wheel for scapy (setup.py) ... [?25l[?25hdone
  Created wheel for scapy: filename=scapy-2.5.0-py2.py3-none-any.whl size=1444330 sha256=2785a5a5fed195a09fd26da5d5e32107293f89cba229c9f0cd5b51c1796073a6
  Stored in directory: /root/.cache/pip/wheels/82/b7/03/8344d8cf6695624746311bc0d389e9d05535ca83c35f90241d
Successfully built scapy
Installing collected packages: scapy
Successfully installed scapy-2.5.0


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from scapy.all import *
import csv
import os
import statistics  # Import the statistics module for calculating standard deviation


'''Nithish's features related functions'''
# Function to calculate time intervals between consecutive frames
def calculate_time_intervals(pcap_file):
    """Collect the gaps between consecutive TCP packets of a capture.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        A list with one entry per TCP packet after the first; each entry is
        that packet's ``time`` minus the ``time`` of the previous TCP packet.
        Packets without a TCP layer are ignored.
    """
    gaps = []
    last_seen = None

    with PcapReader(pcap_file) as reader:
        for pkt in reader:
            if TCP not in pkt:
                continue
            stamp = pkt.time
            if last_seen is not None:
                gaps.append(stamp - last_seen)
            last_seen = stamp

    return gaps

# Function to calculate the average time interval for a TCP flow
def calculate_average_time_interval(time_intervals):
    """Return the arithmetic mean of *time_intervals*, or 0.0 if it is empty."""
    count = len(time_intervals)
    if count == 0:
        return 0.0
    return sum(time_intervals) / count


# Function to calculate the time difference between the first and last packet in a TCP flow
def calculate_time_difference_first_last(pcap_file):
    """Return the time span between the first and last TCP packet.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        ``time`` of the last TCP packet minus ``time`` of the first one, or
        0.0 when the capture contains no TCP packet.
    """
    earliest = None
    latest = None

    with PcapReader(pcap_file) as reader:
        for pkt in reader:
            if TCP in pkt:
                latest = pkt.time
                if earliest is None:
                    earliest = latest

    if earliest is None or latest is None:
        return 0.0
    return latest - earliest



# Function to calculate the rate of window size growth for a TCP flow
def calculate_window_size_growth(pcap_file):
    """Compute the relative TCP window change for every flow in a capture.

    A flow is identified by the directed 4-tuple
    ``(src IP, src port, dst IP, dst port)``.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        A dict mapping each flow key to
        ``(first_window - last_window) / first_window``. Flows with a single
        packet, or whose first advertised window is 0, map to 0.0.
    """
    flow_window_sizes = {}
    with PcapReader(pcap_file) as packets:
        for packet in packets:
            # The flow key needs an IP layer; a TCP segment carried by
            # anything else (e.g. IPv6) would make packet[IP] raise.
            if TCP not in packet or IP not in packet:
                continue
            ip_layer = packet[IP]
            tcp_layer = packet[TCP]
            flow_key = (ip_layer.src, tcp_layer.sport, ip_layer.dst, tcp_layer.dport)
            flow_window_sizes.setdefault(flow_key, []).append(tcp_layer.window)

    rate_of_growth = {}
    for flow, window_sizes in flow_window_sizes.items():
        initial_window = window_sizes[0]
        # A zero initial window would divide by zero; report no growth
        # instead of aborting the whole extraction.
        if len(window_sizes) > 1 and initial_window != 0:
            final_window = window_sizes[-1]
            # NOTE(review): this is positive when the window shrinks; the
            # sign is kept as-is so existing feature values do not change.
            rate_of_growth[flow] = (initial_window - final_window) / initial_window
        else:
            rate_of_growth[flow] = 0.0

    return rate_of_growth

# Function to calculate the average rate of window size growth for TCP flow
def calculate_average_rate_of_growth(rate_of_growth):
    """Average the per-flow growth rates.

    Args:
        rate_of_growth: Mapping of flow key to growth rate.

    Returns:
        The mean of the mapping's values, or 0.0 when the mapping is empty.
    """
    if not rate_of_growth:
        return 0.0
    per_flow_rates = rate_of_growth.values()
    return sum(per_flow_rates) / len(rate_of_growth)



# Function to calculate the rate of sequence number increase for a TCP flow
def calculate_sequence_number_increase(pcap_file):
    """Compute the per-packet sequence number increase for every flow.

    A flow is identified by the directed 4-tuple
    ``(src IP, src port, dst IP, dst port)``.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        A dict mapping each flow key to
        ``(last_seq - first_seq) / number_of_packets``. Flows with a single
        packet map to 0.0.
    """
    flow_sequence_numbers = {}
    with PcapReader(pcap_file) as packets:
        for packet in packets:
            # The flow key needs an IP layer; a TCP segment carried by
            # anything else (e.g. IPv6) would make packet[IP] raise.
            if TCP not in packet or IP not in packet:
                continue
            ip_layer = packet[IP]
            tcp_layer = packet[TCP]
            flow_key = (ip_layer.src, tcp_layer.sport, ip_layer.dst, tcp_layer.dport)
            flow_sequence_numbers.setdefault(flow_key, []).append(tcp_layer.seq)

    rate_of_increase = {}
    for flow, sequence_numbers in flow_sequence_numbers.items():
        if len(sequence_numbers) > 1:
            # NOTE(review): sequence number wrap-around is not compensated,
            # so a wrapped flow yields a negative rate — confirm acceptable.
            delta = sequence_numbers[-1] - sequence_numbers[0]
            rate_of_increase[flow] = delta / len(sequence_numbers)
        else:
            rate_of_increase[flow] = 0.0

    return rate_of_increase

# Function to calculate the average rate of sequence number increase for TCP flow
def calculate_average_rate_of_sequence_increase(rate_of_increase):
    """Average the per-flow sequence number increase rates.

    Args:
        rate_of_increase: Mapping of flow key to increase rate.

    Returns:
        The mean of the mapping's values, or 0.0 when the mapping is empty.
    """
    return (
        sum(rate_of_increase.values()) / len(rate_of_increase)
        if rate_of_increase
        else 0.0
    )






'''Pratham's features related functions'''
# Function to count ACK packets in a pcap file
def count_ack_packets_in_pcap(pcap_file_path):
    """Count the TCP packets of a capture that have the ACK flag set.

    Args:
        pcap_file_path: Path of the capture file, handed to ``rdpcap``.

    Returns:
        The number of TCP packets whose flags include bit 0x10 (ACK).
    """
    ack_mask = 0x10
    return sum(
        1
        for pkt in rdpcap(pcap_file_path)
        if TCP in pkt and pkt[TCP].flags & ack_mask
    )

# Function to calculate the average packet size
def calculate_average_packet_size(pcap_file_path):
    """Return the mean length of the TCP packets in a capture.

    Args:
        pcap_file_path: Path of the capture file, handed to ``rdpcap``.

    Returns:
        The average of ``len(packet)`` over all packets with a TCP layer,
        or 0.0 when the capture has no such packet.
    """
    tcp_sizes = [len(pkt) for pkt in rdpcap(pcap_file_path) if TCP in pkt]

    if not tcp_sizes:
        # Nothing relevant in this capture.
        return 0.0
    return sum(tcp_sizes) / len(tcp_sizes)

# Function to count RST packets in a pcap file
def count_rst_packets_in_pcap(pcap_file_path):
    """Count the TCP packets of a capture that have the RST flag set.

    Args:
        pcap_file_path: Path of the capture file, handed to ``rdpcap``.

    Returns:
        The number of TCP packets whose flags include bit 0x04 (RST).
    """
    rst_mask = 0x04
    tally = 0

    for pkt in rdpcap(pcap_file_path):
        if TCP not in pkt:
            continue
        if pkt[TCP].flags & rst_mask:
            tally += 1

    return tally

# Function to calculate standard deviation of TTL in a pcap file
def calculate_ttl_standard_deviation(pcap_file_path):
    """Return the sample standard deviation of the IP TTL values in a capture.

    Args:
        pcap_file_path: Path of the capture file, handed to ``rdpcap``.

    Returns:
        ``statistics.stdev`` of the TTL of every packet with an IP layer, or
        0.0 when fewer than two such packets exist.
    """
    ttl_values = [
        packet[IP].ttl for packet in rdpcap(pcap_file_path) if IP in packet
    ]

    # statistics.stdev needs at least two data points and raises
    # StatisticsError otherwise, so a single-packet capture must be
    # short-circuited as well as an empty one.
    if len(ttl_values) < 2:
        return 0.0
    return statistics.stdev(ttl_values)






'''Neeraj's features related functions'''
# FEATURE 1: Time Since First Frame
def calculate_time_since_first_frame(pcap_file):
    """Return the last TCP packet's time relative to the first TCP packet.

    The previous implementation overwrote its reference timestamp with each
    computed difference, so from the third TCP packet on the result no
    longer referred to the first frame, and a single-packet capture returned
    the absolute timestamp instead of zero.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        ``time`` of the last TCP packet minus ``time`` of the first one
        (zero for a single TCP packet), or ``None`` when the capture
        contains no TCP packet.
    """
    first_timestamp = None
    last_timestamp = None

    with PcapReader(pcap_file) as packets:
        for packet in packets:
            if TCP in packet:
                last_timestamp = packet.time
                if first_timestamp is None:
                    # Remember the reference frame once and never overwrite it.
                    first_timestamp = last_timestamp

    if first_timestamp is None:
        # Kept from the original contract: no TCP traffic yields None.
        return None
    return last_timestamp - first_timestamp

# FEATURE 2: Total Number of Packets with FIN[Finish] Flag
def count_fin_packets(pcap_file):
    """Count the TCP packets of a capture that have the FIN flag set.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        The number of TCP packets whose flags include bit 0x01 (FIN).
    """
    fin_mask = 0x01
    with PcapReader(pcap_file) as reader:
        return sum(
            1 for pkt in reader if TCP in pkt and pkt[TCP].flags & fin_mask
        )


# FEATURE 3: Total Number of Packets with URG[Urgent] Flag Set
def count_urg_packets(pcap_file):
    """Count the TCP packets of a capture that have the URG flag set.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        The number of TCP packets whose flags include bit 0x20 (URG).
    """
    urg_mask = 0x20
    tally = 0

    with PcapReader(pcap_file) as reader:
        for pkt in reader:
            if TCP not in pkt:
                continue
            if pkt[TCP].flags & urg_mask:
                tally += 1

    return tally

# FEATURE 4: Total Number of Packets with PSH[Push] Flag Set
def count_psh_packets(pcap_file):
    """Count the TCP packets of a capture that have the PSH flag set.

    Args:
        pcap_file: Path of the capture file, handed to ``PcapReader``.

    Returns:
        The number of TCP packets whose flags include bit 0x08 (PSH).
    """
    psh_mask = 0x08
    with PcapReader(pcap_file) as reader:
        flagged = [
            pkt for pkt in reader if TCP in pkt and pkt[TCP].flags & psh_mask
        ]
    return len(flagged)






'''Manas's features related functions'''
def calculate_avg_payload_length(pcap_filename):
    """Return the mean ``Raw`` payload length of the IP/TCP packets in a capture.

    Args:
        pcap_filename: Path of the capture file, handed to ``rdpcap``.

    Returns:
        The average of ``len(packet[Raw].load)`` over packets that have IP,
        TCP and Raw layers, or 0 when no packet qualifies.
    """
    payload_sizes = [
        len(pkt[Raw].load)
        for pkt in rdpcap(pcap_filename)
        if pkt.haslayer(IP) and pkt.haslayer(TCP) and pkt.haslayer(Raw)
    ]

    if not payload_sizes:
        return 0
    return sum(payload_sizes) / len(payload_sizes)

def calculate_std_iat(pcap_filename):
    """Return the sample standard deviation of packet inter-arrival times.

    Every packet in the capture is considered, regardless of its layers.

    Args:
        pcap_filename: Path of the capture file, handed to ``rdpcap``.

    Returns:
        ``statistics.stdev`` of the differences between the ``time`` of
        consecutive packets, or 0 when the capture has fewer than three
        packets (i.e. fewer than two gaps).
    """
    times = [packet.time for packet in rdpcap(pcap_filename)]
    time_diffs = [later - earlier for earlier, later in zip(times, times[1:])]

    # statistics.stdev raises StatisticsError for fewer than two data
    # points; two packets give only one gap, so guard on the gap count
    # rather than on the packet count.
    if len(time_diffs) < 2:
        return 0
    return statistics.stdev(time_diffs)

def calculate_client_iat(pcap_filename):
    """Return the mean inter-arrival time of packets addressed to the client.

    The client is taken to be the IP source of the first packet that has an
    IP layer.

    Args:
        pcap_filename: Path of the capture file, handed to ``rdpcap``.

    Returns:
        The mean difference between the ``time`` of consecutive packets
        whose IP destination is the client, or 0 when fewer than two such
        packets exist (including empty or non-IP captures).
    """
    # Packets without an IP layer cannot be attributed to a direction and
    # would make packet[IP] raise, so they are dropped up front.
    ip_packets = [packet for packet in rdpcap(pcap_filename) if IP in packet]
    if not ip_packets:
        return 0

    client_ip = ip_packets[0][IP].src
    times = [packet.time for packet in ip_packets if packet[IP].dst == client_ip]

    # A single arrival has no gap; the earlier code returned its absolute
    # timestamp here, which is not an inter-arrival time.
    if len(times) < 2:
        return 0

    time_diffs = [later - earlier for earlier, later in zip(times, times[1:])]
    return statistics.mean(time_diffs)

def calculate_server_iat(pcap_filename):
    """Return the mean inter-arrival time of packets addressed to the server.

    The server is taken to be the IP destination of the first packet that
    has an IP layer.

    Args:
        pcap_filename: Path of the capture file, handed to ``rdpcap``.

    Returns:
        The mean difference between the ``time`` of consecutive packets
        whose IP destination is the server, or 0 when fewer than two such
        packets exist (including empty or non-IP captures).
    """
    # Packets without an IP layer cannot be attributed to a direction and
    # would make packet[IP] raise, so they are dropped up front.
    ip_packets = [packet for packet in rdpcap(pcap_filename) if IP in packet]
    if not ip_packets:
        return 0

    server_ip = ip_packets[0][IP].dst
    times = [packet.time for packet in ip_packets if packet[IP].dst == server_ip]

    # A single arrival has no gap; the earlier code returned its absolute
    # timestamp here, which is not an inter-arrival time.
    if len(times) < 2:
        return 0

    time_diffs = [later - earlier for earlier, later in zip(times, times[1:])]
    return statistics.mean(time_diffs)


def calculate_down_up_ratio(pcap_file):
    """Return the ratio of downstream to upstream bytes in a capture.

    The client is taken to be the IP source of the first packet that has an
    IP layer. Packets addressed to the client count as downstream; every
    other IP packet counts as upstream. Sizes are ``len(packet)``.

    Args:
        pcap_file: Path of the capture file, handed to ``rdpcap``.

    Returns:
        ``down_bytes / up_bytes``, or 0.0 when there are no upstream bytes
        (including empty or non-IP captures).
    """
    down_bytes = 0
    up_bytes = 0
    client_ip = None

    for packet in rdpcap(pcap_file):
        # Without an IP layer there is no destination to classify by, and
        # packet[IP] would raise.
        if IP not in packet:
            continue
        if client_ip is None:
            client_ip = packet[IP].src

        packet_size = len(packet)
        if packet[IP].dst == client_ip:
            down_bytes += packet_size
        else:
            up_bytes += packet_size

    # Avoid ZeroDivisionError when nothing was sent upstream.
    if up_bytes == 0:
        return 0.0
    return down_bytes / up_bytes





''' Reading the PCAP files algorithm '''
# Function to append features to a CSV file
def append_features_to_csv(features, flow_id, label, csv_file):
    """Append one feature row to a CSV file.

    Args:
        features: List of feature values for the flow.
        flow_id: Identifier written as the first column.
        label: Class label written as the last column.
        csv_file: Path of the CSV file, opened in append mode.
    """
    with open(csv_file, 'a', newline='') as handle:
        row = [flow_id] + features + [label]
        csv.writer(handle).writerow(row)

def main(input_folder, csv_file):
    """Extract features from every ``.pcap`` under a folder into a CSV file.

    The CSV file is truncated and given a header row, then the folder tree
    is walked and one row is appended per capture file. The flow ID is the
    file name without its ``.pcap`` suffix. The label is 0 when the
    directory path contains ``"Malicious"`` and 1 otherwise.

    Args:
        input_folder: Root directory that is searched recursively.
        csv_file: Path of the output CSV file.
    """
    header = ["Flow ID","Average Time Interval", "Time Difference First-Last Packet","Average Rate of Window Size Growth", "Average Rate of Sequence Number Increase","ACK Packets", "Average Packet Size", "RST Packets", "TTL Standard Deviation","Time Since First Frame (seconds)", "Total FIN Packets", "Total URG Packets", "Total PSH Packets","avg_payload_len","std_dev_iat","avg_iat_client","avg_iat_server","down/up_ratio","Label"]
    with open(csv_file, 'w', newline='') as out:
        csv.writer(out).writerow(header)

    for root, _, files in os.walk(input_folder):
        for filename in files:
            if not filename.endswith(".pcap"):
                continue
            pcap_file = os.path.join(root, filename)

            # The list order must match the header columns above.
            features = [
                # Nithish's features
                calculate_average_time_interval(calculate_time_intervals(pcap_file)),
                calculate_time_difference_first_last(pcap_file),
                calculate_average_rate_of_growth(calculate_window_size_growth(pcap_file)),
                calculate_average_rate_of_sequence_increase(calculate_sequence_number_increase(pcap_file)),
                # Pratham's features
                count_ack_packets_in_pcap(pcap_file),
                calculate_average_packet_size(pcap_file),
                count_rst_packets_in_pcap(pcap_file),
                calculate_ttl_standard_deviation(pcap_file),
                # Neeraj's features
                calculate_time_since_first_frame(pcap_file),
                count_fin_packets(pcap_file),
                count_urg_packets(pcap_file),
                count_psh_packets(pcap_file),
                # Manas's features
                calculate_avg_payload_length(pcap_file),
                calculate_std_iat(pcap_file),
                calculate_client_iat(pcap_file),
                calculate_server_iat(pcap_file),
                calculate_down_up_ratio(pcap_file),
            ]

            flow_id = filename[:-5]  # Drop the ".pcap" suffix
            label = 0 if "Malicious" in root else 1  # Label comes from the directory path
            append_features_to_csv(features, flow_id, label, csv_file)

# Entry point: runs the feature extraction over the dataset folder.
if __name__ == '__main__':
    # Root folder that main() walks recursively for .pcap files.
    input_folder = '/content/drive/MyDrive/2023_PES_CP_B44/Datasets'
    # Output CSV; main() truncates it and writes the header before appending rows.
    csv_file = '/content/features.csv'
    main(input_folder, csv_file)


StatisticsError: ignored