In [None]:
import datetime
from scapy.all import rdpcap
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import joblib
from sklearn.preprocessing import MinMaxScaler

import netifaces as ni

In [None]:
# Capture file inspected by the packet-dump cell below.
pcap_file = "DATA/real-world-data/capture-20231215-153927.pcap"

# Interface to query, and the address of its first AF_INET entry as reported by netifaces.
interface = "eth0"
ip = ni.ifaddresses(interface)[ni.AF_INET][0]["addr"]

# Protocol number -> display name lookup.
protocol_names = {
    6: "TCP",
    17: "UDP",
}

In [None]:
def extract_field(pcap_file):
    """Print every layer and field of the first packet in a capture file.

    Args:
        pcap_file: Path of the capture; passed unchanged to ``rdpcap``.

    Raises:
        IndexError: If the capture contains no packets.
    """
    packets = rdpcap(pcap_file)
    if len(packets) == 0:
        # Same exception type as indexing an empty list, but with a message
        # that names the offending file.
        raise IndexError(f"no packets found in {pcap_file!r}")
    packet = packets[0]
    print("Packet  Layers and Attributes:")
    # datetime.date carries no time of day, so %H:%M:%S.%f always rendered as
    # zeros; datetime.datetime keeps the full timestamp.
    # float(): presumably packet.time is a Decimal-like value, which
    # fromtimestamp may reject — TODO confirm against the installed scapy.
    timestamp = datetime.datetime.fromtimestamp(float(packet.time)).strftime('%Y-%m-%d %H:%M:%S.%f')
    print(f"Packet - Timestamp: {timestamp}")
    # Walk the payload chain; the loop stops at the first falsy payload.
    layer = packet
    while layer:
        print(f"  Layer: {layer.name}")
        for field in layer.fields_desc:
            field_name = field.name
            field_value = layer.getfieldval(field_name)
            print(f"    {field_name}: {field_value}")
        layer = layer.payload
    print("")


extract_field(pcap_file)

In [None]:
# Column names, in positional order, assigned to the training frame further down.
headers = [
    "dst_port", "protocol", "timestamp", "flow_duration",
    "tot_fwd_pkts", "tot_bwd_pkts", "totlen_fwd_pkts", "totlen_bwd_pkts",
    "fwd_pkt_len_max", "fwd_pkt_len_min", "fwd_pkt_len_mean", "fwd_pkt_len_std",
    "bwd_pkt_len_max", "bwd_pkt_len_min", "bwd_pkt_len_mean", "bwd_pkt_len_std",
    "flow_byts_s", "flow_pkts_s",
    "flow_iat_mean", "flow_iat_std", "flow_iat_max", "flow_iat_min",
    "fwd_iat_tot", "fwd_iat_mean", "fwd_iat_std", "fwd_iat_max", "fwd_iat_min",
    "bwd_iat_tot", "bwd_iat_mean", "bwd_iat_std", "bwd_iat_max", "bwd_iat_min",
    "fwd_psh_flags", "bwd_psh_flags", "fwd_urg_flags", "bwd_urg_flags",
    "fwd_header_len", "bwd_header_len", "fwd_pkts_s", "bwd_pkts_s",
    "pkt_len_min", "pkt_len_max", "pkt_len_mean", "pkt_len_std", "pkt_len_var",
    "fin_flag_cnt", "syn_flag_cnt", "rst_flag_cnt", "psh_flag_cnt",
    "ack_flag_cnt", "urg_flag_cnt", "cwe_flag_count", "ece_flag_cnt",
    "down_up_ratio", "pkt_size_avg", "fwd_seg_size_avg", "bwd_seg_size_avg",
    "fwd_byts_b_avg", "fwd_pkts_b_avg", "fwd_blk_rate_avg",
    "bwd_byts_b_avg", "bwd_pkts_b_avg", "bwd_blk_rate_avg",
    "subflow_fwd_pkts", "subflow_fwd_byts", "subflow_bwd_pkts", "subflow_bwd_byts",
    "init_fwd_win_byts", "init_bwd_win_byts", "fwd_act_data_pkts", "fwd_seg_size_min",
    "active_mean", "active_std", "active_max", "active_min",
    "idle_mean", "idle_std", "idle_max", "idle_min",
]

In [None]:
train = pd.read_csv("app/data_csv/train-100k.csv")

In [None]:
# Remove the label column, turn +/-inf into NaN, then discard every row that
# still contains a NaN.
train = (
    train
    .drop(columns=["Label"])
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

In [None]:
# Rename the columns positionally from `headers`. pandas rejects the assignment
# when the counts differ, so the frame must hold exactly len(headers) columns here
# (presumably true once `Label` has been dropped — TODO confirm against the CSV).
train.columns = headers

In [None]:
# Feature columns that get min-max scaled. Defined once and reused for the fit
# so the fitted columns and the transformed columns cannot drift apart.
numerical_columns = [
    "flow_duration",
    "tot_fwd_pkts", "tot_bwd_pkts", "totlen_fwd_pkts", "totlen_bwd_pkts",
    "fwd_pkt_len_max", "fwd_pkt_len_min", "fwd_pkt_len_mean", "fwd_pkt_len_std",
    "bwd_pkt_len_max", "bwd_pkt_len_min", "bwd_pkt_len_mean", "bwd_pkt_len_std",
    "flow_byts_s", "flow_pkts_s",
    "flow_iat_mean", "flow_iat_std", "flow_iat_max", "flow_iat_min",
    "fwd_iat_tot", "fwd_iat_mean", "fwd_iat_std", "fwd_iat_max", "fwd_iat_min",
    "bwd_iat_tot", "bwd_iat_mean", "bwd_iat_std", "bwd_iat_max", "bwd_iat_min",
    "fwd_psh_flags", "bwd_psh_flags", "fwd_urg_flags", "bwd_urg_flags",
    "fwd_header_len", "bwd_header_len", "fwd_pkts_s", "bwd_pkts_s",
    "pkt_len_min", "pkt_len_max", "pkt_len_mean", "pkt_len_std", "pkt_len_var",
    "fin_flag_cnt", "syn_flag_cnt", "rst_flag_cnt", "psh_flag_cnt",
    "ack_flag_cnt", "urg_flag_cnt", "cwe_flag_count", "ece_flag_cnt",
    "down_up_ratio", "pkt_size_avg", "fwd_seg_size_avg", "bwd_seg_size_avg",
    "fwd_byts_b_avg", "fwd_pkts_b_avg", "fwd_blk_rate_avg",
    "bwd_byts_b_avg", "bwd_pkts_b_avg", "bwd_blk_rate_avg",
    "subflow_fwd_pkts", "subflow_fwd_byts", "subflow_bwd_pkts", "subflow_bwd_byts",
    "init_fwd_win_byts", "init_bwd_win_byts", "fwd_act_data_pkts", "fwd_seg_size_min",
    "active_mean", "active_std", "active_max", "active_min",
    "idle_mean", "idle_std", "idle_max", "idle_min",
]

# Fit the scaler on those columns of the training frame.
min_max_scaler = MinMaxScaler().fit(train[numerical_columns])

In [None]:
# Drop the timestamp column from the training frame.
train = train.drop(columns=["timestamp"])

In [None]:
train.head()

In [None]:
# Load the ftp-crack flows and strip the address, source-port and timestamp
# columns in one step; the cell then displays the first rows.
df = pd.read_csv("app/data_csv/ftp-crack.csv").drop(
    columns=["src_ip", "dst_ip", "src_port", "timestamp"]
)
df.head()

In [None]:
# Scale the numeric feature columns with the scaler fitted on the training frame.
# NOTE(review): the result is written back into `df`, so running this cell twice
# rescales already-scaled values — re-run the cell that loads `df` first.
df[numerical_columns] = min_max_scaler.transform(df[numerical_columns])

In [None]:
test = df.values

In [None]:
# Load the estimator persisted at checkpoints/RandomForest.joblib.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load checkpoints that come from a trusted source.
randomforest = joblib.load("checkpoints/RandomForest.joblib")

In [None]:
rf = randomforest.predict(test)

In [None]:
# Emit each prediction held in `rf` on its own line.
for prediction in rf:
    print(prediction)

In [None]:
# NOTE(review): this import sits mid-notebook rather than in the import cell at the top.
from tensorflow.keras.models import load_model
# Load the Keras model saved at checkpoints/neuralNetModel.h5.
NN = load_model("checkpoints/neuralNetModel.h5")

In [None]:
import glob

# Running total of predictions that are not -1 across the scored CSV files.
count = 0

# glob returns matches in arbitrary order; sorting makes the file picked by the
# `break` below the same on every run and every machine.
for csv_path in sorted(glob.glob("app/data_csv/cse*")):
    print(csv_path)
    # Loop-local name so the notebook-level `df` (the scaled ftp-crack frame)
    # is not overwritten by this cell.
    cse_df = pd.read_csv(csv_path).drop(["timestamp"], axis=1)
    # NOTE(review): unlike the ftp-crack cell, this frame keeps any src_ip /
    # dst_ip / src_port columns and is not min-max scaled before prediction —
    # confirm the cse* CSVs are already in the layout the model expects.
    result = randomforest.predict(cse_df)
    for label in result:
        if label != -1:
            count += 1
            print(csv_path)
    # NOTE(review): only the first matching file is scored; remove this `break`
    # to process every match.
    break



In [None]:
# Print the network's output for every row of `test`, one entry per line.
for output_row in NN.predict(test):
    print(output_row)

In [None]:
count

In [None]:
# Print every entry of the most recent `result`, one per line.
for prediction in result:
    print(prediction)

In [None]:
count

In [None]:
test.shape

In [None]:
reshaped_array = test[0].reshape(1, -1)

In [None]:
# NOTE(review): `clf2` is not assigned in any cell visible in this notebook, so this
# line raises NameError on a fresh kernel — TODO confirm which estimator was intended.
# It also rebinds `result`, replacing whatever an earlier cell stored under that name.
result = clf2.predict(reshaped_array)

In [None]:
result[0]

In [None]:
# Report every prediction equal to 1; any other label prints nothing.
for label in result:
    if label == 1:
        print("OK")

In [None]:
test[0]

In [None]:
# joblib is already imported in the notebook's first cell; this repeat is harmless.
import joblib
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
model = joblib.load("model.pkl")

In [None]:
def predict(data, clf=None):
    """Classify one sample and print the verdict.

    Label convention: ``-1`` is reported as malicious; any other label
    (``1`` is the expected one) is reported as benign.

    Args:
        data: Feature values of a single sample. Any array-like that NumPy
            can convert; it is reshaped to one row before prediction.
        clf: Optional estimator exposing ``predict``. When omitted, the
            notebook-level ``model`` is used, as before.
    """
    estimator = model if clf is None else clf
    # np.asarray accepts lists and other array-likes that have no .reshape of
    # their own; an ndarray passes through unchanged.
    sample = np.asarray(data).reshape(1, -1)
    result = estimator.predict(sample)

    if result[0] == -1:
        print("Malicious")
    else:
        print("Benign ")
    