## Loading the model

In [1]:
import joblib

# Load the trained classifier from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
MODEL_PATH = 'random_forest_model.pkl'
loaded_model = joblib.load(MODEL_PATH)

### Capturing Live Traffic with Scapy and Preprocessing the Captured Data

List the available network interfaces:

In [2]:
from scapy.all import get_if_list
# Show every interface identifier scapy can capture on
available_interfaces = get_if_list()
print(available_interfaces)

['{39F984D3-949A-4B2B-9801-7CEB8F0BE110}', '{4E2425F9-9E1A-4EFD-A25B-6A01D0CADB6E}', '{C7544003-7698-4181-9977-2BC477E7FED6}', '{7A9105AD-1ADE-433B-9491-D9E565FF8549}', '{7C0D1531-42C5-4015-B6A0-50A074DBA5F9}', '{D64AF707-AE01-48A5-88FD-49FAE74BF383}', '{3D759493-EBA8-4B20-8697-A81D4323D608}', '{2C650E79-2A69-48B8-9D96-0D8E8B234426}', '{512B0187-7635-4862-BE89-8821326A3EF6}', '\\Device\\NPF_Loopback', '{B236F5B9-CA30-43A6-8171-4C5E328FCDB6}']


In [3]:
import wmi

# Print the description and GUID of every IP-enabled adapter so the GUID
# strings listed by scapy's get_if_list() can be matched to a named adapter.
c = wmi.WMI()
for adapter in c.Win32_NetworkAdapterConfiguration(IPEnabled=True):
    # print() returns None, so its result is deliberately not bound to a name.
    print(f"Interface Name: {adapter.Description}")
    print(f"GUID: {adapter.SettingID}\n")

Interface Name: Intel(R) Wi-Fi 6 AX201 160MHz
GUID: {7C0D1531-42C5-4015-B6A0-50A074DBA5F9}

Interface Name: VMware Virtual Ethernet Adapter for VMnet1
GUID: {3D759493-EBA8-4B20-8697-A81D4323D608}

Interface Name: VMware Virtual Ethernet Adapter for VMnet8
GUID: {D64AF707-AE01-48A5-88FD-49FAE74BF383}



In [4]:
from scapy.all import sniff, IP, TCP, UDP

# Function to process packets and match with the required features
def process_packet(packet):
    """Translate one captured packet into a feature record.

    Only the fields that can be read directly off the packet's IP and TCP
    layers are filled in; every other feature is a fixed placeholder value.

    Args:
        packet: A captured packet supporting ``LAYER in packet`` and
            ``packet[LAYER]`` lookups for the ``IP`` and ``TCP`` layers.

    Returns:
        dict: Feature name -> value, always with the same keys in the same
        order. Without an IP layer the IP-derived fields are 0; without a
        TCP layer ``urgent`` is 0.
    """
    # Look each layer up once instead of re-indexing the packet per field.
    ip_layer = packet[IP] if IP in packet else None
    tcp_layer = packet[TCP] if TCP in packet else None

    if ip_layer is None:
        proto = ip_flags = payload_size = same_endpoints = frag_offset = 0
    else:
        proto = ip_layer.proto                    # IP protocol number
        ip_flags = ip_layer.flags                 # IP header flags
        payload_size = len(ip_layer.payload)      # size of the IP payload
        same_endpoints = 1 if ip_layer.src == ip_layer.dst else 0
        frag_offset = ip_layer.frag               # fragment offset

    urgent_pointer = 0 if tcp_layer is None else tcp_layer.urgptr

    # Header-derived fields first; insertion order fixes the column order.
    record = {
        'duration': 0,          # placeholder (needs timing logic)
        'protocol_type': proto,
        'flag': ip_flags,
        'src_bytes': payload_size,
        'dst_bytes': 0,         # placeholder (needs session tracking)
        'land': same_endpoints,
        'wrong_fragment': frag_offset,
        'urgent': urgent_pointer,
    }

    # Content/login features: constant placeholders.
    record.update(dict.fromkeys(
        (
            'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
            'root_shell', 'su_attempted', 'num_file_creations', 'num_shells',
            'num_access_files', 'num_outbound_cmds', 'is_host_login',
            'is_guest_login',
        ),
        0,
    ))

    # Connection counters: placeholder value of one.
    record['count'] = 1
    record['srv_count'] = 1

    # Rate and per-host features: constant placeholders.
    record.update(dict.fromkeys(
        (
            'serror_rate', 'rerror_rate', 'same_srv_rate', 'diff_srv_rate',
            'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count',
            'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
            'dst_host_srv_diff_host_rate',
        ),
        0,
    ))

    # Label placeholders.
    record['target'] = 0
    record['Attack Type'] = 'normal'

    return record

# Function to capture packets and process them
def capture_live_traffic(packet_count=10, iface=None):
    """Sniff packets and convert each one into a feature record.

    Args:
        packet_count: Passed to ``sniff`` as ``count``.
        iface: Passed to ``sniff`` as ``iface``.

    Returns:
        list: One ``process_packet`` result per sniffed packet, in capture order.
    """
    captured = sniff(count=packet_count, iface=iface)
    return list(map(process_packet, captured))

# Capture settings: the interface is addressed by its adapter description
CAPTURE_IFACE = 'Intel(R) Wi-Fi 6 AX201 160MHz'
CAPTURE_PACKET_COUNT = 10
live_data = capture_live_traffic(packet_count=CAPTURE_PACKET_COUNT, iface=CAPTURE_IFACE)

# Tabulate the feature records (one row per packet, one column per feature)
import pandas as pd
df_live_data = pd.DataFrame(live_data)
print(df_live_data)
df_live_data.columns

   duration  protocol_type flag  src_bytes  dst_bytes  land  wrong_fragment  \
0         0              6   DF       1420          0     0               0   
1         0              6   DF       1420          0     0               0   
2         0              6   DF         20          0     0               0   
3         0              6   DF       1420          0     0               0   
4         0              6   DF        352          0     0               0   
5         0              6   DF         20          0     0               0   
6         0              6   DF       1420          0     0               0   
7         0              6   DF       1420          0     0               0   
8         0              6   DF       1420          0     0               0   
9         0              6   DF       1420          0     0               0   

   urgent  hot  num_failed_logins  ...  same_srv_rate  diff_srv_rate  \
0       0    0                  0  ...              0     

Index(['duration', 'protocol_type', 'flag', 'src_bytes', 'dst_bytes', 'land',
       'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in',
       'num_compromised', 'root_shell', 'su_attempted', 'num_file_creations',
       'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login',
       'is_guest_login', 'count', 'srv_count', 'serror_rate', 'rerror_rate',
       'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
       'dst_host_count', 'dst_host_srv_count', 'dst_host_diff_srv_rate',
       'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'target',
       'Attack Type'],
      dtype='object')

### Making Predictions based on live data

In [6]:
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the pre-trained model
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
loaded_model = joblib.load('random_forest_model.pkl')

# Feature columns handed to the model, in this order. This is the captured
# column list without the 'target' and 'Attack Type' label placeholders.
# NOTE(review): presumably this matches the column order used in training — confirm.
model_columns = ['duration', 'protocol_type', 'flag', 'src_bytes', 'dst_bytes', 'land',
                 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in',
                 'num_compromised', 'root_shell', 'su_attempted', 'num_file_creations',
                 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login',
                 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'rerror_rate',
                 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
                 'dst_host_count', 'dst_host_srv_count', 'dst_host_diff_srv_rate',
                 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate']

# Subset df_live_data to only include the expected features.
# .copy() makes the subset an independent frame, so the column assignments
# below no longer trigger pandas' SettingWithCopyWarning.
df_live_data = df_live_data[model_columns].copy()

# Apply label encoding to categorical (object-dtype) features.
# NOTE(review): each encoder is fitted on this live capture, so the integer
# codes depend on which values happen to appear in it; verify that they line
# up with the encoding the model was trained with.
label_encoders = {}
for column in df_live_data.columns:
    if df_live_data[column].dtype == 'object':
        le = LabelEncoder()
        df_live_data[column] = le.fit_transform(df_live_data[column].astype(str))
        label_encoders[column] = le  # Save encoder for potential inverse transformation

# Ensure the DataFrame is fully numeric
df_live_data = df_live_data.apply(pd.to_numeric)

# Convert DataFrame to numpy array
X_live = df_live_data.values

# Apply the model to the preprocessed live data
predictions = loaded_model.predict(X_live)

# Print the predictions
print(predictions)


['normal' 'normal' 'normal' 'normal' 'normal' 'normal' 'normal' 'normal'
 'normal' 'normal']


# Making predictions when Malicious Data is available

### Manually injecting bad traffic

In [7]:
# Work on a copy so the preprocessed live data stays untouched
malicious_data = df_live_data.copy()

# Overwrite selected features of the row at position 1 with crafted values.
# NOTE(review): the 1 / 0 codes for protocol_type / flag are presumably meant
# as "tcp" / "S0" — confirm against the encoding used in training.
injected_values = {
    'duration': 0,
    'protocol_type': 1,
    'flag': 0,
    'src_bytes': 0,
    'dst_bytes': 0,
}
for feature_name, crafted_value in injected_values.items():
    malicious_data.iloc[1, malicious_data.columns.get_loc(feature_name)] = crafted_value


### Running the Model on the Modified Data

In [8]:
# Extract the underlying numpy array from the modified frame
X_malicious = malicious_data.to_numpy()

# Classify every row, including the tampered one
predictions = loaded_model.predict(X_malicious)

# Show the predicted labels
print(predictions)


['normal' 'dos' 'normal' 'normal' 'normal' 'normal' 'normal' 'normal'
 'normal' 'normal']
