In [11]:
import pandas as pd
import joblib
import time
from datetime import datetime

# Load the saved model and preprocessing assets
model = joblib.load('xgmodel.pkl')
categorical_columns = joblib.load('xgca.pkl')
saved_features = joblib.load('xgfeatures.pkl')

# Path to the continuously updating CSV file
csv_file = 'network_flows201.csv'

# Function to read the latest data from the CSV
def get_latest_data(file_path):
    try:
        data = pd.read_csv(file_path)
        return data.tail(10)  # Adjust the number for recent rows to check
    except Exception as e:
        print(f"Error reading CSV: {e}")
        return pd.DataFrame()

# Convert StartTime to datetime format and seconds since epoch
def convert_start_time(data):
    try:
        data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')
        data['StartTime'] = data['StartTime'].astype('int64') // 10**9
    except Exception as e:
        print(f"Error converting StartTime: {e}")
    return data

# Preprocessing: convert time, handle nulls, and match feature set
# Preprocessing: convert time, handle nulls, and match feature set
def preprocess_data(data):
    try:
        data = convert_start_time(data)
        data = data.fillna(0)  # Fill all null values with 0

        # One-hot encode categorical columns
        data = pd.get_dummies(data, columns=categorical_columns)

        # Add any missing columns with zeros (optimized)
        missing_cols = [col for col in saved_features if col not in data.columns]
        if missing_cols:
            missing_data = pd.DataFrame(0, index=data.index, columns=missing_cols)
            data = pd.concat([data, missing_data], axis=1)

        # Reorder columns to match training data
        data = data[saved_features]
        data = data.copy()  # Defragment DataFrame
        return data
    except Exception as e:
        print(f"Error during preprocessing: {e}")
        return pd.DataFrame()


# Simulate attack flows based on provided examples
def simulate_attack_flow(attack_type='DDOS'):
    attack_flows = {
        'DDOS': {
            'StartTime': time.time(),
            'Flgs': 'e',
            'Proto': 'udp',
            'SrcAddr': '192.168.100.150',
            'DstAddr': '192.168.100.3',
            'State': 'INT',
            'Dur': 14.19,
            'SrcRate': 0.493336,
            'DstRate': 0.0,
            'SrcPkts': 8,
            'DstPkts': 0,
            'SrcBytes': 480,
            'DstBytes': 0,
            'StdDev': 0.975617
        },
        'Reconnaissance': {
            'StartTime': time.time(),
            'Flgs': 'e',
            'Proto': 'tcp',
            'SrcAddr': '192.168.100.147',
            'DstAddr': '192.168.100.7',
            'State': 'RST',
            'Dur': 0.001259,
            'SrcRate': 0.0,
            'DstRate': 0.0,
            'SrcPkts': 1,
            'DstPkts': 1,
            'SrcBytes': 60,
            'DstBytes': 60,
            'StdDev': 0.0
        },
        'Theft': {
            'StartTime': time.time(),
            'Flgs': 'e',
            'Proto': 'tcp',
            'SrcAddr': '192.168.100.3',
            'DstAddr': '192.168.100.150',
            'State': 'RST',
            'Dur': 0.000129,
            'SrcRate': 0.0,
            'DstRate': 0.0,
            'SrcPkts': 1,
            'DstPkts': 1,
            'SrcBytes': 74,
            'DstBytes': 60,
            'StdDev': 0.0
        }
    }
    return pd.DataFrame([attack_flows[attack_type]])

# Make predictions and format output
def predict_intrusions(data, attack_type=None):
    if data.empty:
        return
    try:
        processed_data = preprocess_data(data)
        predictions = model.predict(processed_data)
        data['Prediction'] = ['normal' if pred == 0 else 'attack' for pred in predictions]

        header = f"Simulating {attack_type} Attack Flow..." if attack_type else "Real-time Network Flows"
        print(f"\n{'=' * 50}\n{header}\n{'=' * 50}")
        print(data[['StartTime', 'SrcAddr', 'DstAddr', 'Proto', 'State', 'Prediction']])
        print(f"{'-' * 50}\n")
    except Exception as e:
        print(f"Error during prediction: {e}")

# Real-time intrusion detection loop
print("Starting real-time intrusion detection...\n")

normal_flow_counter = 0
attack_frequency = 2
attack_types = ['DDOS', 'Reconnaissance', 'Theft']
attack_index = 0

while True:
    latest_data = get_latest_data(csv_file)
    
    if not latest_data.empty:
        normal_flow_counter += 1
        predict_intrusions(latest_data)

    if normal_flow_counter >= attack_frequency:
        attack_type = attack_types[attack_index]
        simulated_attack = simulate_attack_flow(attack_type)
        predict_intrusions(simulated_attack, attack_type)
        
        normal_flow_counter = 0
        attack_index = (attack_index + 1) % len(attack_types)
    
    time.sleep(5)


Starting real-time intrusion detection...



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2305  1741700999             0               0   man   STA     normal
2306  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2307  1741700999             0               0   man   STA     normal
2308  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2309  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2310  1741700999             0               0   man   STA     normal
2311  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2312  1741700999             0               0   man   STA     normal
2313  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2314  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2310  1741700999             0               0   man   STA     normal
2311  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2312  1741700999             0               0   man   STA     normal
2313  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2314  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2315  1741700999             0               0   man   STA     normal
2316  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2317  1741700999             0               0   man   STA     normal
2318  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2319  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2315  1741700999             0               0   man   STA     normal
2316  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2317  1741700999             0               0   man   STA     normal
2318  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2319  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2320  1741700999             0               0   man   STA     normal
2321  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2322  1741700999             0               0   man   STA     normal
2323  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2324  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2320  1741700999             0               0   man   STA     normal
2321  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2322  1741700999             0               0   man   STA     normal
2323  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2324  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2325  1741700999             0               0   man   STA     normal
2326  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2327  1741700999             0               0   man   STA     normal
2328  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2329  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2325  1741700999             0               0   man   STA     normal
2326  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2327  1741700999             0               0   man   STA     normal
2328  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2329  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2330  1741700999             0               0   man   STA     normal
2331  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2332  1741700999             0               0   man   STA     normal
2333  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2334  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2330  1741700999             0               0   man   STA     normal
2331  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2332  1741700999             0               0   man   STA     normal
2333  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2334  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2335  1741700999             0               0   man   STA     normal
2336  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2337  1741700999             0               0   man   STA     normal
2338  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2339  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2335  1741700999             0               0   man   STA     normal
2336  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2337  1741700999             0               0   man   STA     normal
2338  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2339  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2340  1741700999             0               0   man   STA     normal
2341  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2342  1741700999             0               0   man   STA     normal
2343  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2344  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2340  1741700999             0               0   man   STA     normal
2341  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2342  1741700999             0               0   man   STA     normal
2343  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2344  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2345  1741700999             0               0   man   STA     normal
2346  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2347  1741700999             0               0   man   STA     normal
2348  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2349  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2345  1741700999             0               0   man   STA     normal
2346  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2347  1741700999             0               0   man   STA     normal
2348  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2349  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2350  1741700999             0               0   man   STA     normal
2351  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2352  1741700999             0               0   man   STA     normal
2353  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2354  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2350  1741700999             0               0   man   STA     normal
2351  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2352  1741700999             0               0   man   STA     normal
2353  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2354  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2355  1741700999             0               0   man   STA     normal
2356  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2357  1741700999             0               0   man   STA     normal
2358  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2359  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2355  1741700999             0               0   man   STA     normal
2356  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2357  1741700999             0               0   man   STA     normal
2358  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2359  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2360  1741700999             0               0   man   STA     normal
2361  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2362  1741700999             0               0   man   STA     normal
2363  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2364  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------


Simulating Theft Attack Flow...
   StartTime        SrcAddr          DstAddr Proto State Prediction
0          1  192.168.100.3  192.168.100.150   tcp  

  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2365  1741700999             0               0   man   STA     normal
2366  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2367  1741700999             0               0   man   STA     normal
2368  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2369  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2370  1741700999             0               0   man   STA     normal
2371  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2372  1741700999             0               0   man   STA     normal
2373  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2374  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2370  1741700999             0               0   man   STA     normal
2371  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2372  1741700999             0               0   man   STA     normal
2373  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2374  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2375  1741700999             0               0   man   STA     normal
2376  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2377  1741700999             0               0   man   STA     normal
2378  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2379  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2375  1741700999             0               0   man   STA     normal
2376  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2377  1741700999             0               0   man   STA     normal
2378  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2379  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2380  1741700999             0               0   man   STA     normal
2381  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2382  1741700999             0               0   man   STA     normal
2383  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2384  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------


Simulating Theft Attack Flow...
   StartTime        SrcAddr          DstAddr Proto State Prediction
0          1  192.168.100.3  192.168.100.150   tcp  

  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2380  1741700999             0               0   man   STA     normal
2381  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2382  1741700999             0               0   man   STA     normal
2383  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2384  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2385  1741700999             0               0   man   STA     normal
2386  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2387  1741700999             0               0   man   STA     normal
2388  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2389  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2385  1741700999             0               0   man   STA     normal
2386  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2387  1741700999             0               0   man   STA     normal
2388  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2389  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2390  1741700999             0               0   man   STA     normal
2391  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2392  1741700999             0               0   man   STA     normal
2393  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2394  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



  data['StartTime'] = pd.to_datetime(data['StartTime'], errors='coerce')



Real-time Network Flows
       StartTime       SrcAddr         DstAddr Proto State Prediction
2390  1741700999             0               0   man   STA     normal
2391  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2392  1741700999             0               0   man   STA     normal
2393  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2394  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
2395  1741700999             0               0   man   STA     normal
2396  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2397  1741700999             0               0   man   STA     normal
2398  1741700993  10.220.10.66      23.9.220.6   tcp   INT     normal
2399  1741700994  10.220.10.66  184.86.248.112   tcp   INT     normal
--------------------------------------------------



KeyboardInterrupt: 

In [7]:
pip install xgboost


Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.25.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)
Downloading xgboost-2.1.4-py3-none-manylinux_2_28_x86_64.whl (223.6 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.6/223.6 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hDownloading nvidia_nccl_cu12-2.25.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (201.4 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.4/201.4 MB[0m [31m41.8 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hInstalling collected packages: nvidia-nccl-cu12, xgboost
Successfully installed nvidia-nccl-cu12-2.25.1 xgboost-2.1.4
Note: you may need to restart the kernel to use updated packages.
