In [29]:
import gradio as gr
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

In [30]:
# Load the three serialized models used by the prediction pipeline, in order:
# traffic type, suspicious flag, attack type.
# NOTE: joblib deserialises via pickle, so only load model files you trust.
model1, model2, model3 = (
    joblib.load(model_path)
    for model_path in (
        r"models/traffic_type_model.joblib",
        r"models/is_Suspicious_model.joblib",
        r"models/attack_type_model.joblib",
    )
)

In [31]:
# Label lookup tables: position i holds the display label for predicted class id i.
# NOTE(review): the ordering is assumed to match the encoding used at training
# time -- confirm against the training notebook before editing.

# Labels for the traffic-type model (indexed by its argmax class id).
le1_classes = [
    "DNS (UDP)",
    "FTP (TCP)",
    "HTTP/HTTPS (TCP)",
    "NTP (UDP)",
    "NetBIOS (UDP/TCP)",
    "Other",
    "RDP (TCP)",
    "SMB (TCP)",
    "SNMP (UDP)",
    "SSH (TCP)",
]

# Labels for the suspicious-traffic model (0 -> first entry, 1 -> second entry).
le2_classes = [
    "Normal Traffic",
    "Attack Traffic",
]

# Labels for the attack-type model (indexed by its argmax class id).
le3_classes = [
    "BENIGN",
    "Bot",
    "DDoS",
    "DoS GoldenEye",
    "DoS Hulk",
    "DoS Slowhttptest",
    "DoS Slowloris",
    "FTP-Patator",
    "Heartbleed",
    "Infiltration",
    "PortScan",
    "SSH-Patator",
    "Web Attack Brute Force",
    "Web Attack Sql Injection",
    "Web Attack XSS",
]


In [32]:
# Input columns fed to each model; the list order is the column order of the
# feature matrix built for that model.

# Features for the traffic-type model.
top_features1 = [
    "Fwd IAT Max",
    "Fwd IAT Mean",
    "Destination Port",
    "Packet Length Mean",
    "FIN Flag Count",
    "Min Packet Length",
    "Fwd Packet Length Min",
    "Average Packet Size",
    "Init_Win_bytes_backward",
    "Fwd Packet Length Mean",
]

# Features for the suspicious-traffic model.
top_features2 = [
    "Bwd Packet Length Std",
    "Average Packet Size",
    "Bwd Header Length",
    "Flow Bytes/s",
    "Idle Mean",
    "Max Packet Length",
    "Fwd Packet Length Std",
    "Destination Port",
    "FIN Flag Count",
    "Active Std",
]

# Features for the attack-type model.
top_features3 = [
    "Bwd Packet Length Std",
    "Idle Mean",
    "act_data_pkt_fwd",
    "Bwd Packet Length Mean",
    "Flow Bytes/s",
    "Total Length of Fwd Packets",
    "Fwd IAT Std",
    "Bwd Header Length",
    "Total Backward Packets",
    "Average Packet Size",
]


In [38]:
def preprocess_input(df, top_features):
    """Clean a raw traffic frame and return only the requested feature columns.

    Applied to the requested features that are present in ``df``:

    1. ``+inf`` / ``-inf`` are replaced with ``NaN``.
    2. Missing values in numeric columns are filled with that column's median.
    3. ``object`` / ``category`` columns are cast to ``str`` and label-encoded.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw input rows. The caller's frame is left untouched.
    top_features : list of str
        Wanted column names, in the order the result should have.
        Names that are not columns of ``df`` are skipped silently.

    Returns
    -------
    pandas.DataFrame
        A new frame with the available ``top_features`` columns, in
        ``top_features`` order.
    """
    available_features = [col for col in top_features if col in df.columns]

    # Every step below is column-wise, so narrowing to the needed columns first
    # gives the same result while skipping work on columns that get dropped.
    # The explicit copy keeps the caller's frame unmodified.
    features = df[available_features].copy()
    features = features.replace([np.inf, -np.inf], np.nan)

    for col in features.select_dtypes(include=[np.number]).columns:
        # Plain assignment instead of `df[col].fillna(..., inplace=True)`:
        # the chained in-place form does not update the frame under pandas
        # Copy-on-Write, which would silently leave the NaNs in place.
        features[col] = features[col].fillna(features[col].median())

    for col in features.select_dtypes(include=['object', 'category']).columns:
        # NOTE(review): the encoder is fit on the data being scored, so the
        # integer codes depend on the values present in this input -- confirm
        # this matches how the models were trained.
        features[col] = LabelEncoder().fit_transform(features[col].astype(str))

    return features


In [39]:

def _predict_with(model, df, top_features):
    """Preprocess ``df`` for ``top_features`` and return ``model``'s raw predictions."""
    # Pass a copy so preprocessing can never alter the frame shared by all stages.
    features = preprocess_input(df.copy(), top_features)
    return model.predict(xgb.DMatrix(features))


def pipeline_predict(uploaded_file):
    """Run the three-stage prediction pipeline on an uploaded CSV file.

    Stages:

    1. ``model1`` -> traffic type (argmax over its per-class output).
    2. ``model2`` -> suspicious flag (output thresholded at 0.5).
    3. ``model3`` -> attack type (argmax), reported only for rows flagged as
       suspicious; other rows get the string ``'None'``.

    Parameters
    ----------
    uploaded_file : str, os.PathLike or object with a ``.name`` path attribute
        The CSV to score: either a filesystem path, or an upload object whose
        ``.name`` is the path of the stored file.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns ``Predicted_Traffic_Type``,
        ``Predicted_Suspicious`` and ``Predicted_Attack_Type``.

    Raises
    ------
    gr.Error
        If no file was provided (``uploaded_file`` is ``None``).
    """
    if uploaded_file is None:
        # Submitting the form without a file would otherwise fail with an
        # opaque AttributeError on `.name`.
        raise gr.Error("Please upload a CSV file.")

    # Path-like values are used as-is; note that `pathlib.Path.name` is only
    # the basename, so `.name` must not be read from those.
    if isinstance(uploaded_file, str) or hasattr(uploaded_file, "__fspath__"):
        csv_path = uploaded_file
    else:
        csv_path = uploaded_file.name

    df = pd.read_csv(csv_path)
    # Strip stray whitespace from headers so they match the feature lists.
    df.columns = df.columns.str.strip()

    # NOTE(review): argmax over axis 1 assumes model1/model3 return one score
    # per class for every row -- confirm against the training objective.
    pred1 = np.argmax(_predict_with(model1, df, top_features1), axis=1)
    pred2 = (_predict_with(model2, df, top_features2) > 0.5).astype(int)
    pred3_all = np.argmax(_predict_with(model3, df, top_features3), axis=1)

    df['Predicted_Traffic_Type'] = [le1_classes[i] for i in pred1]
    df['Predicted_Suspicious'] = [le2_classes[i] for i in pred2]
    # The attack type is only meaningful for rows flagged as suspicious.
    df['Predicted_Attack_Type'] = [
        le3_classes[attack_id] if is_suspicious == 1 else 'None'
        for is_suspicious, attack_id in zip(pred2, pred3_all)
    ]

    display_cols = ['Predicted_Traffic_Type', 'Predicted_Suspicious', 'Predicted_Attack_Type']
    return df[display_cols]

In [35]:
# Web UI: a single CSV upload in, a table of per-row predictions out.
gr_interface = gr.Interface(
    title="Network Traffic Intrusion Detection",
    description=(
        "Upload network traffic CSV file, get traffic type, "
        "suspicious prediction, and attack type."
    ),
    fn=pipeline_predict,
    inputs=gr.File(file_types=[".csv"]),
    outputs=gr.Dataframe(type="pandas"),
)

# Start the server only when this code runs as the main module.
if __name__ == "__main__":
    gr_interface.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.
