In [None]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Names for the 43 fields of each record, in file order. Written as a
# whitespace-separated block and split into a plain list of strings.
columns = """
    duration protocol_type service flag src_bytes dst_bytes land
    wrong_fragment urgent hot num_failed_logins logged_in
    num_compromised root_shell su_attempted num_root
    num_file_creations num_shells num_access_files num_outbound_cmds
    is_host_login is_guest_login count srv_count serror_rate
    srv_serror_rate rerror_rate srv_rerror_rate same_srv_rate
    diff_srv_rate srv_diff_host_rate dst_host_count
    dst_host_srv_count dst_host_same_srv_rate dst_host_diff_srv_rate
    dst_host_same_src_port_rate dst_host_srv_diff_host_rate
    dst_host_serror_rate dst_host_srv_serror_rate
    dst_host_rerror_rate dst_host_srv_rerror_rate label difficulty
""".split()

# Explicit names mean the first line of the file is read as data, not as a
# header; header=None just spells out what pandas already does in that case.
data = pd.read_csv("KDDTrain+.txt", header=None, names=columns)


In [None]:
# Columns excluded from the feature set before training.
DROP_COLS = [
    "land", "wrong_fragment", "urgent", "hot",
    "num_failed_logins", "logged_in", "num_compromised",
    "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files",
    "num_outbound_cmds", "is_host_login", "is_guest_login",
    "difficulty",
]

# Removes the columns from `data` in place; raises KeyError if any is absent
# (e.g. when this cell is executed a second time on the same frame).
data.drop(DROP_COLS, axis="columns", inplace=True)


In [None]:
# Raw label -> coarse traffic class. 'spy' is listed under r2l explicitly;
# without an entry it would fall through to the 'u2r' default below.
_ATTACK_CLASS_BY_LABEL = {
    'normal': 'benign',
    **dict.fromkeys(
        ['neptune', 'smurf', 'back', 'teardrop', 'land', 'pod'], 'dos'),
    **dict.fromkeys(
        ['satan', 'ipsweep', 'portsweep', 'nmap'], 'probe'),
    **dict.fromkeys(
        ['guess_passwd', 'ftp_write', 'imap', 'phf', 'multihop',
         'warezmaster', 'warezclient', 'spy'], 'r2l'),
    **dict.fromkeys(
        ['buffer_overflow', 'loadmodule', 'perl', 'rootkit'], 'u2r'),
}


def map_attack(label):
    """Map a raw record label to one of five coarse traffic classes.

    Parameters
    ----------
    label : str
        Raw label value, e.g. ``'normal'`` or ``'neptune'``.

    Returns
    -------
    str
        ``'benign'``, ``'dos'``, ``'probe'``, ``'r2l'`` or ``'u2r'``.

    Notes
    -----
    Any label that is not in the lookup table is returned as ``'u2r'``.
    This default is kept so existing callers see the same result for
    unlisted labels; it also means a misspelled or new label is silently
    counted as ``'u2r'``.
    """
    return _ATTACK_CLASS_BY_LABEL.get(label, 'u2r')

# Derive the coarse class for every row, then remove the raw label column
# from `data` in place so it cannot be picked up as a feature later.
data['attack_class'] = data['label'].map(map_attack)
del data['label']


In [None]:
cat_cols = ['protocol_type','service','flag']

# Integer-encode each categorical column in place. The fitted encoder for
# every column is kept in `feature_encoders` (keyed by column name) so the
# exact same string -> integer mapping can be applied to new data at
# prediction time; a freshly fitted encoder on other data would generally
# assign different integers to the same strings.
feature_encoders = {}
for col in cat_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    feature_encoders[col] = encoder


In [None]:
# Encoder for the target: learn the set of class names first, then store
# their integer codes in a new 'label_enc' column. `le` is kept because its
# `classes_` attribute is needed to turn the codes back into names.
le = LabelEncoder()
le.fit(data['attack_class'])
data['label_enc'] = le.transform(data['attack_class'])


In [None]:
# Every column except the two target representations is a model input.
FEATURES = data.columns.drop(['attack_class','label_enc'])

X = data[FEATURES]
y = data['label_enc']

# 80/20 split with a fixed seed; stratifying on y keeps the class
# proportions the same in the train and test parts.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# Hyperparameters for the forest, gathered in one place; random_state is
# fixed so repeated runs build the same trees.
rf_params = {
    "n_estimators": 80,
    "max_depth": 12,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)

# Left as the final expression so the notebook shows the fitted estimator.
rf.fit(X_train, y_train)


In [None]:
# Predict on the held-out rows and report overall accuracy followed by the
# per-class metrics, with rows named after the original class strings.
pred = rf.predict(X_test)

accuracy = accuracy_score(y_test, pred)
print("Accuracy:", accuracy)

report = classification_report(y_test, pred, target_names=le.classes_)
print(report)


In [None]:
# Persist the trained model and the target-label encoder. The file names
# must match the ones opened by the loading cell ("random_forest_model.pkl"
# and "label_encoder.pkl"); otherwise a fresh run either fails to find the
# files or silently loads stale artifacts from an earlier session.
with open("random_forest_model.pkl", "wb") as f:
    pickle.dump(rf, f)

with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)


In [None]:
def _load_pickle(path):
    """Open `path` in binary mode and return the unpickled object."""
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Read the persisted model and target-label encoder back from disk.
rf_model = _load_pickle("random_forest_model.pkl")
label_encoder = _load_pickle("label_encoder.pkl")


In [None]:
# Original export header -> name used in the rest of the notebook.
WS_RENAME = {
    'Time': 'time',
    'Source': 'src_ip',
    'Destination': 'dst_ip',
    'Protocol': 'protocol',
    'Length': 'length',
}
# The renamed columns, in the same order, are the only ones kept.
WS_KEEP = list(WS_RENAME.values())

# Rows the parser cannot handle are skipped instead of aborting the read.
ws = pd.read_csv("wireshark.csv", encoding="latin1",
                 on_bad_lines="skip", low_memory=False)

# Strip stray whitespace around header names so the rename keys match.
ws.columns = ws.columns.str.strip()

ws = ws.rename(columns=WS_RENAME)[WS_KEEP]
