In [2]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler

# Column to predict; must be one of the label columns listed in _LABEL_COLUMNS.
TARGET_VARIABLE = 'Traffic Subtype'

# Columns removed from the frame before any further processing.
DROP_COLUMNS = ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Timestamp']

# The three label columns present in the dataset. Whichever one is chosen as
# the target, the other two are dropped from the frame.
_LABEL_COLUMNS = ['Label', 'Traffic Type', 'Traffic Subtype']

# Maps each possible target to the list of the other label columns to drop.
TARGET_TO_DROP = {
    target: [column for column in _LABEL_COLUMNS if column != target]
    for target in _LABEL_COLUMNS
}

In [3]:
# Read the full CSV into memory, printing a marker before and after the load.
print('Loading dataset...')
df = pd.read_csv('D:/Projects/IDS/Dataset/csv/data.csv')
print("data loaded")

# Clean-up, one rebinding per step so each intermediate frame can be released:
#   1. drop the columns listed in DROP_COLUMNS;
#   2. round every numeric value to 3 decimals, so rows that differ only
#      beyond the third decimal collapse into duplicates;
#   3. remove exact duplicate rows (label columns still take part here);
#   4. drop the label columns that are not the chosen target.
df = df.drop(columns=DROP_COLUMNS)
df = df.round(3)
df = df.drop_duplicates()
other_label_columns = TARGET_TO_DROP[TARGET_VARIABLE]
df = df.drop(columns=other_label_columns)

Loading dataset...
data loaded


In [4]:
# Features and target
X = df.drop(TARGET_VARIABLE, axis=1)
y = df[TARGET_VARIABLE]

# Encode class names as integers; `le` is kept so the integer labels can be
# mapped back to class names later.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Tunables for the downsampling step.
SAMPLE_SIZE = 80000   # maximum number of rows kept for the experiment
MIN_CLASS_COUNT = 2   # a stratified split needs at least 2 rows per class

# Stratified downsampling to 80,000 samples
print(f"Original total samples: {len(X)}")
if len(X) > SAMPLE_SIZE:
    X_sampled, _, y_sampled, _ = train_test_split(
        X, y_encoded,
        stratify=y_encoded,
        train_size=SAMPLE_SIZE,
        random_state=42
    )
else:
    # train_test_split raises when train_size is not smaller than the number
    # of rows, so a dataset that is already small enough is used as-is.
    X_sampled, y_sampled = X, y_encoded

# Put features and labels on the same fresh 0..n-1 index so the boolean mask
# computed on the labels lines up with the feature rows.
X_sampled = X_sampled.reset_index(drop=True)
y_sampled = pd.Series(y_sampled)

# Remove classes with fewer than MIN_CLASS_COUNT samples; the stratified
# train/test split performed later cannot handle singleton classes.
class_counts = y_sampled.value_counts()
valid_classes = class_counts[class_counts >= MIN_CLASS_COUNT].index
mask = y_sampled.isin(valid_classes)

# Apply the mask. Label values keep their original encoding from `le`, so
# they may be non-contiguous after filtering.
X = X_sampled[mask]
y_encoded = y_sampled[mask].values

print(f"Sampled total samples: {len(X)}")
print(f"Number of classes after filtering: {len(np.unique(y_encoded))}")

Original total samples: 7252154
Sampled total samples: 79997
Number of classes after filtering: 26


In [5]:
# Lift pandas' display limits so every row and column of a displayed frame is
# rendered. These are global options and stay in effect for later cells.
pd.set_option('display.max_rows', None)
pd.set_option("display.max_columns", None)

# Show the first and last five rows of the sampled feature matrix, then its
# dimensions.
display(X.head(), X.tail())

print("shape=", X.shape)

Unnamed: 0,Protocol,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Packet Length Min,Packet Length Max,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWR Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Fwd Segment Size Avg,Bwd Segment Size Avg,Fwd Bytes/Bulk Avg,Fwd Packet/Bulk Avg,Fwd Bulk Rate Avg,Bwd Bytes/Bulk Avg,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,FWD Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,6.0,10115029.0,2.0,1.0,1000.0,0.0,500.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,98.863,0.297,5057514.0,7121813.33,10093397.0,21632.0,10093397.0,10093400.0,0.0,10093397.0,10093397.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,20.0,0.198,0.099,0.0,500.0,375.0,250.0,62500.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,0.0,0.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1000.0,1.0,0.0,512.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,10093397.0,0.0,10093397.0,10093397.0
1,6.0,23592.0,1.0,1.0,50.0,0.0,50.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,2119.362,84.774,23592.0,0.0,23592.0,23592.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,20.0,42.387,42.387,0.0,50.0,33.333,28.868,833.333,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,6.0,2861194.0,4.0,0.0,200.0,0.0,50.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,69.901,1.398,953731.3,495716.77,1500049.0,532627.0,2861194.0,953731.3,495716.77,1500049.0,532627.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,0.0,1.398,0.0,50.0,50.0,50.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.5,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,200.0,0.0,0.0,512.0,0.0,3.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,6.0,12463665.0,2.0,0.0,80.0,0.0,50.0,30.0,40.0,14.142,0.0,0.0,0.0,0.0,6.419,0.16,12463660.0,0.0,12463665.0,12463665.0,12463665.0,12463660.0,0.0,12463665.0,12463665.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,0.0,0.16,0.0,30.0,50.0,43.333,11.547,133.333,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,65.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,80.0,0.0,0.0,512.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,12463665.0,0.0,12463665.0,12463665.0
4,17.0,8428962.0,3.0,0.0,114.0,0.0,38.0,38.0,38.0,0.0,0.0,0.0,0.0,0.0,13.525,0.356,4214481.0,2120313.424,5713769.0,2715193.0,8428962.0,4214481.0,2120313.424,5713769.0,2715193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.356,0.0,38.0,38.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.667,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,57.0,0.0,0.0,0.0,0.0,2.0,8.0,2715193.0,0.0,2715193.0,2715193.0,5713769.0,0.0,5713769.0,5713769.0


Unnamed: 0,Protocol,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Packet Length Min,Packet Length Max,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWR Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Fwd Segment Size Avg,Bwd Segment Size Avg,Fwd Bytes/Bulk Avg,Fwd Packet/Bulk Avg,Fwd Bulk Rate Avg,Bwd Bytes/Bulk Avg,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,FWD Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
79995,6.0,42784.0,1.0,1.0,500.0,0.0,500.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,11686.612,46.746,42784.0,0.0,42784.0,42784.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,20.0,20.0,23.373,23.373,0.0,500.0,333.333,288.675,83333.333,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79996,6.0,24234713.0,2.0,0.0,2712.0,0.0,1356.0,1356.0,1356.0,0.0,0.0,0.0,0.0,0.0,111.906,0.083,24234713.0,0.0,24234713.0,24234713.0,24234713.0,24234713.0,0.0,24234713.0,24234713.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.083,0.0,1356.0,1356.0,1356.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2034.0,1356.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2712.0,0.0,0.0,512.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,24234713.0,0.0,24234713.0,24234713.0
79997,6.0,23443.0,1.0,1.0,50.0,0.0,50.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,2132.833,85.313,23443.0,0.0,23443.0,23443.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,20.0,42.657,42.657,0.0,50.0,33.333,28.868,833.333,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79998,6.0,75145.0,1.0,1.0,50.0,0.0,50.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,665.38,26.615,75145.0,0.0,75145.0,75145.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,20.0,20.0,13.308,13.308,0.0,50.0,33.333,28.868,833.333,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79999,17.0,2984050.0,2.0,0.0,1000.0,0.0,500.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,335.115,0.67,2984050.0,0.0,2984050.0,2984050.0,2984050.0,2984050.0,0.0,2984050.0,2984050.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.67,0.0,500.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,750.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1000.0,0.0,0.0,0.0,0.0,1.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


shape= (79997, 77)


In [6]:
# Hold out 20% of the sampled rows for testing; stratifying on the encoded
# labels keeps class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Column groups. 'Protocol' has a numeric dtype but is handled as a category,
# so it is moved from the numeric group to the categorical group.
numerical_cols = X_train.select_dtypes(include=[np.number]).columns.to_list()
numerical_cols.remove('Protocol')
categorical_cols = X_train.select_dtypes(include=[object]).columns.to_list() + ['Protocol']

# Numeric features: fill missing values with the column mean, then standardize.
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical features: fill missing values with the most frequent value, then
# map each category to an integer code (ordinal encoding instead of one-hot).
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinal', OrdinalEncoder()),
])

# Route each column group through its own pipeline. Output columns come out
# numeric first, then categorical.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

In [7]:
# Show the first five rows of the training partition (head() defaults to 5).
display(X_train.head())

Unnamed: 0,Protocol,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Packet Length Min,Packet Length Max,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWR Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Fwd Segment Size Avg,Bwd Segment Size Avg,Fwd Bytes/Bulk Avg,Fwd Packet/Bulk Avg,Fwd Bulk Rate Avg,Bwd Bytes/Bulk Avg,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,FWD Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
66379,6.0,30214879.0,2.0,1.0,1000.0,0.0,500.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,33.096,0.099,15107439.5,21343700.0,30199713.0,15166.0,30199713.0,30199710.0,0.0,30199713.0,30199713.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,20.0,0.066,0.033,0.0,500.0,375.0,250.0,62500.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,0.0,0.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1000.0,1.0,0.0,512.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,30199713.0,0.0,30199713.0,30199713.0
19296,6.0,9774.0,1.0,1.0,1356.0,0.0,1356.0,1356.0,1356.0,0.0,0.0,0.0,0.0,0.0,138735.421,204.625,9774.0,0.0,9774.0,9774.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,20.0,102.312,102.312,0.0,1356.0,904.0,782.887,612912.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1356.0,1356.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
44001,6.0,9316296.0,4.0,5.0,410.0,761.0,410.0,0.0,102.5,205.0,761.0,0.0,152.2,340.33,125.694,0.966,1164537.0,3154951.0,8971787.0,23.0,9014974.0,3004991.0,5175362.495,8980953.0,23.0,9304349.0,2326087.25,4496326.873,9070327.0,31240.0,0.0,0.0,0.0,0.0,136.0,168.0,0.429,0.537,0.0,761.0,117.1,260.362,67788.544,2.0,2.0,0.0,2.0,8.0,0.0,0.0,0.0,1.0,130.111,102.5,152.2,0.0,0.0,0.0,0.0,0.0,0.0,4.0,410.0,5.0,761.0,65280.0,503.0,1.0,32.0,43187.0,0.0,43187.0,43187.0,8971787.0,0.0,8971787.0,8971787.0
36497,6.0,15383.0,1.0,1.0,50.0,0.0,50.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,3250.341,130.014,15383.0,0.0,15383.0,15383.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,20.0,65.007,65.007,0.0,50.0,33.333,28.868,833.333,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,50.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6778,6.0,10104955.0,2.0,1.0,1000.0,0.0,500.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,98.961,0.297,5052477.5,7050919.0,10038230.0,66725.0,10038230.0,10038230.0,0.0,10038230.0,10038230.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,40.0,20.0,0.198,0.099,0.0,500.0,375.0,250.0,62500.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,500.0,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1000.0,1.0,0.0,512.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,10038230.0,0.0,10038230.0,10038230.0


In [8]:
def _to_angles(frame, min_vals, range_vals):
    """Map each column of `frame` to rotation angles in [0, pi].

    Parameters
    ----------
    frame : pandas.DataFrame
        Preprocessed features whose columns match the index of `min_vals`.
    min_vals : pandas.Series
        Per-column minimum taken from the training data.
    range_vals : pandas.Series
        Per-column (max - min) taken from the training data, with zeros
        already replaced so the division is safe.

    Returns
    -------
    pandas.DataFrame
        `frame` min-max scaled with the training statistics, clipped to
        [0, 1], and multiplied by pi.
    """
    scaled = (frame - min_vals) / range_vals
    # Rows not seen during fitting can fall outside the training min/max.
    # Clipping keeps their angles inside [0, pi]; it is a no-op for the
    # training frame itself.
    return scaled.clip(lower=0.0, upper=1.0) * np.pi


# Fit the preprocessor on the training partition only, then apply the fitted
# transformation to both partitions.
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)
feature_names = preprocessor.get_feature_names_out()

X_train_df = pd.DataFrame(X_train_transformed, columns=feature_names)
X_test_df = pd.DataFrame(X_test_transformed, columns=feature_names)

# Angle encoding: Normalize to [0, π] using training-set statistics only
min_vals = X_train_df.min()
max_vals = X_train_df.max()
range_vals = (max_vals - min_vals).replace(0, 1)  # avoid division by zero

X_train_angle = _to_angles(X_train_df, min_vals, range_vals)
X_test_angle = _to_angles(X_test_df, min_vals, range_vals)

# Add labels. y_train / y_test are NumPy arrays, so the assignment is
# positional and matches the fresh 0..n-1 index of the frames above.
X_train_angle['label'] = y_train
X_test_angle['label'] = y_test

# Export both partitions as Excel workbooks.
X_train_angle.to_excel('D:/Projects/IDS/Dataset/tii_ssrc23_angle_encoded_train.xlsx', index=False)
X_test_angle.to_excel('D:/Projects/IDS/Dataset/tii_ssrc23_angle_encoded_test.xlsx', index=False)

In [9]:
# First and last five rows of the angle-encoded training partition.
print("Training data set with angle embedding")
display(X_train_angle.head(), X_train_angle.tail())

# Same preview for the angle-encoded test partition.
print("Testing data set with angle embedding")
display(X_test_angle.head(), X_test_angle.tail())

Training data set with angle embedding


Unnamed: 0,num__Flow Duration,num__Total Fwd Packet,num__Total Bwd packets,num__Total Length of Fwd Packet,num__Total Length of Bwd Packet,num__Fwd Packet Length Max,num__Fwd Packet Length Min,num__Fwd Packet Length Mean,num__Fwd Packet Length Std,num__Bwd Packet Length Max,num__Bwd Packet Length Min,num__Bwd Packet Length Mean,num__Bwd Packet Length Std,num__Flow Bytes/s,num__Flow Packets/s,num__Flow IAT Mean,num__Flow IAT Std,num__Flow IAT Max,num__Flow IAT Min,num__Fwd IAT Total,num__Fwd IAT Mean,num__Fwd IAT Std,num__Fwd IAT Max,num__Fwd IAT Min,num__Bwd IAT Total,num__Bwd IAT Mean,num__Bwd IAT Std,num__Bwd IAT Max,num__Bwd IAT Min,num__Fwd PSH Flags,num__Bwd PSH Flags,num__Fwd URG Flags,num__Bwd URG Flags,num__Fwd Header Length,num__Bwd Header Length,num__Fwd Packets/s,num__Bwd Packets/s,num__Packet Length Min,num__Packet Length Max,num__Packet Length Mean,num__Packet Length Std,num__Packet Length Variance,num__FIN Flag Count,num__SYN Flag Count,num__RST Flag Count,num__PSH Flag Count,num__ACK Flag Count,num__URG Flag Count,num__CWR Flag Count,num__ECE Flag Count,num__Down/Up Ratio,num__Average Packet Size,num__Fwd Segment Size Avg,num__Bwd Segment Size Avg,num__Fwd Bytes/Bulk Avg,num__Fwd Packet/Bulk Avg,num__Fwd Bulk Rate Avg,num__Bwd Bytes/Bulk Avg,num__Bwd Packet/Bulk Avg,num__Bwd Bulk Rate Avg,num__Subflow Fwd Packets,num__Subflow Fwd Bytes,num__Subflow Bwd Packets,num__Subflow Bwd Bytes,num__FWD Init Win Bytes,num__Bwd Init Win Bytes,num__Fwd Act Data Pkts,num__Fwd Seg Size Min,num__Active Mean,num__Active Std,num__Active Max,num__Active Min,num__Idle Mean,num__Idle Std,num__Idle Max,num__Idle Min,cat__Protocol,label
0,0.791102,3.4e-05,0.000674,0.000311,0.0,1.148243,1.148243,1.148243,0.0,0.0,0.0,0.0,0.0,1.6e-05,1.272345e-07,0.428203,1.317535,0.854866,0.000435,0.790705,0.855976,0.0,0.854866,0.855976,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.000421,7.539822e-08,2.1e-05,0.0,1.148243,0.861182,1.003208,0.320355,0.0,0.0,1.570796,0.0,0.000338,0.0,0.483322,0.0,0.0,0.765495,1.148243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.107545,0.006756,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.854866,0.0,0.854866,0.854866,1.570796,8
1,0.000256,0.0,0.000674,0.000421,0.0,3.114035,3.114035,3.114035,0.0,0.0,0.0,0.0,0.0,0.065378,0.0003213959,0.000277,0.0,0.000277,0.000282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000424,0.000421,0.000160683,0.063963,0.0,3.114035,2.076023,3.141593,3.141593,1.047198,0.0,1.570796,0.0,0.000338,0.0,0.0,0.0,0.523599,2.076023,3.114035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02464,0.0,0.0,1.427997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,10
2,0.243924,0.000102,0.003372,0.000127,0.001442,0.941559,0.0,0.23539,0.69104,1.773555,0.0,0.354711,1.121696,5.9e-05,1.489115e-06,0.033007,0.194753,0.253965,6e-06,0.236035,0.085173,0.241747,0.254225,6.519084e-07,0.243621,0.268382,0.45,0.436055,0.003604,0.0,0.0,0.0,0.0,0.002881,0.00354,6.455973e-07,0.000336,0.0,1.747626,0.268918,1.044789,0.347463,2.094395,0.029638,0.0,0.001107,0.002705,0.0,0.0,0.0,0.523599,0.199199,0.23539,0.354711,0.0,0.0,0.0,0.0,0.0,0.0,0.000681,0.044093,0.033781,0.082656,3.141593,0.024207,0.000409,2.284795,0.003625,0.0,0.001938,0.013498,0.253965,0.0,0.253965,0.253965,1.570796,11
3,0.000403,0.0,0.000674,1.6e-05,0.0,0.114824,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,0.001532,0.0002041972,0.000436,0.0,0.000435,0.000441,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000424,0.000421,0.0001020845,0.040641,0.0,0.114824,0.076549,0.115842,0.004271,0.0,0.014819,1.570796,0.0,0.000338,0.0,0.0,0.0,0.523599,0.07655,0.114824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002406,0.0,0.0,1.427997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,16
4,0.264573,3.4e-05,0.000674,0.000311,0.0,1.148243,1.148243,1.148243,0.0,0.0,0.0,0.0,0.0,4.7e-05,4.382522e-07,0.143207,0.43525,0.284153,0.001896,0.262826,0.284522,0.0,0.284153,0.284522,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.141593,0.0,0.000847,0.000421,2.827433e-07,6.2e-05,0.0,1.148243,0.861182,1.003208,0.320355,0.0,0.0,1.570796,0.0,0.000338,0.448799,0.0,0.0,0.0,0.765495,1.148243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.107545,0.006756,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.284153,0.0,0.284153,0.284153,1.570796,18


Unnamed: 0,num__Flow Duration,num__Total Fwd Packet,num__Total Bwd packets,num__Total Length of Fwd Packet,num__Total Length of Bwd Packet,num__Fwd Packet Length Max,num__Fwd Packet Length Min,num__Fwd Packet Length Mean,num__Fwd Packet Length Std,num__Bwd Packet Length Max,num__Bwd Packet Length Min,num__Bwd Packet Length Mean,num__Bwd Packet Length Std,num__Flow Bytes/s,num__Flow Packets/s,num__Flow IAT Mean,num__Flow IAT Std,num__Flow IAT Max,num__Flow IAT Min,num__Fwd IAT Total,num__Fwd IAT Mean,num__Fwd IAT Std,num__Fwd IAT Max,num__Fwd IAT Min,num__Bwd IAT Total,num__Bwd IAT Mean,num__Bwd IAT Std,num__Bwd IAT Max,num__Bwd IAT Min,num__Fwd PSH Flags,num__Bwd PSH Flags,num__Fwd URG Flags,num__Bwd URG Flags,num__Fwd Header Length,num__Bwd Header Length,num__Fwd Packets/s,num__Bwd Packets/s,num__Packet Length Min,num__Packet Length Max,num__Packet Length Mean,num__Packet Length Std,num__Packet Length Variance,num__FIN Flag Count,num__SYN Flag Count,num__RST Flag Count,num__PSH Flag Count,num__ACK Flag Count,num__URG Flag Count,num__CWR Flag Count,num__ECE Flag Count,num__Down/Up Ratio,num__Average Packet Size,num__Fwd Segment Size Avg,num__Bwd Segment Size Avg,num__Fwd Bytes/Bulk Avg,num__Fwd Packet/Bulk Avg,num__Fwd Bulk Rate Avg,num__Bwd Bytes/Bulk Avg,num__Bwd Packet/Bulk Avg,num__Bwd Bulk Rate Avg,num__Subflow Fwd Packets,num__Subflow Fwd Bytes,num__Subflow Bwd Packets,num__Subflow Bwd Bytes,num__FWD Init Win Bytes,num__Bwd Init Win Bytes,num__Fwd Act Data Pkts,num__Fwd Seg Size Min,num__Active Mean,num__Active Std,num__Active Max,num__Active Min,num__Idle Mean,num__Idle Std,num__Idle Max,num__Idle Min,cat__Protocol,label
63992,0.001943,0.0,0.000674,9e-06,0.0,0.068895,0.068895,0.068895,0.0,0.0,0.0,0.0,0.0,0.00019,4.229997e-05,0.002104,0.0,0.002101,0.002109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.000421,2.113506e-05,0.008423,0.0,0.068895,0.04593,0.069506,0.001538,0.0,0.0,1.570796,0.0,0.000338,0.0,0.241661,0.0,0.523599,0.04593,0.068895,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02464,0.0,0.0,2.855993,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,8
63993,0.685894,3.4e-05,0.000674,2.5e-05,0.0,0.114824,0.068895,0.091859,0.047672,0.0,0.0,0.0,0.0,1e-06,1.523672e-07,0.371256,1.139125,0.740143,0.001414,0.684592,0.741104,0.0,0.740143,0.741104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001271,0.000421,9.110619e-08,2.4e-05,0.0,0.114824,0.063153,0.082729,0.002178,0.0,0.0,1.570796,0.0,0.000338,0.0,0.483322,0.0,0.0,0.056137,0.091859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.008604,0.006756,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.740143,0.0,0.740143,0.740143,1.570796,8
63994,0.121876,3.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.471681e-07,0.131937,0.0,0.131766,0.131942,0.121876,0.131937,0.0,0.131766,0.131937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001356,0.0,6.471681e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,0.0,0.000676,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.0,0.0,0.0,0.024255,0.0,0.0,2.284795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,11
63995,0.003181,0.0,0.000674,1.6e-05,0.0,0.114824,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,0.000194,2.582546e-05,0.003444,0.0,0.00344,0.003449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000424,0.000421,1.289938e-05,0.005145,0.0,0.114824,0.076549,0.115842,0.004271,0.0,0.0,1.570796,0.0,0.000338,0.0,0.241661,0.0,0.523599,0.07655,0.114824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02464,0.0,0.0,1.427997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,8
63996,0.504506,3.4e-05,0.000674,3.1e-05,0.0,0.114824,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,2e-06,2.167699e-07,0.273076,0.83815,0.544497,0.000953,0.50363,0.545204,0.0,0.544497,0.545204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.000421,1.350885e-07,3.3e-05,0.0,0.114824,0.086118,0.100321,0.003204,0.0,0.0,1.570796,0.0,0.000338,0.0,0.483322,0.0,0.0,0.07655,0.114824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.010754,0.006756,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.544497,0.0,0.544497,0.544497,1.570796,8


Testing data set with angle embedding


Unnamed: 0,num__Flow Duration,num__Total Fwd Packet,num__Total Bwd packets,num__Total Length of Fwd Packet,num__Total Length of Bwd Packet,num__Fwd Packet Length Max,num__Fwd Packet Length Min,num__Fwd Packet Length Mean,num__Fwd Packet Length Std,num__Bwd Packet Length Max,num__Bwd Packet Length Min,num__Bwd Packet Length Mean,num__Bwd Packet Length Std,num__Flow Bytes/s,num__Flow Packets/s,num__Flow IAT Mean,num__Flow IAT Std,num__Flow IAT Max,num__Flow IAT Min,num__Fwd IAT Total,num__Fwd IAT Mean,num__Fwd IAT Std,num__Fwd IAT Max,num__Fwd IAT Min,num__Bwd IAT Total,num__Bwd IAT Mean,num__Bwd IAT Std,num__Bwd IAT Max,num__Bwd IAT Min,num__Fwd PSH Flags,num__Bwd PSH Flags,num__Fwd URG Flags,num__Bwd URG Flags,num__Fwd Header Length,num__Bwd Header Length,num__Fwd Packets/s,num__Bwd Packets/s,num__Packet Length Min,num__Packet Length Max,num__Packet Length Mean,num__Packet Length Std,num__Packet Length Variance,num__FIN Flag Count,num__SYN Flag Count,num__RST Flag Count,num__PSH Flag Count,num__ACK Flag Count,num__URG Flag Count,num__CWR Flag Count,num__ECE Flag Count,num__Down/Up Ratio,num__Average Packet Size,num__Fwd Segment Size Avg,num__Bwd Segment Size Avg,num__Fwd Bytes/Bulk Avg,num__Fwd Packet/Bulk Avg,num__Fwd Bulk Rate Avg,num__Bwd Bytes/Bulk Avg,num__Bwd Packet/Bulk Avg,num__Bwd Bulk Rate Avg,num__Subflow Fwd Packets,num__Subflow Fwd Bytes,num__Subflow Bwd Packets,num__Subflow Bwd Bytes,num__FWD Init Win Bytes,num__Bwd Init Win Bytes,num__Fwd Act Data Pkts,num__Fwd Seg Size Min,num__Active Mean,num__Active Std,num__Active Max,num__Active Min,num__Idle Mean,num__Idle Std,num__Idle Max,num__Idle Min,cat__Protocol,label
0,0.221711,3.4e-05,0.0,2.5e-05,0.0,0.114824,0.068895,0.091859,0.047672,0.0,0.0,0.0,0.0,4e-06,3.424336e-07,0.240013,0.0,0.239702,0.240018,0.221711,0.240013,0.0,0.239702,0.240013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001271,0.0,3.424336e-07,0.0,0.068895,0.114824,0.099514,0.046336,0.000683,0.0,0.0,3.141593,0.0,0.0,0.0,0.0,0.0,0.0,0.099514,0.091859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.008604,0.0,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.239702,0.0,0.239702,0.239702,1.570796,15
1,0.004892,0.0,0.000674,9e-06,0.0,0.068895,0.068895,0.068895,0.0,0.0,0.0,0.0,0.0,7.6e-05,1.678553e-05,0.005296,0.0,0.005289,0.005301,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.000421,8.378628e-06,0.003346,0.0,0.068895,0.04593,0.069506,0.001538,0.0,0.0,1.570796,0.0,0.000338,0.0,0.241661,0.0,0.523599,0.04593,0.068895,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02464,0.0,0.0,2.855993,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,8
2,0.000412,0.0,0.000674,1.6e-05,0.0,0.114824,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,0.001499,0.000199832,0.000446,0.0,0.000445,0.000451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000424,0.000421,9.990108e-05,0.039772,0.0,0.114824,0.076549,0.115842,0.004271,0.0,0.014819,1.570796,0.0,0.000338,0.0,0.0,0.0,0.523599,0.07655,0.114824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02464,0.0,0.0,1.427997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,16
3,0.27247,3.4e-05,0.0,0.000842,0.0,3.114035,3.114035,3.114035,0.0,0.0,0.0,0.0,0.0,0.000123,2.733186e-07,0.294961,0.0,0.294579,0.294966,0.27247,0.294961,0.0,0.294579,0.294961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.0,2.733186e-07,0.0,3.114035,3.114035,3.114035,0.0,0.0,0.0,0.0,3.141593,0.0,0.0,0.0,0.0,0.0,0.0,3.114035,3.114035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.291661,0.0,0.0,0.002406,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.294579,0.0,0.294579,0.294579,1.570796,15
4,0.2289,3.4e-05,0.0,3.1e-05,0.0,0.114824,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,5e-06,3.31438e-07,0.247796,0.0,0.247474,0.2478,0.228901,0.247796,0.0,0.247474,0.247796,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.0,3.31438e-07,0.0,0.114824,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,0.0,0.000676,0.0,0.0,0.0,0.0,0.114824,0.114824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.010754,0.0,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.247474,0.0,0.247474,0.247474,1.570796,7


Unnamed: 0,num__Flow Duration,num__Total Fwd Packet,num__Total Bwd packets,num__Total Length of Fwd Packet,num__Total Length of Bwd Packet,num__Fwd Packet Length Max,num__Fwd Packet Length Min,num__Fwd Packet Length Mean,num__Fwd Packet Length Std,num__Bwd Packet Length Max,num__Bwd Packet Length Min,num__Bwd Packet Length Mean,num__Bwd Packet Length Std,num__Flow Bytes/s,num__Flow Packets/s,num__Flow IAT Mean,num__Flow IAT Std,num__Flow IAT Max,num__Flow IAT Min,num__Fwd IAT Total,num__Fwd IAT Mean,num__Fwd IAT Std,num__Fwd IAT Max,num__Fwd IAT Min,num__Bwd IAT Total,num__Bwd IAT Mean,num__Bwd IAT Std,num__Bwd IAT Max,num__Bwd IAT Min,num__Fwd PSH Flags,num__Bwd PSH Flags,num__Fwd URG Flags,num__Bwd URG Flags,num__Fwd Header Length,num__Bwd Header Length,num__Fwd Packets/s,num__Bwd Packets/s,num__Packet Length Min,num__Packet Length Max,num__Packet Length Mean,num__Packet Length Std,num__Packet Length Variance,num__FIN Flag Count,num__SYN Flag Count,num__RST Flag Count,num__PSH Flag Count,num__ACK Flag Count,num__URG Flag Count,num__CWR Flag Count,num__ECE Flag Count,num__Down/Up Ratio,num__Average Packet Size,num__Fwd Segment Size Avg,num__Bwd Segment Size Avg,num__Fwd Bytes/Bulk Avg,num__Fwd Packet/Bulk Avg,num__Fwd Bulk Rate Avg,num__Bwd Bytes/Bulk Avg,num__Bwd Packet/Bulk Avg,num__Bwd Bulk Rate Avg,num__Subflow Fwd Packets,num__Subflow Fwd Bytes,num__Subflow Bwd Packets,num__Subflow Bwd Bytes,num__FWD Init Win Bytes,num__Bwd Init Win Bytes,num__Fwd Act Data Pkts,num__Fwd Seg Size Min,num__Active Mean,num__Active Std,num__Active Max,num__Active Min,num__Idle Mean,num__Idle Std,num__Idle Max,num__Idle Min,cat__Protocol,label
15995,0.5285,3.4e-05,0.0,2.5e-05,0.0,0.114824,0.068895,0.091859,0.047672,0.0,0.0,0.0,0.0,2e-06,1.272345e-07,0.572126,0.0,0.571385,0.572131,0.5285,0.572127,0.0,0.571385,0.572127,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001271,0.0,1.272345e-07,0.0,0.068895,0.114824,0.099514,0.046336,0.000683,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.523599,0.0,0.099514,0.091859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.008604,0.0,0.0,0.02464,0.0,0.000409,1.427997,0.0,0.0,0.0,0.0,0.571385,0.0,0.571385,0.571385,1.570796,9
15996,0.088973,3.4e-05,0.000674,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.358739e-06,0.048159,0.145959,0.095424,0.000775,0.088263,0.095548,0.0,0.095424,0.095548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.141593,0.0,0.000847,0.000421,8.969247e-07,0.000184,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,0.0,0.000338,0.448799,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.0,0.006756,0.0,0.02464,0.0,0.0,1.427997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,19
15997,1.454734,0.000136,0.000674,0.0021,0.0,3.114035,3.068105,3.104849,0.03015,0.0,0.0,0.0,0.0,5.7e-05,1.413717e-07,0.314964,0.562842,0.720528,0.000494,1.454283,0.393583,0.360482,0.720528,0.273046,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002542,0.000421,1.130973e-07,1.1e-05,0.0,3.114035,2.662612,2.051816,1.34007,0.0,0.0,1.570796,0.0,0.000338,0.0,1.208305,0.0,0.0,2.07092,3.104849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00017,0.18175,0.0,0.0,0.02464,0.0,0.001635,1.427997,0.0,0.0,0.0,0.0,0.393072,0.331122,0.720528,0.272692,1.570796,8
15998,0.000903,0.0,0.000674,9e-06,0.0,0.068895,0.068895,0.068895,0.0,0.0,0.0,0.0,0.0,0.00041,9.101351e-05,0.000978,0.0,0.000977,0.000983,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000847,0.000421,4.54934e-05,0.018118,0.0,0.068895,0.04593,0.069506,0.001538,0.0,0.0,1.570796,0.0,0.000338,0.0,0.0,0.261799,0.523599,0.04593,0.068895,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02464,0.0,0.0,2.855993,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.570796,9
15999,1.062616,6.8e-05,0.0,0.001263,0.0,3.114035,3.114035,3.114035,0.0,0.0,0.0,0.0,0.0,4.7e-05,8.79646e-08,0.575166,0.854601,0.85153,0.297702,1.062616,0.575166,0.646682,0.85153,0.297697,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001271,0.0,8.79646e-08,0.0,3.114035,3.114035,3.114035,0.0,0.0,0.0,0.044456,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.768031,3.114035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00017,0.218746,0.0,0.0,0.02464,0.0,0.000818,1.427997,0.0,0.0,0.0,0.0,0.574421,0.594011,0.85153,0.297311,1.570796,16


In [10]:
import joblib

# Persist the fitted LabelEncoder so the integer values in the exported
# 'label' column can be mapped back to class names later. The file is written
# relative to the current working directory; joblib.dump returns the list of
# written file names, which the notebook shows as the cell output.
label_encoder_path = 'label_encoder.pkl'
joblib.dump(le, label_encoder_path)

['label_encoder.pkl']