# 1. Importing Libraries

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# 2. Load the dataset

In [11]:
df = pd.read_csv('embedded_system_network_security_dataset.csv')
df.head()

Unnamed: 0,packet_size,inter_arrival_time,src_port,dst_port,packet_count_5s,mean_packet_size,spectral_entropy,frequency_band_energy,label,protocol_type_TCP,protocol_type_UDP,src_ip_192.168.1.2,src_ip_192.168.1.3,dst_ip_192.168.1.5,dst_ip_192.168.1.6,tcp_flags_FIN,tcp_flags_SYN,tcp_flags_SYN-ACK
0,0.405154,0.620362,62569,443,0.857143,0.0,0.834066,0.534891,0.0,False,True,True,False,False,False,False,False,False
1,0.527559,0.741288,59382,443,0.785714,0.0,0.147196,0.990757,0.0,False,True,False,False,False,True,False,True,False
2,0.226199,0.485116,65484,80,0.285714,0.0,0.855192,0.031781,0.0,False,True,False,False,True,False,False,False,False
3,0.573372,0.450965,51707,53,0.142857,0.0,0.15322,0.169958,0.0,False,False,False,True,False,False,False,False,False
4,0.651396,0.88874,26915,53,0.714286,0.0,0.923916,0.552053,0.0,True,False,False,True,False,False,False,True,False


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 18 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   packet_size            1000 non-null   float64
 1   inter_arrival_time     1000 non-null   float64
 2   src_port               1000 non-null   int64  
 3   dst_port               1000 non-null   int64  
 4   packet_count_5s        1000 non-null   float64
 5   mean_packet_size       1000 non-null   float64
 6   spectral_entropy       1000 non-null   float64
 7   frequency_band_energy  1000 non-null   float64
 8   label                  1000 non-null   float64
 9   protocol_type_TCP      1000 non-null   bool   
 10  protocol_type_UDP      1000 non-null   bool   
 11  src_ip_192.168.1.2     1000 non-null   bool   
 12  src_ip_192.168.1.3     1000 non-null   bool   
 13  dst_ip_192.168.1.5     1000 non-null   bool   
 14  dst_ip_192.168.1.6     1000 non-null   bool   
 15  tcp_f

In [13]:
df.describe()

Unnamed: 0,packet_size,inter_arrival_time,src_port,dst_port,packet_count_5s,mean_packet_size,spectral_entropy,frequency_band_energy,label
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.502446,0.512259,32024.617,199.769,0.506857,0.0,0.495222,0.485651,0.1
std,0.289606,0.28113,18520.890349,180.078488,0.303271,0.0,0.292927,0.295953,0.30015
min,0.0,0.0,1038.0,53.0,0.0,0.0,0.0,0.0,0.0
25%,0.256263,0.275909,16245.25,53.0,0.267857,0.0,0.236912,0.228039,0.0
50%,0.499642,0.515971,31883.0,80.0,0.5,0.0,0.50429,0.467905,0.0
75%,0.74141,0.746523,47746.25,443.0,0.785714,0.0,0.761556,0.750876,0.0
max,1.0,1.0,65484.0,443.0,1.0,0.0,1.0,1.0,1.0


# 3. Data Preprocessing

In [14]:
df.isnull().sum()

packet_size              0
inter_arrival_time       0
src_port                 0
dst_port                 0
packet_count_5s          0
mean_packet_size         0
spectral_entropy         0
frequency_band_energy    0
label                    0
protocol_type_TCP        0
protocol_type_UDP        0
src_ip_192.168.1.2       0
src_ip_192.168.1.3       0
dst_ip_192.168.1.5       0
dst_ip_192.168.1.6       0
tcp_flags_FIN            0
tcp_flags_SYN            0
tcp_flags_SYN-ACK        0
dtype: int64

In [16]:
df = df.dropna() # Drop rows with missing values

In [17]:
# Convert categorical columns into numeric
df = pd.get_dummies(df)

# 4. Feature Scaling

In [28]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# convert the scaled data back to a DataFrame
X_scaled_df = pd.DataFrame(X_scaled, columns=df.columns)
X_scaled_df.head()

Unnamed: 0,packet_size,inter_arrival_time,src_port,dst_port,packet_count_5s,mean_packet_size,spectral_entropy,frequency_band_energy,label,protocol_type_TCP,protocol_type_UDP,src_ip_192.168.1.2,src_ip_192.168.1.3,dst_ip_192.168.1.5,dst_ip_192.168.1.6,tcp_flags_FIN,tcp_flags_SYN,tcp_flags_SYN-ACK,Anomaly
0,-0.336115,0.384724,1.650011,1.35137,1.155602,0.0,1.157333,0.16646,-0.333333,-0.671847,1.347925,1.471243,-0.714545,-0.712949,-0.722544,-0.57889,-0.554247,-0.585049,0.229416
1,0.086758,0.81508,1.477849,1.35137,0.919957,0.0,-1.188696,1.707564,-0.333333,-0.671847,1.347925,-0.679697,-0.714545,-0.712949,1.383999,-0.57889,1.80425,-0.585049,0.229416
2,-0.95435,-0.096598,1.807479,-0.665426,-0.729556,0.0,1.229489,-1.534356,-0.333333,-0.671847,1.347925,-0.679697,-0.714545,1.402626,-0.722544,-0.57889,-0.554247,-0.585049,0.229416
3,0.245026,-0.218135,1.063244,-0.815436,-1.200846,0.0,-1.16812,-1.067234,-0.333333,-0.671847,-0.741881,-0.679697,1.399493,-0.712949,-0.722544,-0.57889,-0.554247,-0.585049,0.229416
4,0.514577,1.339842,-0.276022,-0.815436,0.684312,0.0,1.464215,0.22448,-0.333333,1.488433,-0.741881,-0.679697,1.399493,-0.712949,-0.722544,-0.57889,1.80425,-0.585049,0.229416


# 5. Applying Isolation Forest

In [29]:
iso = IsolationForest(
    n_estimators=100,
    contamination=0.05,   # Assume 5% anomalies
    random_state=42
)

iso.fit(X_scaled_df)


# 6. Predict Anomalies

In [30]:
predictions = iso.predict(X_scaled_df)

# Convert output:
# 1  → Normal
# -1 → Anomaly

df['Anomaly'] = predictions


# 7. Check the count of Anomalies

In [31]:
df['Anomaly'].value_counts()


Anomaly
 1    950
-1     50
Name: count, dtype: int64

# 8. Separate Normal and Anamoly Datapoints

In [32]:
normal = df[df['Anomaly'] == 1]
anomaly = df[df['Anomaly'] == -1]

print("Normal Records:", len(normal))
print("Anomalies Detected:", len(anomaly))


Normal Records: 950
Anomalies Detected: 50


# 9. Data Visualization

In [33]:
import plotly.graph_objects as go

# Separate normal and anomaly

fig = go.Figure()

# Normal points
fig.add_trace(go.Scatter3d(
    x=normal['packet_size'],
    y=normal['inter_arrival_time'],
    z=normal['spectral_entropy'],
    mode='markers',
    name='Normal',
    marker=dict(
        size=5,
        color=normal['frequency_band_energy'],  # 4th dimension
        colorscale='Blues',
        opacity=0.6,
        colorbar=dict(title='frequency_band_energy')
    )
))

# Anomaly points
fig.add_trace(go.Scatter3d(
    x=anomaly['packet_size'],
    y=anomaly['inter_arrival_time'],
    z=anomaly['spectral_entropy'],
    mode='markers',
    name='Anomaly',
    marker=dict(
        size=7,
        color=anomaly['frequency_band_energy'],
        colorscale='Reds',
        opacity=0.9,
        symbol='diamond'
    )
))

# Cube layout
fig.update_layout(
    title='4D Cube Visualization (Network Anomaly Detection)',
    scene=dict(
        xaxis_title='packet_size',
        yaxis_title='inter_arrival_time',
        zaxis_title='spectral_entropy',
        aspectmode='cube'   # makes it cube-shaped
    ),
    width=900,
    height=700
)

fig.show()