# In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Read the network-traffic CSV into a DataFrame.
# NOTE: the path is absolute and machine-specific; adjust it when running elsewhere.
df = pd.read_csv(filepath_or_buffer="C:/Users/cs123/Downloads/archive/dataset.csv")

# Convert IP addresses to integers
def ip_to_int(ip_series):
    """Convert a Series of dotted-decimal address strings to integers.

    Each element is split on '.' and its parts are combined as base-256
    digits, most significant part first (so "10.0.0.1" becomes 167772161).

    Args:
        ip_series: pandas Series whose elements are dotted-decimal strings.

    Returns:
        The result of ``ip_series.apply`` holding one integer per element.
    """
    def _pack(address):
        # Horner's scheme: shift the running total up by one base-256 digit,
        # then add the next part.
        packed = 0
        for octet in address.split('.'):
            packed = packed * 256 + int(octet)
        return packed

    return ip_series.apply(_pack)

# Replace both address columns with their integer form.
# NOTE: the string columns are overwritten, so running this a second time
# without reloading df fails (ip_to_int calls .split on every value).
for ip_column in ('Source.IP', 'Destination.IP'):
    df[ip_column] = ip_to_int(df[ip_column])

# Columns used as model inputs. The order matters: it fixes the column
# positions of X and therefore the row labels of the importance table.
feature_columns = [
    'Source.IP',
    'Source.Port',
    'Destination.IP',
    'Destination.Port',
    'Total.Fwd.Packets',
    'Total.Backward.Packets',
    'Total.Length.of.Fwd.Packets',
    'Total.Length.of.Bwd.Packets',
    'Flow.Bytes.s',
    'Flow.Packets.s',
    'Fwd.Packet.Length.Max',
    'Fwd.Packet.Length.Mean',
    'Bwd.Packet.Length.Max',
    'Bwd.Packet.Length.Mean',
    'Flow.IAT.Mean',
    'Flow.IAT.Std',
    'Flow.IAT.Max',
    'Fwd.IAT.Total',
    'Fwd.IAT.Mean',
    'Fwd.IAT.Std',
    'Fwd.IAT.Max',
    'Bwd.IAT.Total',
    'Bwd.IAT.Mean',
    'Bwd.IAT.Std',
    'Bwd.IAT.Max',
    'Fwd.Header.Length',
    'Bwd.Header.Length',
    'Fwd.Packets.s',
    'Bwd.Packets.s',
    'Packet.Length.Mean',
    'Packet.Length.Std',
    'Average.Packet.Size',
    'Avg.Fwd.Segment.Size',
    'Avg.Bwd.Segment.Size',
    'Init_Win_bytes_forward',
    'Init_Win_bytes_backward',
    'act_data_pkt_fwd',
    'min_seg_size_forward',
    'Active.Mean',
    'Active.Std',
    'Idle.Mean',
    'Idle.Std',
]

# Feature matrix (42 columns) and the target to predict: the Protocol column.
X = df[feature_columns]
y = df["Protocol"]

# Train a RandomForestClassifier model.
# random_state is pinned so the fitted forest, and therefore the importance
# ranking printed below, is reproducible from run to run; an unseeded forest
# draws different bootstrap samples and feature subsets on every execution.
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Get the impurity-based feature importances (one value per column of X).
# NOTE: scikit-learn documents that impurity-based importances can be
# misleading for high-cardinality features; cross-check the ranking with
# sklearn.inspection.permutation_importance before relying on it.
feature_importances = model.feature_importances_

# Pair every feature name with its importance and sort so the most important
# features come first.
feature_importance_df = pd.DataFrame({"Feature": X.columns, "Importance": feature_importances})
feature_importance_df = feature_importance_df.sort_values(by="Importance", ascending=False)

# Display the ten most important features
print(feature_importance_df.head(10))


# Recorded output:
#                     Feature  Importance
# 34   Init_Win_bytes_forward    0.287672
# 3          Destination.Port    0.200206
# 1               Source.Port    0.086598
# 25        Fwd.Header.Length    0.069479
# 37     min_seg_size_forward    0.064795
# 35  Init_Win_bytes_backward    0.049331
# 10    Fwd.Packet.Length.Max    0.026440
# 0                 Source.IP    0.022168
# 29       Packet.Length.Mean    0.020856
# 32     Avg.Fwd.Segment.Size    0.019711
