In [2]:
import pandas as pd

# Load the SYN attack data
df_syn = pd.read_csv("port.csv")

# List of features expected by the DNN model
required_features = [
    "arp.opcode","arp.hw.size","icmp.checksum","icmp.seq_le","http.content_length","http.response",
    "tcp.ack","tcp.ack_raw","tcp.checksum","tcp.connection.fin","tcp.connection.rst",
    "tcp.connection.syn","tcp.connection.synack","tcp.dstport","tcp.flags","tcp.flags.ack","tcp.len",
    "tcp.seq","tcp.srcport","udp.stream","udp.time_delta","dns.qry.name","dns.qry.qu",
    "dns.retransmission","dns.retransmit_request","mqtt.conflag.cleansess","mqtt.conflags",
    "mqtt.hdrflags","mqtt.len","mqtt.msgtype","mqtt.proto_len","mqtt.topic_len","mqtt.ver",
    "Attack_label","Attack_type","http.request.method_0","http.request.method_0.0",
    "http.request.method_GET","http.request.method_OPTIONS","http.request.method_POST",
    "http.request.method_PROPFIND","http.request.method_PUT","http.request.method_SEARCH",
    "http.request.method_TRACE","http.referer_() { _; } >_[$($())] { echo 93e4r0-CVE-2014-6278: true; echo;echo; }",
    "http.referer_0","http.referer_0.0","http.referer_127.0.0.1","http.referer_TESTING_PURPOSES_ONLY",
    "http.request.version_-a HTTP/1.1",
    "http.request.version_-al&ABSOLUTE_PATH_STUDIP=http://cirt.net/rfiinc.txt?? HTTP/1.1",
    "http.request.version_-al&_PHPLIB[libdir]=http://cirt.net/rfiinc.txt?? HTTP/1.1",
    "http.request.version_/etc/passwd|?data=Download HTTP/1.1","http.request.version_0",
    "http.request.version_0.0","http.request.version_> HTTP/1.1","http.request.version_By Dr HTTP/1.1",
    "http.request.version_HTTP/1.0","http.request.version_HTTP/1.1",
    "http.request.version_Src=javascript:alert('Vulnerable')><Img Src=\\\" HTTP/1.1",
    "http.request.version_name=a><input name=i value=XSS>&lt;script>alert('Vulnerable')</script> HTTP/1.1",
    "http.request.version_script>alert(1)/script><\\\" HTTP/1.1",
    "dns.qry.name.len_0","dns.qry.name.len_0.0","dns.qry.name.len_0.debian.pool.ntp.org",
    "dns.qry.name.len_1.0","dns.qry.name.len_1.debian.pool.ntp.org","dns.qry.name.len_2.debian.pool.ntp.org",
    "dns.qry.name.len_3.debian.pool.ntp.org","dns.qry.name.len__googlecast._tcp.local",
    "dns.qry.name.len_null-null.local","dns.qry.name.len_raspberrypi.local","mqtt.conack.flags_0",
    "mqtt.conack.flags_0.0","mqtt.conack.flags_0x00000000","mqtt.conack.flags_1461073",
    "mqtt.conack.flags_1461074","mqtt.conack.flags_1461383","mqtt.conack.flags_1461384",
    "mqtt.conack.flags_1461589","mqtt.conack.flags_1461591","mqtt.conack.flags_1471198",
    "mqtt.conack.flags_1471199","mqtt.conack.flags_1574358","mqtt.conack.flags_1574359",
    "mqtt.protoname_0","mqtt.protoname_0.0","mqtt.protoname_MQTT","mqtt.topic_0","mqtt.topic_0.0",
    "mqtt.topic_Temperature_and_Humidity","Attack_class","uri_query_len","uri_query_entropy",
    "uri_special_char_count","file_data_len","file_data_entropy","file_data_special_chars",
    "tcp_option_count"
]

# Drop any extra/unwanted columns
df_syn = df_syn[[col for col in df_syn.columns if col in required_features]]

# Add missing columns and fill with 0
for col in required_features:
    if col not in df_syn.columns:
        df_syn[col] = 0

# Ensure correct order
df_syn = df_syn[required_features]

# Final output: df_syn is now ready to be passed to your DNN
print(df_syn.head())


   arp.opcode  arp.hw.size  icmp.checksum  icmp.seq_le  http.content_length  \
0           0            0              0            0                    0   
1           0            0              0            0                    0   
2           0            0              0            0                    0   
3           0            0              0            0                    0   
4           0            0              0            0                    0   

   http.response  tcp.ack  tcp.ack_raw tcp.checksum  tcp.connection.fin  ...  \
0              0        0            0       0x86bd                   0  ...   
1              0        0            0       0x8682                   0  ...   
2              0        0            0       0x8516                   0  ...   
3              0        0            0       0x6740                   0  ...   
4              0        0            0       0x82c2                   0  ...   

   mqtt.topic_0.0  mqtt.topic_Temperature_an

In [3]:
import torch.nn as nn
class ImprovedDNN(nn.Module):
    def __init__(self, input_dim, num_classes, dropout_rate=0.3):
        super(ImprovedDNN, self).__init__()
        
        # Wider architecture with batch normalization
        self.fc1 = nn.Linear(input_dim, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.fc5 = nn.Linear(64, num_classes)

        # Activation & regularization
        self.dropout = nn.Dropout(dropout_rate)
        self.act = nn.LeakyReLU(0.1)  # LeakyReLU instead of ReLU

    def forward(self, x):
        x = self.act(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.act(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.act(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.act(self.bn4(self.fc4(x)))
        x = self.dropout(x)
        x = self.fc5(x)
        return x

In [7]:
import pca
pca = PCA(n_components=0.99)  # Increased from 0.95 to 0.99
X_pca = pca.fit_transform(X_scaled)

# Create models directory if it doesn't exist
import os
os.makedirs('models', exist_ok=True)

# Save PCA and scaler
joblib.dump(pca, 'models/pca.pkl')
joblib.dump(scaler, 'models/standard_scaler.pkl')
print(f"Original features: {X_scaled.shape[1]}, Reduced features: {X_pca.shape[1]}")
print("PCA and scaler saved to models/ directory")

ModuleNotFoundError: No module named 'pca'

In [5]:
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder

# Assuming df_syn is already created and cleaned
input_df = df_syn.drop(columns=[col for col in df_syn.columns if col.startswith("Attack_")], errors='ignore')

# Encode non-numeric/string columns
for col in input_df.columns:
    if input_df[col].dtype == 'object':
        le = LabelEncoder()
        input_df[col] = le.fit_transform(input_df[col].astype(str))

# Convert to float32 tensor
input_tensor = torch.tensor(input_df.values.astype('float32'))

# Load the model
model = ImprovedDNN(64,14)
model.load_state_dict(torch.load("DNN/models/best_model.pt"))
  # Replace with your actual model path
model.eval()

# Inference
with torch.no_grad():
    output = model(input_tensor)

# Convert output to class labels if classification
predicted_classes = torch.argmax(output, dim=1)

# Attach predictions to original df
df_syn['Predicted_Class'] = predicted_classes.numpy()

# Save or display
print(df_syn[['Predicted_Class']].head())
df_syn.to_csv("syn_predictions.csv", index=False)
print("✅ Predictions saved to syn_predictions.csv")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (240x96 and 64x512)