In [17]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from memory_profiler import memory_usage

# Load the dataset
data_path = 'C:/Users/Natty PC/Documents/Party/Project II/PreData/data-100k.csv'
df = pd.read_csv(data_path)

# Handle missing data if necessary
df = df.dropna()

# Convert categorical features into numeric (Label Encoding)
le_protocol = LabelEncoder()
df['Protocol'] = le_protocol.fit_transform(df['Protocol'])

le_service = LabelEncoder()
df['Service'] = le_service.fit_transform(df['Service'])

le_state = LabelEncoder()
df['State'] = le_state.fit_transform(df['State'])

# Normalize numerical features
scaler = StandardScaler()
df[['Receive port', 'Original bytes', 'Receive bytes']] = scaler.fit_transform(
    df[['Receive port', 'Original bytes', 'Receive bytes']]
)

# Encode the target label
le_label = LabelEncoder()
df['Label'] = le_label.fit_transform(df['Label'])

# Prepare the feature matrix (X) and target vector (y)
X = df.drop('Label', axis=1).values
y = df['Label'].values

# Reshape the data into sequences (for LSTM: [samples, time_steps, features])
X = X.reshape(X.shape[0], 1, X.shape[1])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Define the LSTM model
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob=0.3):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lstm_out, (h_n, c_n) = self.lstm(x)
        last_lstm_out = lstm_out[:, -1, :]  # Take the output of the last time step
        output = self.fc(last_lstm_out)
        return output

# Hyperparameters
input_size = X_train.shape[2]  # Number of features
hidden_size = 128  # Increased hidden size
num_layers = 2
output_size = len(le_label.classes_)  # Number of classes
learning_rate = 0.001

# Initialize the model, loss function, and optimizer
model = LSTMClassifier(input_size, hidden_size, num_layers, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
epochs = 20  # Increased number of epochs
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    outputs = model(X_train)  # Forward pass
    loss = criterion(outputs, y_train)  # Calculate loss
    optimizer.zero_grad()  # Zero out gradients
    loss.backward()  # Backward pass (calculate gradients)
    optimizer.step()  # Update the weights

    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

# Function to evaluate the model and capture memory usage
def evaluate_model(model, X_test):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        test_outputs = model(X_test)
        _, predicted = torch.max(test_outputs, 1)
    return predicted

# Measure the memory usage and runtime
start_time = time.time()
mem_usage = memory_usage((evaluate_model, (model, X_test), {}))

predicted = evaluate_model(model, X_test)

# Calculate accuracy
accuracy = (predicted == y_test).sum().item() / y_test.size(0)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

# Print testing time cost
end_time = time.time()
testing_time = end_time - start_time
print(f'Testing Time Cost: {testing_time:.4f} seconds')

# Print memory consumption
print(f'Memory Consumption (max): {max(mem_usage)} MiB')

# Print predicted and actual values
print("\nPredicted Values:", le_label.inverse_transform(predicted.numpy()))
print("Actual Values:", le_label.inverse_transform(y_test.numpy()))


Epoch 1/20, Loss: 1.6175
Epoch 2/20, Loss: 1.6126
Epoch 3/20, Loss: 1.6078
Epoch 4/20, Loss: 1.6030
Epoch 5/20, Loss: 1.5982
Epoch 6/20, Loss: 1.5934
Epoch 7/20, Loss: 1.5884
Epoch 8/20, Loss: 1.5834
Epoch 9/20, Loss: 1.5784
Epoch 10/20, Loss: 1.5731
Epoch 11/20, Loss: 1.5678
Epoch 12/20, Loss: 1.5622
Epoch 13/20, Loss: 1.5563
Epoch 14/20, Loss: 1.5502
Epoch 15/20, Loss: 1.5439
Epoch 16/20, Loss: 1.5371
Epoch 17/20, Loss: 1.5303
Epoch 18/20, Loss: 1.5226
Epoch 19/20, Loss: 1.5149
Epoch 20/20, Loss: 1.5066
