In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

class NetworkTrafficClassifier:
    """Small fully-connected Keras classifier with a softmax output.

    The network is Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu) ->
    Dropout(0.3) -> Dense(num_classes, softmax), compiled with Adam and
    ``sparse_categorical_crossentropy``, so labels must be integer class ids
    (not one-hot vectors).

    Args:
        input_dim: Number of features in each input row.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Step size passed to the Adam optimizer. Defaults to
            the value that was previously hard-coded (1e-6).
    """

    def __init__(self, input_dim: int, num_classes: int,
                 learning_rate: float = 0.000001):
        self.input_dim = input_dim
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        # Set by train(); None means "not trained yet".
        self.history = None
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Sequential model described on the class."""
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        model = tf.keras.Sequential([
            # An explicit Input object replaces `input_shape=` on the first
            # Dense layer, which newer Keras versions warn about.
            tf.keras.Input(shape=(self.input_dim,)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(self.num_classes, activation='softmax'),  # Multi-class classification
        ])
        model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def train(self, X_train, y_train, X_val, y_val, epochs=20, batch_size=32):
        """Fit the model and remember the resulting Keras History.

        Args:
            X_train, y_train: Training features and integer labels.
            X_val, y_val: Data passed to Keras as ``validation_data``.
            epochs: Number of passes over the training data.
            batch_size: Mini-batch size.

        Returns:
            The History object returned by ``model.fit`` (also stored on
            ``self.history``).
        """
        self.history = self.model.fit(X_train, y_train,
                                      validation_data=(X_val, y_val),
                                      epochs=epochs,
                                      batch_size=batch_size,
                                      verbose=1)
        return self.history

    def evaluate(self, X, y):
        """Return ``(loss, accuracy)`` of the model on the given data."""
        loss, accuracy = self.model.evaluate(X, y, verbose=0)
        return loss, accuracy

    def plot_training_results(self):
        """Plot training/validation loss and accuracy per epoch.

        Raises:
            ValueError: If ``train`` has not been called yet.
        """
        # getattr keeps the check valid even for instances created without
        # running __init__.
        if getattr(self, 'history', None) is None:
            raise ValueError("The model has not been trained yet. Train the model before plotting.")

        history = self.history.history

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(history['loss'], label='Training Loss')
        plt.plot(history['val_loss'], label='Validation Loss')
        plt.title('Loss Over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)
        plt.plot(history['accuracy'], label='Training Accuracy')
        plt.plot(history['val_accuracy'], label='Validation Accuracy')
        plt.title('Accuracy Over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.tight_layout()
        plt.show()

# Load and preprocess the dataset
file_path = "Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv"  # Replace with your dataset file path
data = pd.read_csv(file_path)

# Encode labels as integer class ids. This runs before the dtype scan below,
# so the label column is already numeric and is not picked up as a
# non-numeric feature. Note the column names used here carry a leading space.
label_encoder = LabelEncoder()
data[' Label'] = label_encoder.fit_transform(data[' Label'])

# Identify and drop high-cardinality non-numeric columns
# (errors='ignore': columns missing from this CSV are skipped silently)
columns_to_drop = ['Flow ID', ' Source IP', ' Destination IP', ' Timestamp']
data = data.drop(columns=columns_to_drop, errors='ignore')

# Identify remaining non-numeric columns
non_numeric_columns = data.select_dtypes(include=['object']).columns
print(f"Non-numeric columns: {non_numeric_columns}")

# Encode remaining non-numeric columns if they exist
if len(non_numeric_columns) > 0:
    encoder = OneHotEncoder(sparse_output=False)
    encoded_features = encoder.fit_transform(data[non_numeric_columns])

    # Drop the original non-numeric columns and append the encoded features
    X = data.drop([' Label'] + list(non_numeric_columns), axis=1).values
    X = np.hstack((X, encoded_features))
else:
    # If no non-numeric columns, proceed normally
    X = data.drop([' Label'], axis=1).values


# Separate labels
y = data[' Label'].values

# Replace infinity with NaN so the imputer below can treat both uniformly.
# The explicit float cast makes np.isinf / NaN assignment well-defined.
X = X.astype(np.float64)
X = np.where(np.isinf(X), np.nan, X)

# Split the dataset BEFORE fitting any preprocessing, so that imputation means
# and scaling statistics are learned from the training rows only and the test
# rows stay genuinely unseen. `X` itself is left un-imputed and unscaled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Replace NaN with the per-feature mean of the training split
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Normalize the features using training-split statistics
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Instantiate the model
input_dim = X_train.shape[1]
num_classes = len(np.unique(y))
classifier = NetworkTrafficClassifier(input_dim, num_classes)

# Train the model. The test split is passed as validation data for per-epoch
# monitoring only; nothing in this script tunes on it.
history = classifier.train(X_train, y_train, X_test, y_test, epochs=10, batch_size=32)

# Evaluate the model
train_loss, train_acc = classifier.evaluate(X_train, y_train)
test_loss, test_acc = classifier.evaluate(X_test, y_test)

print(f"Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.4f}")
print(f"Testing Loss: {test_loss:.4f}, Testing Accuracy: {test_acc:.4f}")

# Plot the training results
classifier.plot_training_results()


Non-numeric columns: Index([], dtype='object')
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1ms/step - accuracy: 0.4464 - loss: 0.7899 - val_accuracy: 0.7608 - val_loss: 0.5559
Epoch 2/10
[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.6767 - loss: 0.5810 - val_accuracy: 0.9811 - val_loss: 0.4027
Epoch 3/10
[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 0.8134 - loss: 0.4459 - val_accuracy: 0.9837 - val_loss: 0.3085
Epoch 4/10
[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8910 - loss: 0.3506 - val_accuracy: 0.9816 - val_loss: 0.2432
Epoch 5/10
[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9322 - loss: 0.2842 - val_accuracy: 0.9811 - val_loss: 0.1947
Epoch 6/10
[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9583 - loss: 0.2319 - val_accuracy: 0.9809 - val_loss: 0.1574
Epoch 7/10
[1m4233/4233[0

FileNotFoundError: [Errno 2] No such file or directory: 'saved_model/my_model.keras'