In [7]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer


In [8]:
# Column names for NSL-KDD dataset
nsl_kdd_columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes',
    'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in',
    'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations',
    'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login',
    'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count',
    'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate',
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'label'
]

# Load the datasets with column names
nsl_train = pd.read_csv('KDDTrain+.txt', header=None, names=nsl_kdd_columns)
nsl_test = pd.read_csv('KDDTest+.txt', header=None, names=nsl_kdd_columns)
unsw_train = pd.read_csv('UNSW_NB15_training-set.csv')
unsw_test = pd.read_csv('UNSW_NB15_testing-set.csv')


In [9]:
# Function to preprocess and select features
def preprocess_and_select_features(train_data, test_data, target_column, n_features=13):
    # Separate features and labels
    if isinstance(target_column, int):
        X_train = train_data.iloc[:, :-1]
        y_train = train_data.iloc[:, -1]
        X_test = test_data.iloc[:, :-1]
        y_test = test_data.iloc[:, -1]
    else:
        X_train = train_data.drop(target_column, axis=1)
        y_train = train_data[target_column]
        X_test = test_data.drop(target_column, axis=1)
        y_test = test_data[target_column]
    
    # Encode categorical variables
    for column in X_train.select_dtypes(include=['object']).columns:
        # Combine train and test data for encoding
        all_data = pd.concat([X_train[column], X_test[column]], axis=0)
        le = LabelEncoder()
        le.fit(all_data.astype(str))
        
        # Transform both train and test data
        X_train[column] = le.transform(X_train[column].astype(str))
        X_test[column] = le.transform(X_test[column].astype(str))
    
    # Handle any remaining non-numeric columns
    X_train = X_train.apply(pd.to_numeric, errors='coerce')
    X_test = X_test.apply(pd.to_numeric, errors='coerce')
    
    # Impute any NaN values
    imputer = SimpleImputer(strategy='mean')
    X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns)
    
    # Perform feature selection using RFE with Random Forest
    rfc = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    selector = RFE(estimator=rfc, n_features_to_select=n_features, step=1)
    selector = selector.fit(X_train, y_train)
    
    # Get selected feature names
    selected_features = X_train.columns[selector.support_].tolist()
    
    # Apply feature selection to both train and test sets
    X_train_selected = X_train[selected_features]
    X_test_selected = X_test[selected_features]
    
    return X_train_selected, y_train, X_test_selected, y_test, selected_features

In [10]:
# Sample a subset of the data
nsl_train_sample = nsl_train.sample(frac=0.1, random_state=42)
nsl_test_sample = nsl_test.sample(frac=0.1, random_state=42)
unsw_train_sample = unsw_train.sample(frac=0.1, random_state=42)
unsw_test_sample = unsw_test.sample(frac=0.1, random_state=42)

# Perform feature selection on the sampled data
nsl_X_train, nsl_y_train, nsl_X_test, nsl_y_test, nsl_selected_features = preprocess_and_select_features(nsl_train_sample, nsl_test_sample, -1)
unsw_X_train, unsw_y_train, unsw_X_test, unsw_y_test, unsw_selected_features = preprocess_and_select_features(unsw_train_sample, unsw_test_sample, 'label')

# Print selected features
print("Selected features for NSL-KDD:")
print(nsl_selected_features)
print("\nSelected features for UNSW-NB15:")
print(unsw_selected_features)

Selected features for NSL-KDD:
['protocol_type', 'flag', 'src_bytes', 'num_failed_logins', 'is_guest_login', 'count', 'srv_rerror_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_srv_rerror_rate']

Selected features for UNSW-NB15:
['id', 'sbytes', 'dbytes', 'rate', 'sttl', 'sload', 'tcprtt', 'smean', 'ct_state_ttl', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'ct_srv_dst', 'attack_cat']


In [11]:
# Perform feature selection for NSL-KDD
nsl_X_train, nsl_y_train, nsl_X_test, nsl_y_test, nsl_selected_features = preprocess_and_select_features(nsl_train, nsl_test, 'label')

# Perform feature selection for UNSW-NB15
unsw_X_train, unsw_y_train, unsw_X_test, unsw_y_test, unsw_selected_features = preprocess_and_select_features(unsw_train, unsw_test, 'label')

# Print selected features
print("Selected features for NSL-KDD:")
print(nsl_selected_features)
print("\nSelected features for UNSW-NB15:")
print(unsw_selected_features)

Selected features for NSL-KDD:
['protocol_type', 'flag', 'src_bytes', 'num_failed_logins', 'is_guest_login', 'count', 'srv_rerror_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_srv_rerror_rate']

Selected features for UNSW-NB15:
['id', 'dur', 'sbytes', 'dbytes', 'rate', 'sttl', 'sload', 'synack', 'ct_state_ttl', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'ct_srv_dst', 'attack_cat']


In [30]:
def prepare_for_fusion(X, y, selected_features, dataset_name):
    # Reset index to avoid duplicate index issues
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)
    
    # Add dataset identifier
    X['dataset'] = dataset_name
    
    # Ensure all selected features are present
    for feature in selected_features:
        if feature not in X.columns:
            X[feature] = 0  # or some other appropriate default value
    
    # Select only the chosen features
    X_selected = X[selected_features + ['dataset']]
    
    # Combine features and label
    df = pd.concat([X_selected, y], axis=1)
    
    return df

In [31]:
# Prepare datasets for fusion
nsl_train_prepared = prepare_for_fusion(nsl_X_train, nsl_y_train, nsl_selected_features, 'NSL-KDD')
nsl_test_prepared = prepare_for_fusion(nsl_X_test, nsl_y_test, nsl_selected_features, 'NSL-KDD')
unsw_train_prepared = prepare_for_fusion(unsw_X_train, unsw_y_train, unsw_selected_features, 'UNSW-NB15')
unsw_test_prepared = prepare_for_fusion(unsw_X_test, unsw_y_test, unsw_selected_features, 'UNSW-NB15')

In [32]:
# Combine datasets
import pandas as pd

fused_train = pd.concat([nsl_train_prepared, unsw_train_prepared], ignore_index=True)
fused_test = pd.concat([nsl_test_prepared, unsw_test_prepared], ignore_index=True)

In [33]:
# Normalize numerical features
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
# Normalize numerical features
scaler = StandardScaler()
numerical_columns = fused_train.select_dtypes(include=[np.number]).columns.tolist()
if 'label' in numerical_columns:
    numerical_columns.remove('label')  # Exclude the label column from scaling

fused_train[numerical_columns] = scaler.fit_transform(fused_train[numerical_columns])
fused_test[numerical_columns] = scaler.transform(fused_test[numerical_columns])

In [34]:
# Save fused datasets
fused_train.to_csv('fused_train_dataset.csv', index=False)
fused_test.to_csv('fused_test_dataset.csv', index=False)

print("\nFused training dataset shape:", fused_train.shape)
print("Fused testing dataset shape:", fused_test.shape)
print("\nFused dataset columns:", fused_train.columns.tolist())


Fused training dataset shape: (208305, 28)
Fused testing dataset shape: (197885, 28)

Fused dataset columns: ['protocol_type', 'flag', 'src_bytes', 'num_failed_logins', 'is_guest_login', 'count', 'srv_rerror_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_srv_rerror_rate', 'dataset', 'label', 'id', 'dur', 'sbytes', 'dbytes', 'rate', 'sttl', 'sload', 'synack', 'ct_state_ttl', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'ct_srv_dst', 'attack_cat']


In [35]:
# Load the fused dataset
fused_train = pd.read_csv('fused_train_dataset.csv')
fused_test = pd.read_csv('fused_test_dataset.csv')

In [36]:
# Separate features and target
X = fused_train.drop(['label', 'dataset'], axis=1)
y = fused_train['label']

X_test = fused_test.drop(['label', 'dataset'], axis=1)
y_test = fused_test['label']

In [45]:
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)
X_test = imputer.transform(X_test)

In [46]:
# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_test = scaler.transform(X_test)

In [47]:
# Split the data into training and validation sets
from sklearn.model_selection import train_test_split, cross_val_score
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [48]:
# Define models
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

models = {
    'SVM': SVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

In [49]:
# Function to evaluate model
def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    return accuracy, precision, recall, f1

In [51]:
# Train and evaluate models
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

results = {}
for name, model in models.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)
    
    # Predict on validation set
    y_val_pred = model.predict(X_val)
    
    # Evaluate on validation set
    accuracy, precision, recall, f1 = evaluate_model(y_val, y_val_pred)
    
    print(f"{name} - Validation Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    
    # Cross-validation
    cv_scores = cross_val_score(model, X, y, cv=5)
    print(f"Cross-validation scores: {cv_scores}")
    print(f"Mean CV score: {cv_scores.mean():.4f}")
    
    # Predict on test set
    y_test_pred = model.predict(X_test)
    
    # Evaluate on test set
    test_accuracy, test_precision, test_recall, test_f1 = evaluate_model(y_test, y_test_pred)
    
    print(f"{name} - Test Results:")
    print(f"Accuracy: {test_accuracy:.4f}")
    print(f"Precision: {test_precision:.4f}")
    print(f"Recall: {test_recall:.4f}")
    print(f"F1-score: {test_f1:.4f}")
    
    # Store results
    results[name] = {
        'validation': {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1},
        'cv_score': cv_scores.mean(),
        'test': {'accuracy': test_accuracy, 'precision': test_precision, 'recall': test_recall, 'f1': test_f1}
    }

# Print summary of results
print("\nSummary of Results:")
for name, result in results.items():
    print(f"\n{name}:")
    print(f"Validation Accuracy: {result['validation']['accuracy']:.4f}")
    print(f"Cross-Validation Score: {result['cv_score']:.4f}")
    print(f"Test Accuracy: {result['test']['accuracy']:.4f}")


Training SVM...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM - Validation Results:
Accuracy: 0.8241
Precision: 0.8151
Recall: 0.8241
F1-score: 0.8015
Cross-validation scores: [0.81798325 0.82628838 0.76121553 0.82480017 0.82626437]
Mean CV score: 0.8113


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM - Test Results:
Accuracy: 0.8643
Precision: 0.8818
Recall: 0.8643
F1-score: 0.8633

Training Decision Tree...
Decision Tree - Validation Results:
Accuracy: 0.9188
Precision: 0.9192
Recall: 0.9188
F1-score: 0.9189
Cross-validation scores: [0.91534049 0.91889297 0.92110127 0.92033317 0.91891697]
Mean CV score: 0.9189
Decision Tree - Test Results:
Accuracy: 0.9574
Precision: 0.9536
Recall: 0.9574
F1-score: 0.9548

Training Random Forest...
Random Forest - Validation Results:
Accuracy: 0.9288
Precision: 0.9277
Recall: 0.9288
F1-score: 0.9279
Cross-validation scores: [0.92532584 0.92861429 0.93058256 0.93034253 0.92525383]
Mean CV score: 0.9280
Random Forest - Test Results:
Accuracy: 0.9138
Precision: 0.9133
Recall: 0.9138
F1-score: 0.9098

Training KNN...
KNN - Validation Results:
Accuracy: 0.9094
Precision: 0.9085
Recall: 0.9094
F1-score: 0.9088
Cross-validation scores: [0.90254675 0.90981974 0.85761264 0.91113992 0.9087876 ]
Mean CV score: 0.8980
KNN - Test Results:
Accuracy: 0.7027


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [52]:
# Column names for NSL-KDD dataset
nsl_kdd_columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes',
    'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in',
    'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations',
    'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login',
    'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count',
    'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate',
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'label'
]

# Load the datasets with column names
nsl_train = pd.read_csv('KDDTrain+.txt', header=None, names=nsl_kdd_columns)
nsl_test = pd.read_csv('KDDTest+.txt', header=None, names=nsl_kdd_columns)
unsw_train = pd.read_csv('UNSW_NB15_training-set.csv')
unsw_test = pd.read_csv('UNSW_NB15_testing-set.csv')

In [54]:
def preprocess_data(train_data, test_data, target_column):
    # Separate features and labels
    X_train = train_data.drop(target_column, axis=1)
    y_train = train_data[target_column]
    X_test = test_data.drop(target_column, axis=1)
    y_test = test_data[target_column]

    # Encode categorical variables
    le = LabelEncoder()
    for column in X_train.select_dtypes(include=['object']).columns:
        X_train[column] = le.fit_transform(X_train[column].astype(str))
        X_test[column] = le.transform(X_test[column].astype(str))
    
    # Handle any remaining non-numeric columns
    X_train = X_train.apply(pd.to_numeric, errors='coerce')
    X_test = X_test.apply(pd.to_numeric, errors='coerce')
    
    # Impute any NaN values
    imputer = SimpleImputer(strategy='mean')
    X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns)
    
    # Normalize features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    return X_train, y_train, X_test, y_test

    # Preprocess NSL-KDD dataset
    nsl_X_train, nsl_y_train, nsl_X_test, nsl_y_test = preprocess_data(nsl_train, nsl_test, 'label')

    # Preprocess UNSW-NB15 dataset
    unsw_X_train, unsw_y_train, unsw_X_test, unsw_y_test = preprocess_data(unsw_train, unsw_test, 'label')

In [55]:
# Define models
models = {
    'SVM': SVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

# Function to evaluate model
def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    return accuracy, precision, recall, f1

# Train and evaluate models on NSL-KDD dataset
print("\nEvaluating models on NSL-KDD dataset:")
nsl_results = {}
for name, model in models.items():
    print(f"\nTraining {name} on NSL-KDD...")
    model.fit(nsl_X_train, nsl_y_train)
    
    # Predict on validation set
    y_val_pred = model.predict(nsl_X_test)
    
    # Evaluate on validation set
    accuracy, precision, recall, f1 = evaluate_model(nsl_y_test, y_val_pred)
    
    print(f"{name} - Validation Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    
    # Store results
    nsl_results[name] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

# Train and evaluate models on UNSW-NB15 dataset
print("\nEvaluating models on UNSW-NB15 dataset:")
unsw_results = {}
for name, model in models.items():
    print(f"\nTraining {name} on UNSW-NB15...")
    model.fit(unsw_X_train, unsw_y_train)
    
    # Predict on validation set
    y_val_pred = model.predict(unsw_X_test)
    
    # Evaluate on validation set
    accuracy, precision, recall, f1 = evaluate_model(unsw_y_test, y_val_pred)
    
    print(f"{name} - Validation Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    
    # Store results
    unsw_results[name] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }


Evaluating models on NSL-KDD dataset:

Training SVM on NSL-KDD...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM - Validation Results:
Accuracy: 0.4744
Precision: 0.2330
Recall: 0.4744
F1-score: 0.3054

Training Decision Tree on NSL-KDD...
Decision Tree - Validation Results:
Accuracy: 0.6324
Precision: 0.5837
Recall: 0.6324
F1-score: 0.5999

Training Random Forest on NSL-KDD...
Random Forest - Validation Results:
Accuracy: 0.6422
Precision: 0.5872
Recall: 0.6422
F1-score: 0.6000

Training KNN on NSL-KDD...
KNN - Validation Results:
Accuracy: 0.6123
Precision: 0.5380
Recall: 0.6123
F1-score: 0.5601

Training Naive Bayes on NSL-KDD...
Naive Bayes - Validation Results:
Accuracy: 0.4727
Precision: 0.2657
Recall: 0.4727
F1-score: 0.3160

Evaluating models on UNSW-NB15 dataset:

Training SVM on UNSW-NB15...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM - Validation Results:
Accuracy: 0.7110
Precision: 0.8241
Recall: 0.7110
F1-score: 0.7188

Training Decision Tree on UNSW-NB15...
Decision Tree - Validation Results:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000

Training Random Forest on UNSW-NB15...
Random Forest - Validation Results:
Accuracy: 0.9524
Precision: 0.9586
Recall: 0.9524
F1-score: 0.9532

Training KNN on UNSW-NB15...
KNN - Validation Results:
Accuracy: 0.7252
Precision: 0.7771
Recall: 0.7252
F1-score: 0.7344

Training Naive Bayes on UNSW-NB15...
Naive Bayes - Validation Results:
Accuracy: 0.3543
Precision: 0.6407
Recall: 0.3543
F1-score: 0.2434


In [56]:
# Print summary of results
print("\nSummary of Results on NSL-KDD dataset:")
for name, result in nsl_results.items():
    print(f"\n{name}:")
    print(f"Accuracy: {result['accuracy']:.4f}")
    print(f"Precision: {result['precision']:.4f}")
    print(f"Recall: {result['recall']:.4f}")
    print(f"F1-score: {result['f1']:.4f}")

print("\nSummary of Results on UNSW-NB15 dataset:")
for name, result in unsw_results.items():
    print(f"\n{name}:")
    print(f"Accuracy: {result['accuracy']:.4f}")
    print(f"Precision: {result['precision']:.4f}")
    print(f"Recall: {result['recall']:.4f}")
    print(f"F1-score: {result['f1']:.4f}")

print("\nSummary of Results on Fused dataset:")
for name, result in results.items():
    print(f"\n{name}:")
    print(f"Validation Accuracy: {result['validation']['accuracy']:.4f}")
    print(f"Cross-Validation Score: {result['cv_score']:.4f}")
    print(f"Test Accuracy: {result['test']['accuracy']:.4f}")
    print(f"Precision: {result['test']['precision']:.4f}")
    print(f"Recall: {result['test']['recall']:.4f}")
    print(f"F1-score: {result['test']['f1']:.4f}")


Summary of Results on NSL-KDD dataset:

SVM:
Accuracy: 0.4744
Precision: 0.2330
Recall: 0.4744
F1-score: 0.3054

Decision Tree:
Accuracy: 0.6324
Precision: 0.5837
Recall: 0.6324
F1-score: 0.5999

Random Forest:
Accuracy: 0.6422
Precision: 0.5872
Recall: 0.6422
F1-score: 0.6000

KNN:
Accuracy: 0.6123
Precision: 0.5380
Recall: 0.6123
F1-score: 0.5601

Naive Bayes:
Accuracy: 0.4727
Precision: 0.2657
Recall: 0.4727
F1-score: 0.3160

Summary of Results on UNSW-NB15 dataset:

SVM:
Accuracy: 0.7110
Precision: 0.8241
Recall: 0.7110
F1-score: 0.7188

Decision Tree:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000

Random Forest:
Accuracy: 0.9524
Precision: 0.9586
Recall: 0.9524
F1-score: 0.9532

KNN:
Accuracy: 0.7252
Precision: 0.7771
Recall: 0.7252
F1-score: 0.7344

Naive Bayes:
Accuracy: 0.3543
Precision: 0.6407
Recall: 0.3543
F1-score: 0.2434

Summary of Results on Fused dataset:

SVM:
Validation Accuracy: 0.8241
Cross-Validation Score: 0.8113
Test Accuracy: 0.8643
Precisi