In [7]:
from pathlib import Path

import joblib
import pandas as pd
import xgboost as xgb

# Load the saved XGBoost model and its matching scaler.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so these files
# must come from a trusted source (never load a .pkl of unknown origin).
model_path = './Models/XGBoost_model.pkl'
scaler_path = './Models/XGBoost_scaler.pkl'

xgb_model = joblib.load(model_path)
scaler = joblib.load(scaler_path)

# Load new data for prediction
new_data_path = './Dataset/converted_sqli.csv'
new_data = pd.read_csv(new_data_path, encoding='latin1')

# Feature columns passed to the scaler and the model, in this order.
# NOTE(review): presumably the same columns/order used at training time -
# confirm against the training notebook.
numeric_features = [
    'query_len', 'num_words_query', 'no_single_qts', 'no_double_qts', 'no_punct',
    'no_single_cmnt', 'no_mult_cmnt', 'no_space', 'no_perc', 'no_log_opt',
    'no_arith', 'no_null', 'no_hexa', 'no_alpha', 'no_digit',
    'len_of_chr_char_null', 'genuine_keywords'
]

# Fail early with a readable message if the CSV lacks any expected column,
# instead of surfacing a bare pandas indexing error.
missing_features = [col for col in numeric_features if col not in new_data.columns]
if missing_features:
    raise KeyError(
        f"{new_data_path} is missing expected feature columns: {missing_features}"
    )

X_new = new_data[numeric_features]

# Scale numeric features using the loaded scaler
X_new_scaled = scaler.transform(X_new)

# Make predictions using the loaded XGBoost model
predictions = xgb_model.predict(X_new_scaled)

# Save predictions to a CSV file. The file holds a single 'Prediction' column
# with one row per input row, in input order (no key column is written).
output_csv = './predictions/XGBoost_predictions.csv'
# to_csv does not create missing directories, so make sure the target exists.
Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
output = pd.DataFrame({'Prediction': predictions})
output.to_csv(output_csv, index=False)

print(f"✅ Predictions saved to {output_csv}")

✅ Predictions saved to ./predictions/XGBoost_predictions.csv


In [1]:
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score , confusion_matrix

# Load the original dataset. Use the same encoding as the prediction cell so
# both cells parse the file identically (the default UTF-8 decoder can fail on
# bytes that latin1 accepts).
original_data = pd.read_csv('./Dataset/converted_sqli.csv', encoding='latin1')

# Load the predictions dataset
predictions_data = pd.read_csv('./predictions/XGBoost_predictions.csv')

# Labels and predictions are paired purely by row position, so the two files
# must at least have the same number of rows. A mismatch means the predictions
# were generated from a different version of the dataset.
if len(original_data) != len(predictions_data):
    raise ValueError(
        f"Row count mismatch: dataset has {len(original_data)} rows, "
        f"predictions file has {len(predictions_data)} rows"
    )

# Extract actual labels (y_true) and predicted labels (y_pred)
y_true = original_data['Label']
y_pred = predictions_data['Prediction']

accuracy = accuracy_score(y_true, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Confusion Matrix
# Pin labels to the binary classes 0/1 so the matrix is always 2x2; without
# this, data containing a single class yields a 1x1 matrix and the 4-way
# unpacking below fails.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()
print("Confusion Matrix:\n")
print(f"True Negative (TN): {TN}")
print(f"False Positive (FP): {FP}")
print(f"False Negative (FN): {FN}")
print(f"True Positive (TP): {TP}")

# Classification Report
print("Classification Report:\n", classification_report(y_true, y_pred))

Test Accuracy: 0.9717
Confusion Matrix:

True Negative (TN): 2911
False Positive (FP): 78
False Negative (FN): 29
True Positive (TP): 769
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.97      0.98      2989
           1       0.91      0.96      0.93       798

    accuracy                           0.97      3787
   macro avg       0.95      0.97      0.96      3787
weighted avg       0.97      0.97      0.97      3787

