In [17]:
# SQL Injection Detection Project

## Dataset Preprocessing
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Load synthetic dataset
data = {
    "payload": [
        "SELECT * FROM users WHERE username = 'admin' --",  # Malicious
        "DROP TABLE students; --",  # Malicious
        "1' OR '1'='1",  # Malicious
        "SELECT * FROM accounts WHERE user = 'user' AND pass = 'password'",  # Benign
        "Normal web request",  # Benign
        "SELECT * FROM admin WHERE id = 1 OR 1=1",  # Malicious
    ],
    "label": [1, 1, 1, 0, 0, 1],  # 1 = Malicious, 0 = Benign
}

# Convert data to DataFrame
df = pd.DataFrame(data)

# Vectorize payloads using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["payload"])
y = df["label"]

# Save processed data
import scipy.sparse
import numpy as np

scipy.sparse.save_npz("features.npz", X)
np.save("labels.npy", y)

## Model Training and Evaluation
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import scipy.sparse
import numpy as np
import joblib

# Load preprocessed data
X = scipy.sparse.load_npz("features.npz")
y = np.load("labels.npy")

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred, zero_division=0))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Save the model
joblib.dump(model, "sql_injection_model.pkl")


Accuracy: 0.50
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2

Confusion Matrix:
[[0 0]
 [1 1]]


['sql_injection_model.pkl']

In [20]:
import joblib
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the trained model
model = joblib.load("sql_injection_model.pkl")

# Initialize TF-IDF Vectorizer (fit it on the same data as the training phase)
vectorizer = TfidfVectorizer()
data = [
    "SELECT * FROM users WHERE username = 'admin' --",
    "DROP TABLE students; --",
    "1' OR '1'='1",
    "SELECT * FROM accounts WHERE user = 'user' AND pass = 'password'",
    "Normal web request",
    "SELECT * FROM admin WHERE id = 1 OR 1=1",
]
vectorizer.fit(data)

# Test the model with new inputs
test_payloads = [
    "SELECT * FROM admin WHERE username = 'root' --",  # Malicious
    "UPDATE users SET password='12345' WHERE username='admin'",  # Malicious
    "GET /index.html HTTP/1.1",  # Benign
    "Normal user request",  # Benign
]

# Transform the test payloads using TF-IDF
X_test_payloads = vectorizer.transform(test_payloads)

# Make predictions
predictions = model.predict(X_test_payloads)

# Display results
for payload, prediction in zip(test_payloads, predictions):
    label = "Malicious" if prediction == 1 else "Benign"
    print(f"Payload: {payload}\nPrediction: {label}\n")



Payload: SELECT * FROM admin WHERE username = 'root' --
Prediction: Malicious

Payload: UPDATE users SET password='12345' WHERE username='admin'
Prediction: Benign

Payload: GET /index.html HTTP/1.1
Prediction: Benign

Payload: Normal user request
Prediction: Benign



In [1]:
# SQL Injection Detection Project

## Dataset Preprocessing
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Load synthetic dataset
data = {
    "payload": [
        "SELECT * FROM users WHERE username = 'admin' --",  # Malicious
        "DROP TABLE students; --",  # Malicious
        "1' OR '1'='1",  # Malicious
        "SELECT * FROM accounts WHERE user = 'user' AND pass = 'password'",  # Benign
        "Normal web request",  # Benign
        "SELECT * FROM admin WHERE id = 1 OR 1=1",  # Malicious
    ],
    "label": [1, 1, 1, 0, 0, 1],  # 1 = Malicious, 0 = Benign
}

# Convert data to DataFrame
df = pd.DataFrame(data)

# Vectorize payloads using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["payload"])
y = df["label"]

# Save processed data
import scipy.sparse
import numpy as np

scipy.sparse.save_npz("features.npz", X)
np.save("labels.npy", y)

## Model Training and Evaluation
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import scipy.sparse
import numpy as np
import joblib

# Load preprocessed data
X = scipy.sparse.load_npz("features.npz")
y = np.load("labels.npy")

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred, zero_division=0))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Save the model
joblib.dump(model, "sql_injection_model.pkl")


Accuracy: 0.50
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2

Confusion Matrix:
[[0 0]
 [1 1]]


['sql_injection_model.pkl']

In [3]:
# SQL Injection Detection Project

## Dataset Preprocessing
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Load synthetic dataset
data = {
    "payload": [
        "SELECT * FROM users WHERE username = 'admin' --",  # Malicious
        "DROP TABLE students; --",  # Malicious
        "1' OR '1'='1",  # Malicious
        "SELECT * FROM accounts WHERE user = 'user' AND pass = 'password'",  # Benign
        "Normal web request",  # Benign
        "SELECT * FROM admin WHERE id = 1 OR 1=1",  # Malicious
    ],
    "label": [1, 1, 1, 0, 0, 1],  # 1 = Malicious, 0 = Benign
}

# Convert data to DataFrame
df = pd.DataFrame(data)

# Vectorize payloads using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["payload"])
y = df["label"]

# Save processed data
import scipy.sparse
import numpy as np

scipy.sparse.save_npz("features.npz", X)
np.save("labels.npy", y)

## Model Training and Evaluation
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import scipy.sparse
import numpy as np
import joblib

# Load preprocessed data
X = scipy.sparse.load_npz("features.npz")
y = np.load("labels.npy")

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred, zero_division=0))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Save the model
joblib.dump(model, "sql_injection_model.pkl")


Accuracy: 0.50
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2

Confusion Matrix:
[[0 0]
 [1 1]]


['sql_injection_model.pkl']