In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

import pickle

In [2]:
# Read the labelled query dataset into a DataFrame; later cells use its
# 'Query' (text) and 'Label' (class) columns.
# An alternative source, 'SQLiV3.csv', needs encoding='utf-16' when read.
data = pd.read_csv(filepath_or_buffer='Modified_SQL_Dataset.csv')

In [4]:
# Discard every row that has a missing value in any column.
# The result is rebound instead of using inplace=True, so the cell reads as a
# plain "input -> output" step and can be chained or re-run safely.
data = data.dropna()

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on 'Label' — confirm that is acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    data['Query'],
    data['Label'],
    test_size=0.2,
    random_state=42,
)

In [7]:
# Build TF-IDF features limited to the 1000 highest-frequency terms.
# NOTE(review): with the default token_pattern, TfidfVectorizer keeps only word
# tokens of two or more characters, so punctuation such as quotes, '=', ';' and
# '--' never reaches the model — confirm this is intended for SQL-injection data.
vectorizer = TfidfVectorizer(
    max_features=1000,
)

# Learn the vocabulary and IDF weights from the training queries only ...
X_train_tfidf = vectorizer.fit_transform(raw_documents=X_train)

# ... then project the held-out queries onto that same vocabulary.
X_test_tfidf = vectorizer.transform(raw_documents=X_test)

In [None]:
# Fit a 100-tree random forest on the TF-IDF training features
# (seeded so repeated runs build the same forest).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_tfidf, y_train)

# Predict labels for the held-out queries.
y_pred = model.predict(X_test_tfidf)

# Report overall accuracy and the per-class precision / recall / F1 table.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

print("Classification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9948253557567918
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00      3893
           1       1.00      0.99      0.99      2291

    accuracy                           0.99      6184
   macro avg       1.00      0.99      0.99      6184
weighted avg       0.99      0.99      0.99      6184



In [9]:
# Save the trained classifier to 'sql.pkl' so it can be reloaded for inference.
with open('sql.pkl', mode='wb') as pickle_out:
    pickle.dump(model, pickle_out)

In [10]:
# Save the fitted TF-IDF vectorizer alongside the model; inference needs the
# same vocabulary and weights that were used for training.
with open('tfidf_vectorizer.pkl', mode='wb') as pickle_out:
    pickle.dump(vectorizer, pickle_out)

In [14]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # load artifact files that were produced by this notebook / a trusted source.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def sql_detect(query, model=None, vectorizer=None):
    """Classify a single query as SQL injection or safe.

    Args:
        query: Raw query text (``str`` or ``bytes``) to classify.
        model: Optional fitted classifier exposing ``predict``. When omitted,
            the classifier is unpickled from ``sql.pkl``.
        vectorizer: Optional fitted vectorizer exposing ``transform``. When
            omitted, it is unpickled from ``tfidf_vectorizer.pkl``.

    Returns:
        ``"SQL Injection Detected"`` when the predicted label is 1 (or the
        string ``"1"``), otherwise ``"Safe"``.

    Raises:
        TypeError: If ``query`` is neither ``str`` nor ``bytes``.
        FileNotFoundError: If an artifact must be loaded from disk and its
            pickle file does not exist.
    """
    # Fail early with a clear message instead of an obscure error raised from
    # inside the vectorizer's preprocessing.
    if not isinstance(query, (str, bytes)):
        raise TypeError(
            "query must be str or bytes, got %s" % type(query).__name__
        )

    # Only hit the disk for artifacts the caller did not supply; passing the
    # already-loaded objects avoids re-unpickling them on every call.
    if model is None:
        model = _load_pickle('sql.pkl')
    if vectorizer is None:
        vectorizer = _load_pickle('tfidf_vectorizer.pkl')

    # The vectorizer expects an iterable of documents, hence the 1-element list.
    features = vectorizer.transform([query])
    label = model.predict(features)[0]

    # Accept the label "1" as well as the number 1: if the training labels were
    # strings, a plain `== 1` check would silently report "Safe" for every input.
    is_injection = label == 1 or str(label) == "1"

    return "SQL Injection Detected" if is_injection else "Safe"

# Smoke test with a UNION-based injection payload.
# NOTE(review): the output saved with this notebook is "Safe" for this payload,
# which looks like a false negative — worth investigating before relying on it.
print(
    sql_detect("id = -1 UNION SELECT username, password FROM admin--")
)

Safe
