<a href="https://colab.research.google.com/github/Divyam-Deep/SQL-injection-detection-using-Random-Forest/blob/main/sql_injection_detection_using_random_forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer


In [2]:
# Read the labelled query workbook into a DataFrame and preview its first five rows.
data = pd.read_excel(io='/content/sql.xlsx')
data.head(n=5)

Unnamed: 0,query,label
0,""" or pg_sleep ( __TIME__ ) --",1
1,create user name identified by pass123 tempora...,1
2,AND 1 = utl_inaddr.get_host_address ( ...,1
3,select * from users where id = '1' or @ @1 ...,1
4,"select * from users where id = 1 or 1#"" ( ...",1


In [8]:
# Hold out 20% of the labelled queries for evaluation; the fixed seed keeps the split repeatable.
query_train, query_test, ytrain, ytest = train_test_split(
    data['query'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# Learn the TF-IDF vocabulary from the training queries only, then apply it unchanged to the test queries.
vectorizer = TfidfVectorizer()
xtrain = vectorizer.fit_transform(query_train)
xtest = vectorizer.transform(query_test)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(xtrain, ytrain)
ypred = model.predict(xtest)

# Report accuracy, the confusion matrix, and the per-class summary, in that order.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(ytest, ypred))


0.9333333333333333
[[ 3  1]
 [ 0 11]]
              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.92      1.00      0.96        11

    accuracy                           0.93        15
   macro avg       0.96      0.88      0.91        15
weighted avg       0.94      0.93      0.93        15



In [9]:
# Serialize the trained classifier to sql.pkl in the working directory.
with open('sql.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [10]:
# Serialize the fitted TF-IDF vectorizer to tfidf_vector.pkl in the working directory.
with open('tfidf_vector.pkl', mode='wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

In [12]:
def sql_detect(query, model_path='sql.pkl', vectorizer_path='tfidf_vector.pkl'):
  """Classify one query string using a pickled model and vectorizer.

  Both artifacts are unpickled from disk on every call, so the function
  always reflects whatever is currently stored at the given paths.

  Args:
    query: The query text to classify. It is wrapped in a one-element list
      and passed to the vectorizer's ``transform`` method.
    model_path: Path to the pickled classifier; it must expose ``predict``.
      Defaults to ``'sql.pkl'``.
    vectorizer_path: Path to the pickled vectorizer; it must expose
      ``transform``. Defaults to ``'tfidf_vector.pkl'``.

  Returns:
    ``"Sql injection detected"`` when the first predicted label equals 1,
    otherwise ``"Safe"``.

  Raises:
    FileNotFoundError: If either artifact file does not exist.
  """
  # NOTE(security): pickle.load can execute arbitrary code embedded in the
  # file. Only point these paths at artifacts you produced yourself.
  with open(model_path, 'rb') as model_file:
    load_model = pickle.load(model_file)
  with open(vectorizer_path, 'rb') as vectorizer_file:
    load_vectorizer = pickle.load(vectorizer_file)

  # The vectorizer expects an iterable of documents, hence the single-item list.
  features = load_vectorizer.transform([query])
  result = load_model.predict(features)[0]
  return "Sql injection detected" if result == 1 else "Safe"

# Try the saved artifacts on a plain SELECT statement and show the verdict.
verdict = sql_detect("SELECT * FROM rest")
print(verdict)

Safe
