In [9]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf
import pandas as pd

df = pd.read_csv("SQL_Injection.csv", dtype={"Query": str})

y = df["Label"].values 

vectorizer = TfidfVectorizer(max_features=100) 
x = vectorizer.fit_transform(df["Query"]).toarray()

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

mlp = Sequential()
mlp.add(Input(shape=(x_train.shape[1],)))
mlp.add(Dense(4, activation='relu'))
mlp.add(Dense(1, activation='sigmoid'))

mlp.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

mlp.fit(x_train, y_train, epochs=5, batch_size=32, verbose=1)

_, acuracia = mlp.evaluate(x_test, y_test)
print("Acurácia: %.2f" % (acuracia*100))

Epoch 1/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8201 - loss: 0.5779
Epoch 2/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9314 - loss: 0.4377
Epoch 3/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9441 - loss: 0.3533
Epoch 4/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9468 - loss: 0.2962
Epoch 5/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9571 - loss: 0.2551
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9623 - loss: 0.2354
Acurácia: 96.23


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from sklearn.utils import class_weight
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf
import pandas as pd
import numpy as np

df = pd.read_csv("SQL_Injection.csv", dtype={"Query": str})


vectorizer = TfidfVectorizer(max_features=100)
x_query = vectorizer.fit_transform(df["Query"]).toarray()

x_tabular = df.drop(columns=["Label", "Query"]).values

x = np.hstack([x_query, x_tabular])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

mlp = Sequential()
mlp.add(Input(shape=(x_train.shape[1],)))
mlp.add(Dense(64, activation='relu'))
mlp.add(Dense(32, activation='relu'))
mlp.add(Dense(1, activation='sigmoid'))

mlp.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train
)

class_weights = dict(enumerate(class_weights))


mlp.fit(x_train, y_train, epochs=5, batch_size=32, class_weight=class_weights)

_, acuracia = mlp.evaluate(x_test, y_test)
print("Acurácia: %.2f" % (acuracia*100))

Epoch 1/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9336 - loss: 0.2797
Epoch 2/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9904 - loss: 0.0479
Epoch 3/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9927 - loss: 0.0381
Epoch 4/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9933 - loss: 0.0352
Epoch 5/5
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9943 - loss: 0.0318
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9960 - loss: 0.0251
Acurácia: 99.60
