In [1]:
import pandas as pd

In [2]:
# Read the semicolon-delimited CSV at dataset/human-ai-gen-news.csv into a DataFrame.
df = pd.read_csv(
    "dataset/human-ai-gen-news.csv",
    sep=";",
)

In [4]:
from sklearn.model_selection import train_test_split

In [16]:
# Model inputs come from the "text" column, targets from the "label" column.
#
# Each entry is converted with str() while the array stays object-dtype.
# Calling ndarray.astype(str) on an object array instead produces a
# fixed-width unicode array in which every element is padded to the length of
# the longest text, which can multiply memory use for long documents.
# The resulting string values are the same (a missing entry still becomes
# the literal "nan").
X = df["text"].map(str).to_numpy(dtype=object)
y = df["label"].to_numpy()

# Stratified 80/20 split with a fixed seed so the partition is repeatable and
# both parts keep the label proportions of the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [17]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Text-preprocessing hyper-parameters used by the tokenizer, padding and
# embedding cells of this notebook.
MAX_NUM_WORDS = 20_000        # vocabulary cap (Tokenizer num_words / Embedding input_dim)
MAX_SEQUENCE_LENGTH = 150     # maxlen every token sequence is padded or cut to

In [33]:
# Fit the word index on the training texts only, so the vocabulary is learned
# without looking at the held-out test split.
# NOTE(review): no oov_token is configured; words outside the fitted vocabulary
# are presumably dropped when texts are converted - confirm this is intended.
tokenizer = Tokenizer(
    num_words=MAX_NUM_WORDS,
)
tokenizer.fit_on_texts(texts=X_train)

In [34]:
# Map both splits to integer token sequences with the tokenizer fitted on the
# training texts (train first, then test).
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (X_train, X_test)
)

In [35]:
# Bring every sequence to exactly MAX_SEQUENCE_LENGTH tokens, padding at the
# end ("post").
# NOTE(review): `truncating` is left at the library default, which is believed
# to cut the *start* of over-long sequences - confirm that is the intent.
X_train_pad, X_test_pad = (
    pad_sequences(split_seqs, maxlen=MAX_SEQUENCE_LENGTH, padding="post")
    for split_seqs in (X_train_seq, X_test_seq)
)

In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

In [49]:
# Seed Python, NumPy and the TensorFlow backend before any layer is created so
# that weight initialisation and batch shuffling are as repeatable as the
# backend allows. Without a seed, the reported metrics change on every
# Restart & Run All.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)  # same seed value as the train/test split

# Small 1-D convolutional binary text classifier:
#   token ids -> 8-dim embeddings -> 8 conv filters over 5-token windows
#   -> max over the sequence axis -> single sigmoid output unit.
model = Sequential([
    Embedding(input_dim=MAX_NUM_WORDS, output_dim=8),
    Conv1D(filters=8, kernel_size=5, activation="relu"),
    GlobalMaxPooling1D(),
    Dense(1, activation="sigmoid"),
])

In [50]:
# Configure training for a single sigmoid output: binary cross-entropy loss,
# the Adam optimizer with its default settings, and accuracy as the metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [51]:
# Train for 2 epochs with mini-batches of 8 samples. A 20% share of the padded
# training data is set aside by Keras for the val_* metrics, and the per-epoch
# log is printed (verbose=1). The returned History object is kept in `history`.
history = model.fit(
    x=X_train_pad,
    y=y_train,
    batch_size=8,
    epochs=2,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/2
960/960 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - accuracy: 0.8849 - loss: 0.4336 - val_accuracy: 0.9964 - val_loss: 0.0206
Epoch 2/2
960/960 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - accuracy: 0.9987 - loss: 0.0120 - val_accuracy: 0.9990 - val_loss: 0.0070


In [52]:
from sklearn.metrics import classification_report

In [58]:
# Score the trained model on the held-out test split (silently, verbose=0)
# and report loss and accuracy to four decimals.
test_loss, test_accuracy = model.evaluate(x=X_test_pad, y=y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# Turn the sigmoid outputs into hard 0/1 predictions at a 0.5 threshold and
# print per-class precision / recall / F1.
# NOTE(review): the recorded run shows ~1.00 on every metric after only two
# epochs; it may be worth checking the data for duplicates or other leakage
# between splits before trusting this score.
y_pred = (model.predict(x=X_test_pad) > 0.5).astype("int32")
print(classification_report(y_true=y_test, y_pred=y_pred))

Test Loss: 0.0065, Test Accuracy: 0.9996
75/75 ━━━━━━━━━━━━━━━━━━━━ 0s 520us/step
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1200
         1.0       1.00      1.00      1.00      1200

    accuracy                           1.00      2400
   macro avg       1.00      1.00      1.00      2400
weighted avg       1.00      1.00      1.00      2400

