In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the three essay corpora from Google Drive.
# Each corpus stores its essay body under a different column name, so the body
# is copied into a shared 'text' column and tagged with an integer 'source'
# label: 0 = ChatGPT, 1 = BARD, 2 = human.

# CSV columns: index, prompts, BARD
bird_essays = pd.read_csv('/content/drive/MyDrive/data/BARD/BARD_essay.csv').assign(
    text=lambda frame: frame['BARD'],
    source=1,
)

# CSV columns: index, prompts, responses
gpt_essays = pd.read_csv('/content/drive/MyDrive/data/GPT/ChatGPT_essay.csv').assign(
    text=lambda frame: frame['responses'],
    source=0,
)

# CSV columns: index, essays
human_essays = pd.read_csv('/content/drive/MyDrive/data/Human/human_essay_1.csv').assign(
    text=lambda frame: frame['essays'],
    source=2,
)

# Stack only the shared columns into one frame with a fresh 0..n-1 index.
shared_columns = ['text', 'source']
essays_data = pd.concat(
    [frame[shared_columns] for frame in (bird_essays, gpt_essays, human_essays)],
    ignore_index=True,
)

# Clean the text: drop URLs, drop HTML tags, collapse runs of whitespace into a
# single space, then lowercase.
# regex=True is passed explicitly on every pattern: since pandas 2.0,
# Series.str.replace defaults to regex=False, which would treat the URL and
# tag patterns as literal text and leave the essays unchanged.
essays_data['text'] = (
    essays_data['text']
    .str.replace(r'http\S+|www\S+|https\S+', '', case=False, regex=True)
    .str.replace(r'<.*?>', '', case=False, regex=True)
    .str.replace(r'\s+', ' ', regex=True)
    .str.lower()
)

# Features are the cleaned essay strings; labels are the integer source ids.
X, y = essays_data['text'], essays_data['source']

# Fit the tokenizer on every essay, then encode each essay as a list of
# integer word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X_sequences = tokenizer.texts_to_sequences(X)

# The longest encoded essay sets the padded width; the vocabulary size gets
# one extra slot for the padding index.
max_length = max(map(len, X_sequences))
vocab_size = 1 + len(tokenizer.word_index)

# Bring every sequence to the same length so they form one 2-D array.
X_padded = pad_sequences(X_sequences, maxlen=max_length)

# Hold out 20% of the essays for testing, keeping the class proportions of
# `y` in both splits; the seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# RNN classifier: embedding -> GRU -> dropout -> 3-way softmax.
# `input_length` is deliberately not passed to Embedding: the argument is
# deprecated in Keras 3 (it is ignored and only triggers a warning), and the
# sequence length is inferred from the padded input at fit time.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128))
model.add(GRU(64, return_sequences=False))  # only the final GRU state feeds the classifier
model.add(Dropout(0.5))  # dropout for regularization
model.add(Dense(3, activation='softmax'))  # one output per source class (0, 1, 2)

# Labels are integer class ids (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE: the test split doubles as the validation set here, so the per-epoch
# val_* numbers are computed on the same data as the final evaluation.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32)

# Evaluate the model on the held-out split: the predicted class is the index
# of the largest softmax output for each essay.
y_pred = np.argmax(model.predict(X_test), axis=-1)

# Summary metrics; precision/recall/F1 are averaged across classes weighted by
# each class's support.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# Class names listed in label-id order (0 = GPT, 1 = BARD, 2 = Human).
# `labels` is pinned explicitly so each name always lines up with its id, even
# if some class happens to be absent from y_test / y_pred.
class_ids = [0, 1, 2]
class_names = ['GPT', 'BARD', 'Human']
print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))


Epoch 1/5




[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 3s/step - accuracy: 0.7890 - loss: 0.7631 - val_accuracy: 0.8988 - val_loss: 0.2418
Epoch 2/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 3s/step - accuracy: 0.9265 - loss: 0.1638 - val_accuracy: 0.9232 - val_loss: 0.1512
Epoch 3/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 3s/step - accuracy: 0.9382 - loss: 0.1104 - val_accuracy: 0.9319 - val_loss: 0.1440
Epoch 4/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 3s/step - accuracy: 0.9677 - loss: 0.0795 - val_accuracy: 0.9302 - val_loss: 0.1301
Epoch 5/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 3s/step - accuracy: 0.9818 - loss: 0.0563 - val_accuracy: 0.9319 - val_loss: 0.1427
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 351ms/step
Accuracy: 0.9319
Precision: 0.9282
Recall: 0.9319
F1-score: 0.9298
              precision    recall  f1-score   support

         GPT 