
# Interactive UI for Human vs. GPT Text

Advanced Machine Learning Project
*   Priyanshi Gupta - 200070061
*   Srushti Bangde - 200070081



**Problem statement** \\
The problem statement for our project is to create an interactive user interface (UI) that can
effectively classify text as either human-authored or generated by the GPT model (such as
ChatGPT). Additionally, we aim to incorporate bias detection mechanisms to identify and
highlight potential biases in the GPT-generated text. We will utilize the Gradio library to
develop an accessible and user-friendly interface for users to input text and receive real-time feedback on its source (human or GPT) as well as any detected biases.

In [None]:
!pip install datasets



In [None]:
import transformers
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import Trainer, TrainingArguments
from sklearn.model_selection import train_test_split

In [None]:
from datasets import load_dataset

dataset = load_dataset("WxWx/ChatGPT-Detector-Bias")

Downloading builder script:   0%|          | 0.00/5.11k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/5.51k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/952k [00:00<?, ?B/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [None]:
import pandas as pd
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, Dataset
import torch

# Load the dataset
df = pd.read_csv('Dataset.csv')

# Define the DistilBERT tokenizer and model
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')

# Tokenize the text data
encodings = tokenizer(df['Question'].tolist(), truncation=True, padding=True)

# Create a custom PyTorch Dataset
class TextDataset(Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        return item

    def __len__(self):
        return len(self.encodings['input_ids'])

# Convert the data to PyTorch Dataset
dataset = TextDataset(encodings)

# Define DataLoader
loader = DataLoader(dataset, batch_size=8, shuffle=True)

# Set up training parameters
optimizer = AdamW(model.parameters(), lr=5e-5)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Train the model
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    for batch in loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = outputs.loss
        optimizer.step()

# Save the model if needed
model.save_pretrained('distilbert_model')

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# We can assume that ChatGPT's writing style is extremely polite. And unlike humans, it cannot produce responses that include metaphors, irony, or sarcasm

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
df = pd.read_csv('Dataset.csv')

# Assuming 'Human' and 'ChatGPT' columns represent the labels
X_train, X_test, y_train, y_test = train_test_split(df['Question'], df[['Human', 'ChatGPT']], test_size=0.2, random_state=42)

# Combine 'Human' and 'ChatGPT' columns into a single label
y_train = y_train.apply(lambda row: 'Human' if row['Human'] == 1 else 'ChatGPT', axis=1)
y_test = y_test.apply(lambda row: 'Human' if row['Human'] == 1 else 'ChatGPT', axis=1)

# Initialize the TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=5000)

# Fit and transform the training data
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)

# Transform the test data
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Initialize the Naive Bayes classifier
classifier = MultinomialNB()

# Train the classifier
classifier.fit(X_train_tfidf, y_train)

# Predict on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Display classification report
print(classification_report(y_test, y_pred))

# Predict on new texts
new_text = ["Weather depends on a lot of factors"]
new_text_tfidf = tfidf_vectorizer.transform(new_text)
prediction = classifier.predict(new_text_tfidf)
print(f'Prediction: {prediction[0]}')

Accuracy: 1.00
              precision    recall  f1-score   support

     ChatGPT       1.00      1.00      1.00        30

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Prediction: ChatGPT


In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6


In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
dataset_path = '/content/Dataset - Sheet1.csv'  # Update with your actual path
data = pd.read_csv(dataset_path)

# Combine 'Human' and 'ChatGPT' into one column 'source'
data['source'] = data['Human'].apply(lambda x: 'Human') + data['ChatGPT'].apply(lambda x: ' GPT')

# Convert labels to numeric values
data['source'] = data['source'].map({'Human': 0, ' GPT': 1})

# Split the data
X_train, X_test, y_train, y_test = train_test_split(data['Question'], data['source'], test_size=0.2, random_state=42)

# Tokenization and Word Embeddings
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(X_train)

X_train = tokenizer.texts_to_sequences(X_train)
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, padding='post', maxlen=15)

X_test = tokenizer.texts_to_sequences(X_test)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, padding='post', maxlen=15)

# Model Building
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=50, input_length=15),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluation
y_pred = (model.predict(X_test) > 0.5).astype(int)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [50]:
!pip install scikit-learn




In [67]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
dataset = pd.read_csv('Dataset.csv')

# Check if the lengths of 'Human' and 'ChatGPT' columns are consistent
if len(dataset['Human']) != len(dataset['ChatGPT']):
    raise ValueError("Lengths of 'Human' and 'ChatGPT' columns are inconsistent.")

# Combine human and ChatGPT answers into a single column 'Text'
dataset['Text'] = dataset['Human'] + ' ' + dataset['ChatGPT']

# Create labels (0 for Human, 1 for ChatGPT)
labels = [0] * len(dataset['Human']) + [1] * len(dataset['ChatGPT'])

# Check if the lengths of 'Human' and 'ChatGPT' columns are consistent
if len(dataset['Human']) != len(dataset['ChatGPT']):
    raise ValueError("Lengths of 'Human' and 'ChatGPT' columns are inconsistent.")

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(dataset['Text'], labels, test_size=0.2, random_state=42)

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)

# Predict on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Print classification report
print(classification_report(y_test, y_pred))

# Example usage
new_text = "Bangalore's late-night safety varies by location."
new_text_tfidf = vectorizer.transform([new_text])
prediction = classifier.predict(new_text_tfidf)

if prediction[0] == 0:
    print("Human Answer")
else:
    print("ChatGPT Answer")


ChatGPT Answer


In [62]:
!pip install gradio



In [64]:
!pip install typing-extensions --upgrade



In [67]:
import gradio as gr
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

# Load the saved model
model = DistilBertForSequenceClassification.from_pretrained("distilbert_model")

# Load the DistilBERT tokenizer
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Define the function to make predictions
def predict_sentiment(text):
    inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt")
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = logits.argmax().item()
    sentiment = "Most likely to be written by GPT" if predicted_class == 1 else "Most likely to be writeen by a Human"
    return sentiment

# Create the Gradio interface with improved aesthetics
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.inputs.Textbox(placeholder="Enter text here..."),
    outputs=gr.outputs.Textbox(label="Text Source Prediction"),
    live=True,
    title="GPT vs Human Classification Demo",
    description="Enter a text and get a prediction.",
    examples=[
        ["I love this product! It's amazing."],
        ["The movie was disappointing and boring."],
    ],
)

iface.launch()