In [None]:
import kagglehub

# Download the latest version of the corpus through kagglehub.
# NOTE(review): `path` is only printed here; later cells read the CSV from a
# Google Drive location instead — confirm which copy of the data is intended.
path = kagglehub.dataset_download("starblasters8/human-vs-llm-text-corpus")

print("Path to dataset files:", path)

In [None]:
import os
import shutil

# Create the Kaggle config directory if it does not exist yet.
os.makedirs("/root/.kaggle", exist_ok=True)

# Copy the uploaded API token into the config directory.
# NOTE(review): the source filename ("kaggle (1).json") is hard-coded; the copy
# fails if the token was uploaded under a different name.
shutil.copy("/content/kaggle (1).json", "/root/.kaggle/kaggle.json")

# Set correct permissions (0o600: owner read/write only)
os.chmod("/root/.kaggle/kaggle.json", 0o600)

# Now download the dataset from Kaggle and unzip it into the working directory
!kaggle datasets download -d starblasters8/human-vs-llm-text-corpus --unzip

# Verify the files (lists the current working directory)
print("Downloaded files:", os.listdir())

In [None]:
import pandas as pd

# Load the CSV file from Google Drive
# NOTE(review): no cell visible in this notebook mounts Drive, and this path is
# unrelated to the Kaggle download locations above — confirm the file exists.
df = pd.read_csv('/content/drive/MyDrive/Data/data (1).csv')
df.head()

In [None]:
# Print the column dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Display all column names (the `source` column is renamed to `label` further down)
print("Dataset Columns:", df.columns)

In [None]:
# Row count per value of `source`. This must run before `source` is renamed
# to `label`; afterwards the column lookup raises KeyError.
df['source'].value_counts()

In [None]:
# Rename `source` to `label` in place; re-running is a no-op once the column
# has been renamed.
df.rename(columns={'source': 'label'}, inplace=True)

In [None]:
# Check for missing values (null count per column)
print(df.isnull().sum())

# Show basic statistics (describe() with its default column selection)
print(df.describe())

In [None]:
# Idempotent: a no-op when `source` has already been renamed to `label`.
df.rename(columns={'source': 'label'}, inplace=True)

# Rows without a label or without text cannot be classified or featurised.
# Filtering them first matters: a NaN label satisfies `!= 'human'`, so without
# this step such rows would be silently sampled into the LLM class.
labelled_df = df.dropna(subset=['label', 'text'])
is_human = labelled_df['label'].str.lower() == 'human'

# Balance the sample: 1,000 human + 1,000 LLM examples
df_human = labelled_df[is_human].sample(n=1000, random_state=42)
df_llm = labelled_df[~is_human].sample(n=1000, random_state=42)

# Combine and shuffle
subset_df = pd.concat([df_human, df_llm]).sample(frac=1, random_state=42).reset_index(drop=True)

# Optional: Save subset for reuse
subset_df.to_csv('/content/drive/MyDrive/llm_human_subset.csv', index=False)

# Check balance
subset_df['label'].value_counts()

In [None]:
# Preview the first rows of the shuffled, balanced subset.
subset_df.head()

In [None]:
# Reload the balanced subset that was written to Drive
df = pd.read_csv('/content/drive/MyDrive/llm_human_subset.csv')
# No-op when the saved CSV already carries a `label` column
df.rename(columns={'source': 'label'}, inplace=True)

# Binary target: human-written -> 1, every other source (LLM) -> 0
df['binary_label'] = df['label'].map(lambda source: int(source.lower() == 'human'))

In [None]:
# Step 0: Install spaCy and English language model
# NOTE(review): neither install is version-pinned, so the environment may
# differ between runs of this notebook.
!pip install spacy
!python -m spacy download en_core_web_sm

In [None]:
import spacy
# Load the small English pipeline; `extract_features` below calls this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [None]:
def extract_features(text):
    """Compute simple stylometric features for one document.

    Only alphabetic tokens (``token.is_alpha``) count as words, and lexical
    diversity is case-sensitive ("The" and "the" are distinct).

    Args:
        text: Raw document text. Missing or non-string values (for example
            the NaN pandas uses for empty cells) and blank strings are
            treated as empty documents instead of being passed to spaCy.

    Returns:
        pandas.Series with three entries:
            avg_word_length: mean character length of the alphabetic words.
            lexical_diversity: unique words divided by total words.
            avg_sentence_length: words per spaCy sentence.
        Every entry is 0 when the document has no alphabetic words.
    """
    if not isinstance(text, str) or not text.strip():
        # Nothing to parse: skip the spaCy call and report all-zero features.
        return pd.Series({
            'avg_word_length': 0,
            'lexical_diversity': 0,
            'avg_sentence_length': 0
        })

    doc = nlp(text)
    words = [token.text for token in doc if token.is_alpha]
    word_count = len(words)
    sentence_count = sum(1 for _ in doc.sents)
    unique_words = len(set(words))
    char_count = sum(len(word) for word in words)

    return pd.Series({
        # Each ratio falls back to 0 to avoid dividing by zero.
        'avg_word_length': char_count / word_count if word_count else 0,
        'lexical_diversity': unique_words / word_count if word_count else 0,
        'avg_sentence_length': word_count / sentence_count if sentence_count else 0
    })

from tqdm import tqdm

# Register `Series.progress_apply` so the slow spaCy pass shows a progress bar.
tqdm.pandas()

# Apply to all rows — exactly once. Previously the extraction ran twice and the
# second result replaced `feature_df` after the extra columns had been added,
# silently dropping `text_length` and `word_count` from the model inputs.
feature_df = df['text'].progress_apply(extract_features)

# Add original columns for modeling (fall back to computing them from the text
# when the dataset does not provide them)
feature_df['text_length'] = df['text_length'] if 'text_length' in df.columns else df['text'].apply(len)
feature_df['word_count'] = df['word_count'] if 'word_count' in df.columns else df['text'].apply(lambda x: len(x.split()))
feature_df['label'] = df['binary_label']

In [None]:
# Attach the binary target (1 = human, 0 = LLM) to the feature frame.
# Assigning it again is harmless if the column is already present.
feature_df['label'] = df['binary_label']

In [None]:
from sklearn.model_selection import train_test_split

# Separate the target from the predictors
y = feature_df['label']
X = feature_df.drop(columns='label')

# Hold out 20% of the rows for evaluation; seeded so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Fit a random forest with default hyper-parameters on the training split
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split
y_pred = model.predict(X_test)
test_report = classification_report(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
print("Classification Report:\n", test_report)
print("Confusion Matrix:\n", test_confusion)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Importances come from the fitted forest, in the same order as X's columns
features = X.columns
importance = model.feature_importances_

# Horizontal bar chart: one bar per feature
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=importance, y=features, ax=ax)
ax.set_title("Feature Importance")
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
fig.tight_layout()
plt.show()

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix

# Analyse every row of the balanced subset rather than a smaller sample.
sample_df = df.copy()
sample_df['true_label'] = sample_df['binary_label']

# Simulate human annotations (80% accurate): each true label is kept with
# probability 0.8 and flipped otherwise. Seeded so the noise is repeatable.
np.random.seed(42)
sample_df['human_judgment'] = sample_df['true_label'].apply(
    lambda x: x if np.random.rand() < 0.8 else 1 - x
)

# Reuse the feature matrix the model was trained on instead of running spaCy
# over every text again. `X` shares `df`'s index, so this selects the same
# rows, and it guarantees the prediction inputs match the training features
# column for column.
sample_features = X.loc[sample_df.index]

# Predict using trained model
sample_df['model_prediction'] = model.predict(sample_features)

# Evaluation
print("Accuracy - Human vs. True:", accuracy_score(sample_df['true_label'], sample_df['human_judgment']))
print("Accuracy - Model vs. True:", accuracy_score(sample_df['true_label'], sample_df['model_prediction']))
print("Agreement - Human vs. Model (Cohen's Kappa):", cohen_kappa_score(sample_df['human_judgment'], sample_df['model_prediction']))
print("Confusion Matrix - Human vs. Model:")
print(confusion_matrix(sample_df['human_judgment'], sample_df['model_prediction']))

# The model figures above include the rows it was trained on, which inflates
# them; also report accuracy on the held-out test rows alone.
held_out = sample_df.loc[X_test.index]
print("Accuracy - Model vs. True (held-out test rows only):", accuracy_score(held_out['true_label'], held_out['model_prediction']))

In [None]:
# Step 1: Filter disagreements. Take an explicit copy: this frame is modified
# in a later cell, and writing into a slice of `sample_df` triggers pandas'
# SettingWithCopyWarning.
disagreements = sample_df[sample_df['human_judgment'] != sample_df['model_prediction']].copy()

# Step 2: Sample a few to inspect (never ask for more rows than exist)
example_rows = disagreements.sample(n=min(5, len(disagreements)), random_state=42)

# Step 3: Display key info
for idx, row in example_rows.iterrows():
    print(f"\n🔹 Example {idx}")
    print(f"True Label: {'Human' if row['true_label'] == 1 else 'LLM'}")
    print(f"Human Annotator Guess: {'Human' if row['human_judgment'] == 1 else 'LLM'}")
    print(f"Model Prediction: {'Human' if row['model_prediction'] == 1 else 'LLM'}")
    print("Text Preview:")
    print(row['text'][:500] + '...')  # print first 500 characters only

In [None]:
# Install textstat for the readability score used in the next cell.
# NOTE(review): the next cell also imports `textblob`, which is not installed
# here — confirm it is already available in the runtime.
!pip install textstat

In [None]:
import textstat
from textblob import TextBlob
import re

def analyze_text_features(text):
    """Return surface-level descriptors of `text` as a dict.

    Keys:
        readability: `textstat.flesch_reading_ease` of the text.
        sentiment: TextBlob polarity of the text.
        lexical_diversity: unique whitespace-separated tokens divided by
            total tokens (case-sensitive), or 0 when there are no tokens.
        pronoun_count: number of case-insensitive whole-word matches of the
            personal pronouns listed in the pattern below.
    """
    def _type_token_ratio(tokens):
        # Plain str.split() tokens keep this free of any NLTK dependency.
        return len(set(tokens)) / len(tokens) if tokens else 0

    return {
        'readability': textstat.flesch_reading_ease(text),
        'sentiment': TextBlob(text).sentiment.polarity,
        'lexical_diversity': _type_token_ratio(text.split()),
        'pronoun_count': len(
            re.findall(r'\b(I|we|you|he|she|they|me|us|him|her|them)\b', text, flags=re.IGNORECASE)
        )
    }

In [None]:
# Clean and ensure text is string. `assign` builds a new frame (original index
# kept), so nothing is written into a slice of `sample_df`.
disagreements = disagreements.assign(text=disagreements['text'].astype(str))

# Analyze features: one dict per row, expanded into columns on the SAME index
# as `disagreements`.
disagreement_features = disagreements['text'].apply(analyze_text_features)
feature_frame = pd.DataFrame(disagreement_features.tolist(), index=disagreements.index)

# Combine. Both frames share an index here, so rows line up one-to-one; the
# index is reset only afterwards. Resetting one side before the concat makes
# pandas align on mismatched labels and yields NaN-filled rows.
disagreement_analysis = pd.concat([disagreements, feature_frame], axis=1).reset_index(drop=True)

# Display sample with error-proof preview (never ask for more rows than exist)
for idx, row in disagreement_analysis.sample(n=min(5, len(disagreement_analysis)), random_state=42).iterrows():
    text_preview = str(row['text'])[:500] + '...' if pd.notnull(row['text']) else "[Missing Text]"

    print(f"\n🔍 Example {idx}")
    print(f"True Label: {'Human' if row['true_label'] == 1 else 'LLM'}")
    print(f"Human Annotator Guess: {'Human' if row['human_judgment'] == 1 else 'LLM'}")
    print(f"Model Prediction: {'Human' if row['model_prediction'] == 1 else 'LLM'}")
    print("Text Preview:")
    print(text_preview)
    print(f"\n🧠 Readability Score: {row['readability']}")
    print(f"💬 Sentiment Polarity: {row['sentiment']:.2f}")
    print(f"🧍 Pronouns Used: {row['pronoun_count']}")
    print(f"🧠 Lexical Diversity: {row['lexical_diversity']:.2f}")

In [None]:
# Drop rows where any feature is NaN
# NOTE(review): if this removes many rows, check that the feature columns were
# joined onto the disagreement rows with matching indices upstream.
feature_cols = ['readability', 'sentiment', 'pronoun_count', 'lexical_diversity']
disagreement_clean = disagreement_analysis.dropna(subset=feature_cols).copy()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Reshape to long format: one row per (sample, feature) pair
melted = disagreement_clean.melt(id_vars='true_label', value_vars=feature_cols)
# Replace the 0/1 codes with readable class names for the x axis
melted['true_label'] = melted['true_label'].map({0: 'LLM', 1: 'Human'})

# One box-plot panel per feature; independent y axes because the features
# live on very different scales
g = sns.catplot(
    data=melted,
    kind='box',
    col='variable',
    x='true_label',
    y='value',
    sharey=False,
    height=4,
    aspect=1,
)

g.set_axis_labels("True Label", "Feature Value")
g.fig.suptitle("Disagreement Feature Distribution by True Label (Separate Scales)")
# Leave head-room above the panels for the figure-level title
g.fig.subplots_adjust(top=0.8)
plt.show()

In [None]:
# Pairwise correlation between the four text features
corr = disagreement_clean[feature_cols].corr()

# Annotated heatmap of the correlation matrix
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title("Correlation Between Text Features")
fig.tight_layout()
plt.show()

In [None]:
# Enable the "senter" pipe on the current `nlp` object.
# NOTE(review): a later cell reloads `nlp` with spacy.load(), which discards
# this change — confirm whether it is still needed.
nlp.enable_pipe("senter")  # Optional safety, depending on spaCy version

In [None]:
def has_nonstandard_grammar(text):
    """Return True if any sentence in `text` looks like a fragment.

    A sentence counts as a fragment when it has no token with dependency
    label 'nsubj' or no token tagged 'VERB'.

    NOTE(review): a later cell defines this name again with a different rule
    (short sentence or missing 'nsubj'); that later definition replaces this
    one once it runs.
    """
    for sent in nlp(text).sents:
        has_subject = any(tok.dep_ == 'nsubj' for tok in sent)
        has_verb = any(tok.pos_ == 'VERB' for tok in sent)
        if not (has_subject and has_verb):
            return True
    return False

In [None]:
# Regexes intended to flag contrastive phrasing (but…however, yet…also,
# "on the other hand", despite…still).
# NOTE(review): nothing visible in this notebook reads this module-level list;
# `has_logical_inconsistency` further down builds its own local list, which
# lacks the despite/still pattern.
contradiction_patterns = [
    r"\bbut\b.*?\bhowever\b",
    r"\byet\b.*?\balso\b",
    r"\bon the other hand\b",
    r"\bdespite\b.*?\bstill\b"
]

In [None]:
# Install Hugging Face transformers quietly, then import its pipeline factory.
# NOTE(review): the install is not version-pinned.
!pip install transformers --quiet
from transformers import pipeline

In [None]:
# Build a sentiment classifier.
# NOTE(review): no model name is passed, so the library picks its default
# checkpoint — consider naming one explicitly for reproducibility.
sentiment_pipeline = pipeline("sentiment-analysis")

In [None]:
# Label each text with the classifier's predicted sentiment.
# The 512-character slice only bounds the work per row; it does not bound the
# number of tokens (text made of single-character tokens can still exceed the
# model's limit once special tokens are added). `truncation=True` makes the
# tokenizer enforce the model's maximum length so such rows cannot crash.
df['sentiment'] = df['text'].apply(
    lambda x: sentiment_pipeline(x[:512], truncation=True)[0]['label']
)

In [None]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import spacy

# Load spaCy
# NOTE(review): this replaces the `nlp` object created earlier, so pipeline
# changes made to that object (e.g. enable_pipe("senter")) are not carried over.
nlp = spacy.load("en_core_web_sm")

# Define heuristics
def has_nonstandard_grammar(text):
    """Return True if `text` contains at least one fragment-like sentence.

    A sentence is treated as a fragment when it is shorter than 5 tokens or
    has no token with dependency label 'nsubj'.
    """
    for sent in nlp(text).sents:
        if len(sent) < 5 or not any(tok.dep_ == 'nsubj' for tok in sent):
            return True
    return False

def has_ironic_or_cultural_cue(text):
    """Return True if `text` contains one of the irony/sarcasm cue phrases.

    Matching is case-insensitive and anchored on word boundaries, so a cue
    only counts as a whole word or phrase. Plain substring matching gave
    false positives such as "classical" (for "classic") or "was if" (for
    "as if").
    """
    cues = ["as if", "yeah right", "because of course", "totally", "sarcasm", "obviously", "great job", "classic"]
    # One alternation over all cues; re.escape keeps each cue literal.
    cue_pattern = r"\b(?:" + "|".join(re.escape(cue) for cue in cues) + r")\b"
    return re.search(cue_pattern, text.lower()) is not None

def has_logical_inconsistency(text):
    """Return True if the lower-cased `text` matches a contrast pattern.

    Patterns checked: "but ... however" and "yet ... also" (each pair on a
    single line, since `.` does not match newlines) and the phrase
    "on the other hand".
    """
    lowered = text.lower()
    for marker in (r"\bbut\b.*\bhowever\b", r"\byet\b.*\balso\b", r"\bon the other hand\b"):
        if re.search(marker, lowered):
            return True
    return False

# Apply the three heuristics to every row of `df`, storing one boolean column each.
# NOTE(review): `df` is the full balanced subset, not only the human/model
# disagreement rows — confirm this is the intended population for the
# "failure reason" analysis.
df['nonstandard_grammar'] = df['text'].apply(has_nonstandard_grammar)
df['irony_or_culture'] = df['text'].apply(has_ironic_or_cultural_cue)
df['logical_inconsistency'] = df['text'].apply(has_logical_inconsistency)

# Tag dominant reason
def tag_reason(row):
    """Build a comma-separated reason string from a row's heuristic flags.

    `row` must support lookup by the keys 'irony_or_culture',
    'nonstandard_grammar' and 'logical_inconsistency'. Reasons appear in that
    fixed order; "Unclear" is returned when no flag is truthy.
    """
    flag_to_reason = (
        ('irony_or_culture', "Humor/Irony/Culture"),
        ('nonstandard_grammar', "Nonstandard Grammar"),
        ('logical_inconsistency', "Logical Inconsistency"),
    )
    matched = [reason for flag, reason in flag_to_reason if row[flag]]
    return ", ".join(matched) or "Unclear"

# Row-wise (axis=1) tagging: store each row's combined reason string.
df['failure_reason'] = df.apply(tag_reason, axis=1)

In [None]:

# ✅ Visualization: Count Plot of Failure Reasons
# The chart is titled and labelled as a breakdown of disagreements, so count
# only the rows where the simulated annotator and the model disagree.
# `sample_df` is a copy of `df` and shares its index, so the boolean mask
# aligns row for row. Plotting all of `df` counted every sample instead.
disagreement_mask = sample_df['human_judgment'] != sample_df['model_prediction']
disagreement_reasons = df.loc[disagreement_mask, ['failure_reason']]

plt.figure(figsize=(10, 5))
sns.countplot(
    data=disagreement_reasons,
    x='failure_reason',
    order=disagreement_reasons['failure_reason'].value_counts().index,
)
plt.title("Heuristic Breakdown of Disagreements")
plt.xlabel("Likely Reason for Human-Model Disagreement")
plt.ylabel("Number of Samples")
plt.xticks(rotation=30)
plt.tight_layout()
plt.show()