<a href="https://colab.research.google.com/github/Amanda9805/Detecting-Machine-Generated-Texts/blob/train-model/Final_COS760_project_cleaned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project: Detecting Machine-Generated Text in African Languages

## 1. Introduction and Setup

This notebook implements the methodology for detecting machine-generated text in English and isiZulu. We will follow these steps:
1.  **Data Collection & Preprocessing**: Load, clean, and balance datasets for human and machine-generated text.
2.  **Model Development**: Fine-tune multilingual models (Afro-XLMR for isiZulu, XLM-RoBERTa for English) for binary classification.
3.  **Evaluation & Analysis**:
    - Evaluate the isiZulu model's performance.
    - Conduct a zero-shot evaluation by testing the English-trained model on isiZulu data.
    - Use SHAP for explainability and perform error analysis.
4. **Interactive Demonstrator**: A final tool for real-time text analysis.

In [None]:
!pip install --upgrade transformers adapters datasets fsspec evaluate shap nltk

In [None]:
import pandas as pd
import numpy as np
import re
import json
import os
import gc
import warnings
import torch
import matplotlib.pyplot as plt

# NLTK for text processing
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter

# Hugging Face for models and datasets
from datasets import load_dataset, Dataset, ClassLabel
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback,
    pipeline
)

# Evaluation and Explainability
import evaluate
import shap
from sklearn.metrics import precision_recall_fscore_support

# Setup
# Silence every Python warning for the rest of the session. This keeps cell
# output short, but it also hides deprecation notices from the libraries above.
warnings.filterwarnings("ignore")
# Fetch the NLTK 'punkt' resource without printing download progress.
# NOTE(review): presumably required by word_tokenize, which the error analysis
# in perform_evaluation calls — confirm against the installed NLTK version.
nltk.download('punkt', quiet=True)
# Fix the PyTorch and NumPy seeds so repeated runs start from the same state.
torch.manual_seed(42)
np.random.seed(42)

## 2. Phase 1: Data Collection & Preprocessing

We define helper functions to load human and machine text, clean it, and create balanced datasets for training.

In [None]:
# Basic text cleaning
def clean_text(text):
    """Normalise raw text before it is used for training or evaluation.

    The steps run in this order: strip URLs, drop every character that is
    not an ASCII letter or whitespace, lowercase, collapse whitespace runs.

    Args:
        text: The raw text. Anything that is not a ``str`` (``None``, NaN,
            numbers, ...) is treated as having no content.

    Returns:
        The cleaned string, or ``''`` when the input was not a string or
        nothing survived the cleaning.
    """
    if not isinstance(text, str):
        return ''
    # Remove URLs. "http\S+" already matches https links, so no separate
    # alternative is needed for them.
    text = re.sub(r'http\S+|www\S+', '', text)
    # Remove special characters and numbers.
    # NOTE: the class is ASCII-only, so accented letters are removed as well.
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Lowercase, then collapse every whitespace run into a single space.
    return ' '.join(text.lower().split())

# Load machine-generated text from a JSONL file
def load_machine_text(file_path):
    """Read a JSON Lines file of machine-generated samples into a DataFrame.

    Blank lines are skipped. Every other line is parsed with ``json.loads``
    and becomes one row. If a ``text`` column is present, any value stored as
    a list of strings is joined into one space-separated string; the check is
    made per row, so files that mix plain strings and lists are handled.

    Args:
        file_path: Path to the UTF-8 encoded ``.jsonl`` file.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON line, or an empty
        DataFrame when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                if line.strip():
                    records.append(json.loads(line))
    except FileNotFoundError:
        print(f"Error: Machine-generated text file not found at {file_path}")
        return pd.DataFrame()

    df = pd.DataFrame(records)
    # Handle cases where text is a list of strings (checked row by row)
    if 'text' in df.columns:
        df['text'] = df['text'].apply(
            lambda value: ' '.join(value) if isinstance(value, list) else value
        )
    return df

# Load, process and balance data for a given language
def prepare_dataset(lang_code, human_data_source, machine_data_path, output_csv_path):
    """Build a class-balanced human-vs-machine dataset for one language.

    Human-written text (label 0) is taken from the ``train`` split of the
    Hugging Face dataset ``human_data_source`` with configuration
    ``lang_code``. Machine-generated text (label 1) is read from the JSONL
    file at ``machine_data_path``. Both are cleaned with ``clean_text``, the
    larger class is down-sampled to the size of the smaller one, and the
    shuffled result is written to ``output_csv_path``.

    Args:
        lang_code: Dataset configuration name, also used in log messages.
        human_data_source: Hugging Face dataset identifier for human text.
        machine_data_path: Path to the JSONL file with machine text.
        output_csv_path: Where the balanced dataset is saved as CSV.

    Returns:
        A ``datasets.Dataset`` with ``text`` and ``label`` columns (``label``
        is a ``ClassLabel`` named ``human``/``machine``), or ``None`` when no
        machine data could be loaded or a class has no usable text left.
    """
    print(f"Preparing dataset for language: {lang_code.upper()}")

    # Load Human Data (Label = 0)
    human_dataset = load_dataset(human_data_source, lang_code)
    human_df = human_dataset['train'].to_pandas()
    human_df = human_df[['text']].dropna()
    human_df['label'] = 0
    print(f"Loaded {len(human_df)} human-written data.")

    # Load Machine Data (Label = 1)
    machine_df = load_machine_text(machine_data_path)
    if machine_df.empty:
        return None
    machine_df = machine_df[['text']].dropna()
    machine_df['label'] = 1
    print(f"Loaded {len(machine_df)} machine-generated data.")

    # Combine and Clean
    combined_df = pd.concat([human_df, machine_df], ignore_index=True)
    combined_df['text'] = combined_df['text'].apply(clean_text)
    # Cleaning turns texts made only of URLs, digits or symbols into empty
    # strings, which a NaN check cannot catch. Filter on length so that no
    # content-free sample stays in the data with a label attached.
    combined_df = combined_df[combined_df['text'].str.len() > 0]

    # Balancing needs both classes to be present.
    class_counts = combined_df['label'].value_counts()
    if len(class_counts) < 2:
        print(
            f"Error: after cleaning, only {len(class_counts)} class(es) have "
            "usable text; cannot build a balanced dataset."
        )
        return None

    # Create Balanced Dataset: down-sample every class to the smallest one,
    # then shuffle so the classes are interleaved.
    min_class_size = class_counts.min()
    balanced_df = combined_df.groupby('label').sample(n=min_class_size, random_state=42)
    balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)
    print(f"Created a balanced dataset with {len(balanced_df)} total samples ({min_class_size} per class).")

    # Save and return dataset
    balanced_df.to_csv(output_csv_path, index=False, encoding='utf-8')
    print(f"Balanced dataset saved to {output_csv_path}")

    dataset = Dataset.from_pandas(balanced_df[['text', 'label']])
    dataset = dataset.cast_column('label', ClassLabel(num_classes=2, names=['human', 'machine']))
    return dataset

In [None]:
# Create dummy data files if they don't exist, as they are loaded in the notebook.
# They are written as UTF-8 explicitly because load_machine_text reads them back
# with encoding='utf-8'; relying on the platform default could make the two differ.
if not os.path.exists('zulu_mg_text.jsonl'):
    with open('zulu_mg_text.jsonl', 'w', encoding='utf-8') as f:
        f.write('{"text": "Lokhu umbhalo owenziwe ngomshini mayelana namasiko akwaZulu."}\n')
        f.write('{"text": "UNogwaja noFudu babengabangani abakhulu ehlathini."}\n')

if not os.path.exists('eng_mg_data.jsonl'):
    with open('eng_mg_data.jsonl', 'w', encoding='utf-8') as f:
        f.write('{"text": "This is machine-generated text about South African culture."}\n')
        f.write('{"text": "The quick brown fox jumps over the lazy dog in Johannesburg."}\n')

# Prepare isiZulu Dataset (None if the machine-generated file could not be loaded)
zulu_dataset = prepare_dataset(
    lang_code='zul',
    human_data_source='dsfsi/vukuzenzele-monolingual',
    machine_data_path='zulu_mg_text.jsonl',
    output_csv_path='balanced_zulu_texts.csv'
)

# Prepare English Dataset (None if the machine-generated file could not be loaded)
english_dataset = prepare_dataset(
    lang_code='eng',
    human_data_source='dsfsi/vukuzenzele-monolingual',
    machine_data_path='eng_mg_data.jsonl',
    output_csv_path='balanced_english_texts.csv'
)

## 3. Phase 2: Model Development

We define a function to handle the fine-tuning process. We will train two models:
1.  **Afro-XLMR-Base**: Fine-tuned on our balanced isiZulu dataset.
2.  **XLM-RoBERTa-Base**: Fine-tuned on our balanced English dataset, so that we can later test zero-shot transfer to isiZulu.

In [None]:
# Fine-tune a model on the provided dataset
def fine_tune_model(dataset, model_name, output_dir):
    """Fine-tune a sequence-classification checkpoint and save the result.

    The dataset is split 80/20 (stratified on ``label``, seed 42) into a
    training and a validation part. Training runs for up to 3 epochs with
    evaluation and checkpointing after every epoch, early stopping with a
    patience of 2 evaluations, and the checkpoint with the best weighted F1
    restored at the end.

    Args:
        dataset: ``datasets.Dataset`` with a ``text`` column and a ``label``
            column that supports ``stratify_by_column``.
        model_name: Hugging Face model identifier or local path to start from.
        output_dir: Directory for checkpoints, logs and the final model.

    Returns:
        The path ``f'{output_dir}/model'`` where the model and tokenizer
        were saved.
    """
    print(f"\nFine-tuning {model_name} on {output_dir}")

    # Split data
    train_val_split = dataset.train_test_split(test_size=0.2, seed=42, stratify_by_column='label')
    train_dataset = train_val_split['train']
    val_dataset = train_val_split['test']

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True)

    # Tokenize datasets. Only truncate here: padding is left to the
    # DataCollatorWithPadding given to the Trainer, which pads each batch to
    # its own longest sequence instead of padding every sample to 512 tokens.
    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, max_length=512)

    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)

    # Set format for PyTorch
    train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
    val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

    # Define metrics
    accuracy_metric = evaluate.load("accuracy")
    f1_metric = evaluate.load("f1")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
        f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']
        return {'accuracy': accuracy, 'f1': f1}

    # Set up Trainer
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        warmup_steps=50,
        weight_decay=0.01,
        logging_dir=f'{output_dir}/logs',
        logging_steps=10,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        fp16=torch.cuda.is_available(),
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # Train and save
    trainer.train()
    model.save_pretrained(f'{output_dir}/model')
    tokenizer.save_pretrained(f'{output_dir}/model')
    print(f"Best model saved to {output_dir}/model")

    # Evaluate the model on the validation split
    eval_results = trainer.evaluate()
    print(f"Evaluation results: {eval_results}")

    # Clean up memory so the next model can be trained in the same session
    del model, trainer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return f'{output_dir}/model'

In [None]:
# Fine-tune on isiZulu (Afro-XLMR); the path stays None when there is no data
if not zulu_dataset:
    print("Skipping Zulu model training due to data loading issues.")
    zulu_model_path = None
else:
    zulu_model_path = fine_tune_model(
        model_name="Davlan/afro-xlmr-base",
        dataset=zulu_dataset,
        output_dir="./zulu_finetuned_model",
    )

# Fine-tune on English (XLM-RoBERTa); the path stays None when there is no data
if not english_dataset:
    print("Skipping English model training due to data loading issues.")
    english_model_path = None
else:
    english_model_path = fine_tune_model(
        model_name="xlm-roberta-base",
        dataset=english_dataset,
        output_dir="./english_finetuned_model",
    )

## 4. Phase 3: Evaluation and Analysis

This phase addresses our core research questions:
1.  **Error Analysis & Explainability**: We analyze the performance of the isiZulu-specific model, identifying where it fails and using SHAP to understand its decision-making process.
2.  **Zero-Shot Transfer**: We evaluate how well the model trained only on English data performs on the isiZulu test set, measuring the effectiveness of cross-lingual transfer.

In [None]:
# Perform a full evaluation including metrics, error analysis and SHAP
def perform_evaluation(model_path, eval_dataset, report_path):
    """Evaluate a saved classifier and write a plain-text report.

    The function loads the model and tokenizer from ``model_path``, predicts
    every text in ``eval_dataset``, prints weighted precision/recall/F1 and
    accuracy, collects the misclassified samples, explains the first three
    texts with SHAP, and writes everything to ``report_path``.

    Args:
        model_path: Directory containing the saved model and tokenizer.
        eval_dataset: Dataset with a ``text`` and an integer ``label`` column.
        report_path: Destination of the text report. Its file name (without
            extension) also prefixes the SHAP files of this run.

    Returns:
        None. The outputs are the report file and the SHAP HTML files in
        ``./shap_plots``.
    """
    print(f"\nEvaluating model from: {model_path}")

    # Load Model and Tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    model.eval()

    # Prepare evaluation pipeline with truncation
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device=0 if device=='cuda' else -1,
        truncation=True,
        max_length=512
    )

    # Get Predictions. The column is materialised as a plain list so that
    # slicing, indexing and the pipeline call all see an ordinary list of str.
    texts = list(eval_dataset['text'])
    true_labels = np.array(eval_dataset['label'])
    preds_output = classifier(texts, batch_size=8)

    # Map the pipeline's label strings back to class ids through the model
    # config, so this works for default ('LABEL_0') and custom label names.
    name_to_id = {name: int(idx) for idx, name in model.config.id2label.items()}
    pred_labels = np.array([name_to_id[p['label']] for p in preds_output])

    # Calculate Overall Metrics
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='weighted')
    accuracy = (true_labels == pred_labels).mean()
    metrics_summary = f"Accuracy: {accuracy:.4f}\nPrecision: {precision:.4f}\nRecall: {recall:.4f}\nF1-Score: {f1:.4f}"
    print("Overall Performance Metrics:")
    print(metrics_summary)

    # Error Analysis: one entry per misclassified text, with its length and
    # type-token ratio (unique tokens / total tokens).
    errors = []
    for i, (true, pred) in enumerate(zip(true_labels, pred_labels)):
        if true != pred:
            tokens = word_tokenize(texts[i])
            ttr = len(set(tokens)) / len(tokens) if tokens else 0
            errors.append(
                f"Text: {texts[i][:200]}...\n"
                f"  True Label: {'machine' if true == 1 else 'human'}\n"
                f"  Predicted Label: {'machine' if pred == 1 else 'human'}\n"
                f"  Text Length: {len(tokens)} tokens, Lexical Diversity (TTR): {ttr:.3f}\n"
            )

    # SHAP Analysis on a few examples
    print("\nGenerating SHAP explanations for a few samples...")
    shap_explainer = shap.Explainer(classifier)
    shap_values = shap_explainer(texts[:3]) # Explain first 3 samples

    # Save SHAP plots to files
    shap_plots_dir = "./shap_plots"
    os.makedirs(shap_plots_dir, exist_ok=True)
    # Prefix the files with the report name: the basename of model_path is
    # "model" for every model this notebook saves, so using it would let a
    # later evaluation overwrite the files of an earlier one.
    run_name = os.path.splitext(os.path.basename(report_path))[0]
    shap_plot_paths = []
    for i in range(len(shap_values)):
        plot_path = os.path.join(shap_plots_dir, f"{run_name}_sample_{i}.html")
        # shap.plots.text produces HTML, not a matplotlib figure. With
        # display=False it returns the markup, which is written out directly.
        plot_html = shap.plots.text(shap_values[i], display=False)
        with open(plot_path, 'w', encoding='utf-8') as plot_file:
            plot_file.write(plot_html)
        shap_plot_paths.append(plot_path)
    print(f"Saved {len(shap_plot_paths)} SHAP plots to '{shap_plots_dir}/'")

    # Write a consolidated report
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(f"Evaluation Report for: {model_path}\n")
        f.write("="*50 + "\n\n")
        f.write("### Overall Performance ###\n")
        f.write(metrics_summary + "\n\n")
        f.write("### SHAP Explanations ###\n")
        f.write(f"SHAP plots were saved to the '{shap_plots_dir}' directory.\n")
        f.write("These plots visualize token contributions to the model's prediction.\n\n")
        f.write("### Error Analysis ###\n")
        f.write(f"Found {len(errors)} misclassifications in the evaluation set.\n\n")
        if errors:
            f.write("\n".join(errors))
        else:
            f.write("No errors found. Perfect classification on this set!\n")

    print(f"\nFull evaluation report saved to: {report_path}")

    # Clean up memory
    del model, tokenizer, classifier, shap_explainer, shap_values
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

In [None]:
# Evaluate both fine-tuned models on the held-out isiZulu split
nltk.download('punkt_tab')
if not zulu_dataset:
    print("Zulu dataset not available, skipping all evaluations.")
else:
    # The split uses the same arguments as the split inside fine_tune_model.
    zulu_test_dataset = zulu_dataset.train_test_split(test_size=0.2, seed=42, stratify_by_column='label')['test']

    # In-language evaluation: the isiZulu-specific model
    if not (zulu_model_path and os.path.exists(zulu_model_path)):
        print("Zulu model not found, skipping evaluation.")
    else:
        perform_evaluation(
            eval_dataset=zulu_test_dataset,
            model_path=zulu_model_path,
            report_path="./zulu_model_evaluation_report.txt",
        )

    # Zero-shot evaluation: the English-trained model on isiZulu text
    if not (english_model_path and os.path.exists(english_model_path)):
        print("English model not found, skipping zero-shot evaluation.")
    else:
        perform_evaluation(
            eval_dataset=zulu_test_dataset,
            model_path=english_model_path,
            report_path="./zero_shot_evaluation_report.txt",
        )

## 5. Interactive Demonstrator

This final component provides a command-line interface to test the fine-tuned models on new text inputs. It includes memory management to efficiently switch between the English and isiZulu models and provides a prediction with a SHAP explanation.

In [None]:
import datetime
# Configuration
# Saved model directory for each supported language key
MODELS_INFO = dict(
    eng=dict(path='./english_finetuned_model/model'),
    zul=dict(path='./zulu_finetuned_model/model'),
)

# Prefer the GPU when one is available
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Class id <-> display label, in both directions
ID2LABEL = {0: 'Human-Generated', 1: 'Machine-Generated'}
LABEL2ID = dict(zip(ID2LABEL.values(), ID2LABEL.keys()))

# Output directory for the explanation plots
SHAP_PLOTS_DIR = 'shap_explanations'

# Models and tokenizers loaded so far, keyed by language, so they are not
# read from disk again
MODELS_CACHE = {}

# Make sure the plot directory exists before anything is written to it
os.makedirs(SHAP_PLOTS_DIR, exist_ok=True)

def get_or_load_model(lang_key, device):
    """Return the ``(model, tokenizer)`` pair for ``lang_key`` on ``device``.

    Any other cached model that currently sits on the GPU is moved to the CPU
    first. The requested model comes from ``MODELS_CACHE`` when present,
    otherwise it is loaded from the path in ``MODELS_INFO`` and cached.
    Returns ``(None, None)`` when no path is configured for ``lang_key`` or
    the path does not exist.
    """
    # Push every other GPU-resident model back to the CPU
    for other_key in MODELS_CACHE:
        if other_key == lang_key:
            continue
        other_model = MODELS_CACHE[other_key]['model']
        if other_model.device.type == 'cuda':
            print(f"Moving '{other_key}' model to CPU to free up VRAM...")
            other_model.to('cpu')

    # Release what was just freed
    gc.collect()
    if device.type == 'cuda':
        torch.cuda.empty_cache()

    cached_entry = MODELS_CACHE.get(lang_key)
    if cached_entry is not None:
        # Already loaded earlier in this session
        print(f"Loading '{lang_key}' model from cache...")
        model, tokenizer = cached_entry['model'], cached_entry['tokenizer']
    else:
        # First request for this language: read it from disk
        print(f"Loading '{lang_key}' model from disk...")
        model_path_info = MODELS_INFO.get(lang_key)
        if not model_path_info:
            print(f"Error: No model path configured for language '{lang_key}'.")
            return None, None

        # Model and tokenizer are stored in the same directory
        saved_dir = os.path.join(model_path_info['path'], '')
        if not os.path.exists(saved_dir):
            print(f"Error: Model or tokenizer not found at '{model_path_info['path']}'.")
            print("Please ensure you have trained and saved the models as per the notebook.")
            return None, None

        model = AutoModelForSequenceClassification.from_pretrained(saved_dir)
        tokenizer = AutoTokenizer.from_pretrained(saved_dir)

        # Remember the pair for later requests
        MODELS_CACHE[lang_key] = {'model': model, 'tokenizer': tokenizer}

    # Place the requested model on the target device if it is elsewhere
    if model.device.type != device.type:
        print(f"Moving '{lang_key}' model to {device.type.upper()}...")
        model.to(device)

    model.eval()
    print("...model is ready.")
    return model, tokenizer


def predict_and_explain(text, model, tokenizer, device, lang_key=None):
    """Classify ``text`` and save a SHAP waterfall plot for the result.

    Args:
        text: The text to classify.
        model: Sequence-classification model, already on ``device``.
        tokenizer: Tokenizer belonging to ``model``.
        device: ``torch.device`` that the inputs are moved to.
        lang_key: Language tag used in the plot file name. When omitted, the
            module-level ``lang_key`` set by ``main()`` is used if it exists,
            otherwise ``'unknown'``, so the function can be called directly.

    Returns:
        A tuple ``(predicted_label, confidence, plot_path)``: the display
        label from ``ID2LABEL``, the softmax probability of that class, and
        the path of the saved PNG.
    """
    if lang_key is None:
        # Backward compatible with main(), which publishes the language as a
        # module global instead of passing it in.
        lang_key = globals().get('lang_key', 'unknown')

    # Prediction
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=-1).cpu().numpy()[0]
    prediction_idx = int(np.argmax(probabilities))
    predicted_label = ID2LABEL[prediction_idx]
    confidence = probabilities[prediction_idx]

    # SHAP Explanation
    print("\nGenerating SHAP explanation... (this may take a moment)")

    def predict_proba_for_shap(texts):
        # SHAP passes the masked text variants as a NumPy array; the
        # tokenizer is given a plain list of str instead.
        if isinstance(texts, np.ndarray):
            texts = texts.tolist()
        inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        return torch.softmax(outputs.logits, dim=-1).cpu().numpy()

    explainer = shap.Explainer(predict_proba_for_shap, shap.maskers.Text(tokenizer, mask_token="<unk>"), output_names=list(ID2LABEL.values()))
    shap_values = explainer([text])

    # Generate and Save SHAP Plot (always for the machine-generated class,
    # whichever class was predicted)
    class_index_to_explain = LABEL2ID['Machine-Generated']
    plt.figure()
    shap.plots.waterfall(shap_values[0, :, class_index_to_explain], show=False)

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    plot_filename = f"shap_explanation_{lang_key}_{timestamp}.png"
    plot_path = os.path.join(SHAP_PLOTS_DIR, plot_filename)

    plt.savefig(plot_path, bbox_inches='tight')
    plt.close()
    print(f"...explanation saved to {plot_path}")

    return predicted_label, confidence, plot_path


def main():
    """Run the interactive command-line detector until the user stops it.

    Each round asks for a language, loads the matching model, reads a text,
    then prints the prediction, its confidence and the path of the saved
    explanation plot. The loop ends when a model cannot be loaded or the
    user answers anything other than 'y' to the final question.
    """
    banner = "=" * 60
    print(banner)
    print(" Human vs. Machine Text Detector for English & isiZulu")
    print(banner)
    print(f"Running on device: {str(DEVICE).upper()}")

    # predict_and_explain reads lang_key as a module global for file naming
    global lang_key

    language_by_choice = {'1': 'eng', '2': 'zul'}

    while True:
        # Keep asking until one of the two menu entries is chosen
        lang_choice = ""
        while lang_choice not in language_by_choice:
            lang_choice = input("\nChoose a language:\n  1: English\n  2: isiZulu\nEnter choice (1 or 2): ")

        lang_key = language_by_choice[lang_choice]

        # Load the model for that language on demand
        model, tokenizer = get_or_load_model(lang_key, DEVICE)
        if model is None:
            # Loading failed (e.g. wrong model path): leave the loop
            break

        input_text = input(f"\nPlease enter the {lang_key.upper()} text you want to analyze:\n> ")
        if not input_text.strip():
            print("Error: Input text cannot be empty.")
            continue

        # Classify and explain
        print("\nAnalyzing text...")
        predicted_label, confidence, plot_path = predict_and_explain(
            input_text, model, tokenizer, DEVICE
        )

        # Show the outcome
        rule = "-" * 25
        print(f"\n{rule} ANALYSIS RESULTS {rule}")
        print(f"  Prediction: The text is likely {predicted_label.upper()}")
        print(f"  Confidence: {confidence:.2%}")
        print(f"\nAn explanation plot has been saved here: {plot_path}")
        print("-" * 70)

        # Go round again only on an explicit 'y'
        another = input("\nWould you like to analyze another text? (y/n): ").lower()
        if another == 'y':
            continue
        print("\nThank you for using the detector. Goodbye!")
        break

if __name__ == "__main__":
    main()

In [None]:
# Fix the PyTorch and NumPy seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# The fine-tuned isiZulu model and its tokenizer live in the same directory
model_path = tokenizer_path = "./zulu_finetuned_model/model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

# Text-classification pipeline that reports a score for every class
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=-1 if not torch.cuda.is_available() else 0,  # GPU 0 when available, else CPU
    return_all_scores=True
)

# Class id -> readable label
label_mapping = dict(enumerate(("human", "machine")))

# Function to predict and explain
def predict_and_explain(text):
    """Classify ``text`` with the module-level pipeline and explain it with SHAP.

    Args:
        text: The text to classify.

    Returns:
        A dict with the keys ``text``, ``predicted_label`` (``"human"`` or
        ``"machine"``), ``human_probability``, ``machine_probability`` and
        ``shap_values`` (the SHAP values for the predicted class).
    """
    # Predict
    raw_output = classifier(text)
    # Depending on how all-class scores are requested, the per-class list is
    # either returned directly or wrapped in an outer list; accept both.
    class_scores = raw_output[0] if isinstance(raw_output[0], list) else raw_output

    # Index the scores by class id taken from the 'LABEL_<id>' names. The
    # position of an entry says nothing about which class won, so the
    # prediction is the class with the highest score, not the first entry.
    prob_by_class = {
        int(entry["label"].split("_")[1]): entry["score"] for entry in class_scores
    }
    predicted_class_idx = max(prob_by_class, key=prob_by_class.get)
    predicted_label = label_mapping[predicted_class_idx]
    human_prob = prob_by_class[0]
    machine_prob = prob_by_class[1]

    # SHAP Explanation
    explainer = shap.Explainer(classifier)
    shap_values = explainer([text])

    # Extract SHAP values for the predicted class
    shap_values_for_predicted_class = shap_values[0, :, predicted_class_idx]

    # Format results
    result = {
        "text": text,
        "predicted_label": predicted_label,
        "human_probability": human_prob,
        "machine_probability": machine_prob,
        "shap_values": shap_values_for_predicted_class
    }

    # Print results
    print(f"\nInput Text: {text}")
    print(f"Predicted Label: {predicted_label}")
    print(f"Human Probability: {human_prob:.4f}")
    print(f"Machine Probability: {machine_prob:.4f}")
    print("\nSHAP Explanation:")
    shap.plots.text(shap_values[0, :, :])

    return result

# Interactive input loop
def main():
    """Read texts from the user and classify each one until 'exit' is typed."""
    print("Text Classification and SHAP Explanation Tool")
    print("Enter text to classify as human or machine-generated. Type 'exit' to quit.")

    while True:
        entry = input("\nEnter text: ").strip()

        # 'exit' in any letter case ends the session
        if entry.lower() == "exit":
            print("Exiting...")
            break

        if entry:
            # Predict and explain
            predict_and_explain(entry)
        else:
            print("Please enter some text.")

if __name__ == "__main__":
    main()