<a href="https://colab.research.google.com/github/Amanda9805/Detecting-Machine-Generated-Texts/blob/train-model/Final_project_COS760.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade transformers adapters datasets fsspec evaluate shap nltk

In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import evaluate
import torch
import shap
import warnings
import datetime
from huggingface_hub import login
from datasets import load_dataset, Dataset, ClassLabel
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding, EarlyStoppingCallback, pipeline)
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from collections import Counter
import gc
import matplotlib.pyplot as plt
import os
nltk.download('punkt_tab')

In [None]:
def clean_text(text):
    """Normalise raw text to lowercase, letters-only, single-spaced form.

    Applied in order: URL-like tokens (``http...`` / ``www...``) are removed,
    every character that is not an ASCII letter or whitespace is dropped, the
    result is lowercased, and whitespace runs are collapsed to one space with
    leading/trailing whitespace trimmed.
    """
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_urls)
    words = letters_only.lower().split()
    return ' '.join(words)

In [None]:
#shona_data = load_dataset("DigitalUmuganda/AfriVoice", "sn", streaming=True, split="train[:10%]")

# english_data = load_dataset("oscar-corpus/OSCAR-2201", language="en", streaming=True)

# num_samples = 500
# samples = []
# for i, example in enumerate(english_data["train"]):
#     if i >= num_samples:
#         break
#     samples.append(example['text'])

# eng_df = pd.DataFrame(samples, columns=['text'])
# eng_df.head()

# Load the "zul" configuration of the dsfsi/vukuzenzele-monolingual dataset;
# its 'train' split is used below as the human-written (label 0) corpus.
zulu_data = load_dataset("dsfsi/vukuzenzele-monolingual", "zul")

def reformatData(dataDict, language='zul'):
  """Convert a DataFrame of documents into a list of uniform record dicts.

  Args:
    dataDict: pandas DataFrame whose rows may carry 'text', 'author' and
      'title' columns. Missing 'author'/'title' values default to ''.
    language: language code written to every record's 'language' field.
      Defaults to 'zul' so existing callers are unaffected; pass e.g. 'eng'
      when reformatting another corpus.

  Returns:
    A list of dicts with keys 'text', 'author', 'title' and 'language'.
    Rows without a 'text' column, or whose text is empty, None or NaN, are
    skipped.
  """
  langList = []
  for _, row in dataDict.iterrows():
    text = row.get('text')
    # NaN is truthy, so a plain truthiness test would let missing text
    # through and break the string cleaning applied downstream.
    if text is None or (isinstance(text, float) and pd.isna(text)):
      continue
    if not text:
      continue
    langList.append({
        'text': text,
        'author': row.get('author', ''),
        'title': row.get('title', ''),
        'language': language
    })

  return langList

# Flatten the 'train' split into uniform records (text/author/title/language).
human_data = reformatData(zulu_data['train'].to_pandas())

zul_df = pd.DataFrame(human_data)
# Derived columns: normalised text and its NLTK word tokens.
zul_df['cleaned_text'] = zul_df['text'].apply(clean_text)
zul_df['tokens'] = zul_df['cleaned_text'].apply(word_tokenize)
# Label 0 marks human-written text; machine-generated rows get label 1.
zul_df['label'] = 0
zul_df.head()

In [None]:
from transformers import pipeline
import json

# GPT-2 text-generation pipeline.
# NOTE(review): en_generator and both prompt lists are only referenced by the
# commented-out generate_with_prompts code further down in this cell; the
# machine-generated text actually used is read from a JSONL file instead.
en_generator = pipeline('text-generation', model='gpt2')


# English prompts for machine-text generation.
english_prompts = [
    "Explain the significance of lobola in Southern Africa",
    "Write a short dialogue between two friends in Johannesburg",
    "Describe linguistic features that make isiZulu agglutinative"
]

# isiZulu prompts for machine-text generation.
zulu_prompts = [
    "Chaza ngokubaluleka kwesiko lwelobola eNingizimu Afrika",
    "Bhala inkulumo emfushane phakathi kwabangani ababili eGoli",
    "Landela indaba yamaZulu ngokomlando",
    "Chaza ngamasiko amasha eZulu eskhathini samanje",
    "Bhala inganekwane ethi 'UNogwaja noFudu'"
]

# def generate_with_prompts(prompts, generator, language, samples_per_prompt=3):
#     data = []
#     for prompt in prompts:
#         for _ in range(samples_per_prompt):
#             output = generator(prompt, max_length=100, do_sample=True, temperature=0.7)
#             data.append({
#                 'prompt': prompt,
#                 'text': output[0]['generated_text'],
#                 'label': 'machine',
#                 'language': language,
#                 'prompt_type': 'cultural' if "tsika" in prompt else 'linguistic'  # Tag for analysis
#             })
#     return pd.DataFrame(data)


# eng_mg_df = generate_with_prompts(english_prompts, en_generator, 'English')
# eng_mg_df['cleaned_text'] = eng_mg_df['text'].apply(clean_text)
# eng_mg_df['tokens'] = eng_mg_df['cleaned_text'].apply(word_tokenize)
# print(eng_mg_df['text'].iloc[0][:300])

def load_jsonl_data(file_path):
    """Read a JSON Lines file into a list of parsed objects.

    Blank lines are ignored. A line that is not valid JSON is reported on
    stdout and skipped. If the file does not exist, a message is printed and
    an empty list is returned.
    """
    records = []
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            # filter(None, ...) drops lines that are empty after stripping.
            for stripped in filter(None, map(str.strip, handle)):
                try:
                    record = json.loads(stripped)
                except json.JSONDecodeError as e:
                    print(f"Error parsing JSON line: {e}")
                else:
                    records.append(record)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return []

    return records

# Machine-generated isiZulu text, one JSON object per line.
# NOTE(review): load_jsonl_data returns [] when the file is missing, in which
# case the 'text' column accesses below would fail -- confirm the file is
# uploaded before running this cell.
zul_mg_df = pd.DataFrame(load_jsonl_data('zulu_mg_text.jsonl'))

# Normalise to the same record layout as the human-written frame.
machine_data =reformatData(zul_mg_df)
zul_mg_df = pd.DataFrame(machine_data)
# A 'text' value may be a list of strings; join it into a single string.
zul_mg_df['text'] = zul_mg_df['text'].apply(lambda x: ' '.join(x) if isinstance(x, list) else x)
zul_mg_df['language'] = 'zul'
zul_mg_df['cleaned_text'] = zul_mg_df['text'].apply(clean_text)
zul_mg_df['tokens'] = zul_mg_df['cleaned_text'].apply(word_tokenize)
# Label 1 marks machine-generated text.
zul_mg_df['label'] = 1

zul_mg_df.head()

COMBINE THE DATASETS
-------------------------

In [None]:
# Combine the human-written and machine-generated frames. A large size gap
# between the two (e.g. more than a 70% difference in length) would bias the
# classifier towards the larger class.
all_texts = pd.concat([zul_df, zul_mg_df], ignore_index=True)
# Shuffle with a fixed random_state so the row order, and anything sampled
# from this frame afterwards, is identical on every Restart & Run All.
all_texts_shuffled = all_texts.sample(frac=1, random_state=42).reset_index(drop=True)
print(f"Total texts after combining: {len(all_texts)}")
print(f"Human-written texts: {len(zul_df)}")
print(f"Machine-generated texts: {len(zul_mg_df)}")

# Create a DataFrame for easier analysis
combined_df = pd.DataFrame(all_texts_shuffled)
combined_df.head()

In [None]:
# Create a balanced dataset with equal samples from each class
# we can choose to use this of the combined one
def create_balanced_dataset(df, target_size_per_class=None):
    """Return a frame with the same number of rows for every 'label' value.

    The per-class size is the smallest class count, optionally capped by
    ``target_size_per_class`` when that is truthy. Rows are drawn per label
    with ``random_state=42`` and the result is re-indexed from 0.
    """
    smallest_class = df['label'].value_counts().min()
    per_class = (
        min(target_size_per_class, smallest_class)
        if target_size_per_class
        else smallest_class
    )
    sampled = df.groupby('label').sample(n=per_class, random_state=42)
    return sampled.reset_index(drop=True)

# Downsample so both classes contribute the same number of rows.
balanced_df = create_balanced_dataset(combined_df)
print(f"\nBalanced dataset created with {len(balanced_df)} samples")
print(f"Label distribution in balanced dataset: \n{balanced_df['label'].value_counts()}")

# Persist the balanced dataset; the training and evaluation cells read this
# CSV back instead of using the in-memory frame.
balanced_output_path = 'balanced_zulu_texts.csv'
balanced_df.to_csv(balanced_output_path, index=False, encoding='utf-8')
print(f"Balanced dataset saved to: {balanced_output_path}")
balanced_df.head()

Model training section
----------------------

In [None]:
# Seed torch and numpy so repeated runs are comparable.
torch.manual_seed(42)
np.random.seed(42)

# Load the balanced dataset written by the balancing cell.
balanced_data_path = 'balanced_zulu_texts.csv'
df = pd.read_csv(balanced_data_path, encoding='utf-8')

# Keep only the raw text and its label for the Hugging Face Dataset.
dataset = Dataset.from_pandas(df[['text', 'label']])

# Cast 'label' to ClassLabel (0 -> 'human', 1 -> 'machine'); the stratified
# split below is keyed on this column.
dataset = dataset.cast_column('label', ClassLabel(num_classes=2, names=['human', 'machine']))

# 80/20 train/validation split, stratified by label with a fixed seed.
train_val_split = dataset.train_test_split(test_size=0.2, seed=42, stratify_by_column='label')
train_dataset = train_val_split['train']
val_dataset = train_val_split['test']

# Load AfroXLMR tokenizer
model_name = "Davlan/afro-xlmr-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenization function
def tokenize_function(examples):
    """Tokenize a batch's 'text' field with the module-level ``tokenizer``.

    Every example is truncated and padded to a fixed length of 512 tokens.
    """
    tokenizer_options = dict(padding="max_length", truncation=True, max_length=512)
    return tokenizer(examples['text'], **tokenizer_options)

# Tokenize both splits in batches.
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Expose only the tensors the model consumes, as PyTorch tensors.
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

# Load AfroXLMR with a two-class sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    ignore_mismatched_sizes=True
)

# Metric objects used by compute_metrics.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 from a ``(logits, labels)`` pair.

    Predictions are the argmax of the logits over the last axis. Returns a
    dict with keys 'accuracy' and 'f1'.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)
    shared = {'predictions': predicted, 'references': labels}
    accuracy_value = accuracy_metric.compute(**shared)['accuracy']
    f1_value = f1_metric.compute(average='weighted', **shared)['f1']
    return {'accuracy': accuracy_value, 'f1': f1_value}

# Training configuration: evaluate and checkpoint once per epoch, then reload
# the checkpoint with the best weighted F1 at the end of training.
training_args = TrainingArguments(
    output_dir='./afroxlmr_finetuned',
    run_name='/afroxlmr_finetune_zulu',  # explicit run name to avoid the default-run-name warnings
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=50,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    fp16=torch.cuda.is_available(),  # Enable mixed precision only if GPU is available
    report_to="none"
)

# Collator that pads each batch with the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# NOTE(review): with num_train_epochs=3 and one evaluation per epoch, an
# early-stopping patience of 3 does not appear able to trigger -- confirm
# whether a smaller patience or more epochs was intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# Fine-tune the model
trainer.train()

# Save model and tokenizer into separate sub-directories; the later cells
# load them back from exactly these paths.
model.save_pretrained('./afroxlmr_finetuned/model')
tokenizer.save_pretrained('./afroxlmr_finetuned/tokenizer')

# Evaluate on the validation split and print the metrics.
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")

In [None]:

# Free memory before starting
gc.collect()
torch.cuda.empty_cache() if torch.cuda.is_available() else None

# Reload the fine-tuned model and tokenizer from the sub-directories written
# by the training cell.
model_path = './afroxlmr_finetuned'
model = AutoModelForSequenceClassification.from_pretrained(os.path.join(model_path, 'model'))
tokenizer = AutoTokenizer.from_pretrained(os.path.join(model_path, 'tokenizer'))

# Place the model on the GPU when one is available and switch to eval mode;
# `device` is also passed to the SHAP analysis later in this cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Load the balanced dataset
balanced_data_path = 'balanced_zulu_texts.csv'
df = pd.read_csv(balanced_data_path, encoding='utf-8')

# Convert to Hugging Face Dataset
dataset = Dataset.from_pandas(df[['text', 'label']])
dataset = dataset.cast_column('label', ClassLabel(num_classes=2, names=['human', 'machine']))

# Rebuild the validation split with the same test_size, seed and
# stratification as the training cell so the same rows are held out.
train_val_split = dataset.train_test_split(test_size=0.2, seed=42, stratify_by_column='label')
val_dataset = train_val_split['test']

# Define the same tokenization function as used for training
def tokenize_function(examples):
    """Tokenize a batch's 'text' field with the module-level ``tokenizer``.

    Uses the same settings as training: truncate and pad to 512 tokens.
    """
    tokenizer_options = dict(padding="max_length", truncation=True, max_length=512)
    return tokenizer(examples['text'], **tokenizer_options)

# Tokenize the validation dataset
val_dataset_tokenized = val_dataset.map(tokenize_function, batched=True)

# Expose only the tensors the model consumes, as PyTorch tensors.
val_dataset_tokenized.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Re-initialize the Trainer around the reloaded model so trainer.predict can
# be used for evaluation.
training_args_eval = TrainingArguments(
    output_dir='/tmp/eval_output',
    per_device_eval_batch_size=8,
    report_to="none",
    # `use_cpu` replaces the deprecated `no_cuda` argument: force CPU only
    # when no CUDA device is present.
    use_cpu=not torch.cuda.is_available()
)

trainer = Trainer(
    model=model,
    args=training_args_eval,
    eval_dataset=val_dataset_tokenized,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)


# Function to compute linguistic features
# I dont know copying code
def compute_linguistic_features(text):
    """Compute simple lexical statistics for one text.

    Returns a dict with:
      'ttr'         -- type-token ratio (unique tokens / tokens), 0 if empty
      'top_bigrams' -- the five most common token bigrams with their counts
      'text_length' -- number of tokens
    Tokens come from NLTK's word_tokenize on the lowercased text.
    """
    tokens = word_tokenize(text.lower())
    token_count = len(tokens)

    if tokens:
        type_token_ratio = len(set(tokens)) / token_count
    else:
        type_token_ratio = 0

    top_five = Counter(nltk.bigrams(tokens)).most_common(5)

    return {
        'ttr': type_token_ratio,
        'top_bigrams': top_five,
        'text_length': token_count
    }

# Error analysis using trainer.predict
def perform_error_analysis(trainer, val_dataset):
    """Collect misclassified validation examples and overall metrics.

    Runs ``trainer.predict`` on ``val_dataset``, takes the argmax of the
    logits as the prediction, and for every example whose prediction differs
    from its label records the index, text, both label names and the
    linguistic features of the text.

    Returns:
        (errors, metrics) where ``errors`` is a list of dicts and ``metrics``
        holds weighted 'precision', 'recall' and 'f1'.
    """
    output = trainer.predict(val_dataset)
    true_labels = output.label_ids
    predictions = np.argmax(output.predictions, axis=-1)

    def label_name(label_id):
        # Class 1 is 'machine'; anything else is reported as 'human'.
        return 'machine' if label_id == 1 else 'human'

    errors = []
    # The raw 'text' column supplies the strings for feature extraction.
    rows = zip(val_dataset['text'], true_labels, predictions)
    for idx, (text, gold, guess) in enumerate(rows):
        if guess == gold:
            continue
        feats = compute_linguistic_features(text)
        errors.append({
            'index': idx,
            'text': text,
            'true_label': label_name(gold),
            'pred_label': label_name(guess),
            'ttr': feats['ttr'],
            'top_bigrams': feats['top_bigrams'],
            'text_length': feats['text_length'],
        })

    # Weighted precision/recall/F1 over the whole validation set.
    from sklearn.metrics import precision_recall_fscore_support
    scores = precision_recall_fscore_support(true_labels, predictions, average='weighted')
    precision, recall, f1 = scores[0], scores[1], scores[2]

    return errors, {'precision': precision, 'recall': recall, 'f1': f1}

# SHAP explainability analysis using a prediction function compatible with SHAP
def shap_analysis(val_dataset, model, tokenizer, device, num_samples=10,
                  output_dir='/content/drive/MyDrive/shap_plots'):
    """Explain the model's 'machine' probability for a few validation texts.

    Args:
        val_dataset: dataset exposing a 'text' column of raw strings.
        model: sequence-classification model producing two logits.
        tokenizer: tokenizer matching ``model``; also used as the SHAP masker.
        device: torch device the model lives on.
        num_samples: number of leading texts to explain.
        output_dir: directory the waterfall plots are written to (created if
            missing). Defaults to the original Google Drive location.

    Returns:
        The SHAP Explanation object for the explained texts.
    """
    # Materialise the slice as a plain list of strings for the explainer.
    texts = list(val_dataset['text'][:num_samples])

    def predict_proba(texts):
        """Map raw texts to class probabilities, as SHAP expects."""
        inputs = tokenizer(
            list(texts),
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors='pt'
        )
        # Move inputs to the same device as the model.
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        # Softmax turns the logits into per-class probabilities.
        return torch.softmax(outputs.logits, dim=-1).cpu().numpy()

    explainer = shap.Explainer(predict_proba, shap.maskers.Text(tokenizer, mask_token='<unk>'), output_names=['human', 'machine'])

    shap_values = explainer(texts)

    os.makedirs(output_dir, exist_ok=True)

    for i in range(len(texts)):
        plt.figure(figsize=(10, 5))
        # show=False keeps the figure open; otherwise the plot is displayed
        # and closed before savefig runs, and a blank image is written.
        shap.plots.waterfall(shap_values[i, :, 1], show=False)
        plt.savefig(os.path.join(output_dir, f'shap_text_{i}.png'), bbox_inches='tight')
        plt.close()

    return shap_values

# Summarize findings
def summarize_findings(errors, metrics, output_file='/content/drive/MyDrive/evaluation_report.txt'):
    """Write the plain-text evaluation report to ``output_file``.

    Args:
        errors: list of misclassification dicts with keys 'index', 'text',
            'true_label', 'pred_label', 'ttr', 'top_bigrams', 'text_length'.
        metrics: dict with float 'precision', 'recall' and 'f1'.
        output_file: destination path; overwritten if it exists.
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        # Title and section 1: headline metrics.
        f.writelines([
            "Phase 3: Evaluation and Analysis Report\n",
            "=====================================\n\n",
            "1. Model Performance Metrics\n",
            f"Precision: {metrics['precision']:.4f}\n",
            f"Recall: {metrics['recall']:.4f}\n",
            f"F1-Score: {metrics['f1']:.4f}\n\n",
            "2. Error Analysis\n",
        ])

        # Section 2: one entry per misclassified example, if any.
        if not errors:
            f.write("No misclassifications found in the validation set.\n\n")
        else:
            f.write(f"Number of misclassifications: {len(errors)}\n")
            for error in errors:
                f.writelines([
                    f"Index: {error['index']}\n",
                    f"Text: {error['text'][:500]}...\n",  # first 500 characters only
                    f"True Label: {error['true_label']}\n",
                    f"Predicted Label: {error['pred_label']}\n",
                    f"TTR: {error['ttr']:.4f}\n",
                    f"Top Bigrams: {error['top_bigrams']}\n",
                    f"Text Length: {error['text_length']}\n",
                    "-" * 50 + "\n",
                ])

        # Section 3 and the opening of section 4.
        f.writelines([
            "3. SHAP Explainability Analysis\n",
            "SHAP plots have been saved to /content/drive/MyDrive/shap_plots/\n",
            "Review saved plots for detailed token-level contributions.\n\n",
            "4. Patterns of Success and Failure\n",
            "- Model achieved performance as indicated by metrics.\n",
        ])

        if errors:
            f.writelines([
                f"- Errors occurred on {len(errors)} samples. Review error analysis for patterns.\n",
                "- Linguistic features of misclassified texts might reveal insights (e.g., very short/long texts, unusual vocabulary).\n",
            ])
        else:
            f.write("- No misclassifications observed in the validation set, which could indicate excellent performance or potential overfitting on a small dataset.\n")

        f.writelines([
            "- SHAP analysis helps understand which tokens influenced the model's decisions for the tested samples.\n",
            "- Future work: Evaluate on a larger, more diverse dataset and potentially cross-lingual settings.\n",
        ])



# Predict on the tokenized validation set and collect the misclassifications.
errors, metrics = perform_error_analysis(trainer, val_dataset_tokenized)

# SHAP works on raw strings, so it gets the untokenized val_dataset together
# with the loaded model, tokenizer and device.
shap_values = shap_analysis(val_dataset, model, tokenizer, device, num_samples=min(10, len(val_dataset))) # explain at most 10 samples

# Summarize findings (SHAP values object is not directly summarized in the report text)
summarize_findings(errors, metrics)

# Free memory
del model, trainer, val_dataset, val_dataset_tokenized # drop the large objects that are no longer needed
gc.collect()
torch.cuda.empty_cache() if torch.cuda.is_available() else None

print("Evaluation and analysis complete. Report saved to /content/drive/MyDrive/evaluation_report.txt")
print(f"SHAP plots saved to /content/drive/MyDrive/shap_plots/")

Zero-shot evaluation (fine-tune on English, test on isiZulu)
---------------------

In [None]:
# Load the English configuration of the same corpus as the human-written
# English data.
eng_data = load_dataset("dsfsi/vukuzenzele-monolingual", "eng")

eng_human_data = reformatData(eng_data['train'].to_pandas())

eng_df = pd.DataFrame(eng_human_data)
# reformatData tags records as 'zul' by default; set the correct language
# code here, as is already done for the machine-generated English frame.
eng_df['language'] = 'eng'
eng_df['cleaned_text'] = eng_df['text'].apply(clean_text)
eng_df['tokens'] = eng_df['cleaned_text'].apply(word_tokenize)
# Label 0 marks human-written text.
eng_df['label'] = 0
eng_df.head()

In [None]:
# Machine-generated English text, one JSON object per line.
eng_mg_df = pd.DataFrame(load_jsonl_data('eng_mg_data.jsonl'))

# Normalise to the same record layout as the human-written frame.
eng_machine_data =reformatData(eng_mg_df)
eng_mg_df = pd.DataFrame(eng_machine_data)
# A 'text' value may be a list of strings; join it into a single string.
eng_mg_df['text'] = eng_mg_df['text'].apply(lambda x: ' '.join(x) if isinstance(x, list) else x)
# Overwrite the language code assigned by reformatData.
eng_mg_df['language'] = 'eng'
eng_mg_df['cleaned_text'] = eng_mg_df['text'].apply(clean_text)
eng_mg_df['tokens'] = eng_mg_df['cleaned_text'].apply(word_tokenize)
# Label 1 marks machine-generated text.
eng_mg_df['label'] = 1

eng_mg_df.head()

In [None]:
# Combine the human-written and machine-generated English frames.
all_eng_texts = pd.concat([eng_df, eng_mg_df], ignore_index=True)
# Shuffle with a fixed random_state so the row order, and anything sampled
# from this frame afterwards, is identical on every Restart & Run All.
all_eng_texts_shuffled = all_eng_texts.sample(frac=1, random_state=42).reset_index(drop=True)
print(f"Total texts after combining: {len(all_eng_texts)}")
print(f"Human-written texts: {len(eng_df)}")
print(f"Machine-generated texts: {len(eng_mg_df)}")

# Create a DataFrame for easier analysis
eng_combined_df = pd.DataFrame(all_eng_texts_shuffled)
eng_combined_df.head()

In [None]:
# Downsample so both English classes contribute the same number of rows.
eng_balanced_df = create_balanced_dataset(eng_combined_df)
print(f"\nBalanced dataset created with {len(eng_balanced_df)} samples")
print(f"Label distribution in balanced dataset: \n{eng_balanced_df['label'].value_counts()}")

# Persist the balanced dataset; the fine-tuning cell reads this CSV back.
balanced_output_path = 'balanced_english_texts.csv'
eng_balanced_df.to_csv(balanced_output_path, index=False, encoding='utf-8')
print(f"Balanced dataset saved to: {balanced_output_path}")
eng_balanced_df.head()

In [None]:

# Seed torch and numpy so repeated runs are comparable.
torch.manual_seed(42)
np.random.seed(42)

# Fine-tune on English data; the resulting model is evaluated zero-shot on
# isiZulu at the end of this cell.

english_data_path = 'balanced_english_texts.csv'
df_english = pd.read_csv(english_data_path, encoding='utf-8')
dataset_english = Dataset.from_pandas(df_english[['text', 'label']])
# Cast 'label' to ClassLabel (0 -> 'human', 1 -> 'machine'); the stratified
# split below is keyed on this column.
dataset_english = dataset_english.cast_column('label', ClassLabel(num_classes=2, names=['human', 'machine']))

# 80/20 train/validation split, stratified by label with a fixed seed.
train_val_split = dataset_english.train_test_split(test_size=0.2, seed=42, stratify_by_column='label')
train_dataset = train_val_split['train']
val_dataset = train_val_split['test']

# Load the XLM-RoBERTa tokenizer and a two-class classification model.
# NOTE(review): this uses "xlm-roberta-base" whereas the isiZulu model was
# fine-tuned from "Davlan/afro-xlmr-base" -- confirm the difference is intended.
model_name = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Tokenization function
def tokenize_function(examples):
    """Tokenize a batch's 'text' field with the module-level ``tokenizer``.

    Every example is truncated and padded to a fixed length of 512 tokens.
    """
    tokenizer_options = dict(padding="max_length", truncation=True, max_length=512)
    return tokenizer(examples['text'], **tokenizer_options)

# Tokenize both splits in batches and expose only the model inputs as
# PyTorch tensors.
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

# Metric objects used by compute_metrics.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 from a ``(logits, labels)`` pair.

    Predictions are the argmax of the logits over the last axis. Returns a
    dict with keys 'accuracy' and 'f1'.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)
    shared = {'predictions': predicted, 'references': labels}
    accuracy_value = accuracy_metric.compute(**shared)['accuracy']
    f1_value = f1_metric.compute(average='weighted', **shared)['f1']
    return {'accuracy': accuracy_value, 'f1': f1_value}

# Training configuration: evaluate and checkpoint once per epoch, then reload
# the checkpoint with the best weighted F1 at the end of training.
training_args = TrainingArguments(
    output_dir='./english_finetuned',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=50,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is available
    report_to="none"
)

# Collator that pads each batch with the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# This `trainer` is also the object evaluate_zero_shot uses for evaluation.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Fine-tune the model
trainer.train()

# Save model and tokenizer into separate sub-directories; the interactive
# detector loads them back from exactly these paths.
model.save_pretrained('./english_finetuned/model')
tokenizer.save_pretrained('./english_finetuned/tokenizer')

# Zero-shot evaluation of the English-fine-tuned model on another language
def evaluate_zero_shot(language, data_path):
    """Evaluate the module-level ``trainer`` on a labelled CSV of another language.

    Args:
        language: display name used in the printed summary (e.g. "isiZulu").
        data_path: CSV file with 'text' and 'label' (0 = human, 1 = machine)
            columns.

    Returns:
        The metrics dict produced by ``trainer.evaluate``, so callers can
        store or compare the results instead of only reading the printout.
    """
    # Load dataset
    df_lang = pd.read_csv(data_path, encoding='utf-8')
    dataset_lang = Dataset.from_pandas(df_lang[['text', 'label']])
    dataset_lang = dataset_lang.cast_column('label', ClassLabel(num_classes=2, names=['human', 'machine']))

    # Tokenize with the same function (and tokenizer) used for fine-tuning.
    eval_dataset = dataset_lang.map(tokenize_function, batched=True)
    eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

    # Evaluate
    results = trainer.evaluate(eval_dataset)
    print(f"{language} Zero-Shot Evaluation Results: {results}")
    return results


# Evaluate the English-fine-tuned model on the balanced isiZulu dataset.
zulu_data_path = 'balanced_zulu_texts.csv'
evaluate_zero_shot("isiZulu", zulu_data_path)

In [None]:
# Suppress verbose warnings.
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices -- consider narrowing the filter.
warnings.filterwarnings("ignore")

# Configuration
# Base directory of each fine-tuned model, keyed by language code; each is
# expected to contain 'model' and 'tokenizer' sub-directories.
MODELS_INFO = {
    'eng': {'path': './english_finetuned'},
    'zul': {'path': './afroxlmr_finetuned'}
}
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Class index <-> display label.
ID2LABEL = {0: 'Human-Generated', 1: 'Machine-Generated'}
LABEL2ID = {v: k for k, v in ID2LABEL.items()}
SHAP_PLOTS_DIR = 'shap_explanations'


# Loaded models and tokenizers, keyed by language code, kept in memory to
# avoid re-loading from disk.
MODELS_CACHE = {}

# Create the directory for SHAP plots if it doesn't exist
os.makedirs(SHAP_PLOTS_DIR, exist_ok=True)

def get_or_load_model(lang_key, device):
    """Return the ``(model, tokenizer)`` pair for ``lang_key``, ready on ``device``.

    Any other cached model currently on the GPU is moved to the CPU first.
    The requested model is taken from ``MODELS_CACHE`` when present,
    otherwise loaded from the directory configured in ``MODELS_INFO`` and
    cached. Returns ``(None, None)`` when no path is configured or the
    model/tokenizer directories do not exist.
    """
    # Make room on the GPU: evict every cached model except the requested one.
    for other_key, entry in MODELS_CACHE.items():
        if other_key == lang_key:
            continue
        if entry['model'].device.type == 'cuda':
            print(f"Moving '{other_key}' model to CPU to free up VRAM...")
            entry['model'].to('cpu')

    # Release Python garbage and PyTorch's cached GPU memory.
    gc.collect()
    if device.type == 'cuda':
        torch.cuda.empty_cache()

    cached = MODELS_CACHE.get(lang_key)
    if cached is not None:
        print(f"Loading '{lang_key}' model from cache...")
        model, tokenizer = cached['model'], cached['tokenizer']
    else:
        print(f"Loading '{lang_key}' model from disk...")
        model_path_info = MODELS_INFO.get(lang_key)
        if not model_path_info:
            print(f"Error: No model path configured for language '{lang_key}'.")
            return None, None

        model_dir, tokenizer_dir = (
            os.path.join(model_path_info['path'], sub) for sub in ('model', 'tokenizer')
        )

        if not (os.path.exists(model_dir) and os.path.exists(tokenizer_dir)):
            print(f"Error: Model or tokenizer not found at '{model_path_info['path']}'.")
            print("Please ensure you have trained and saved the models as per the notebook.")
            return None, None

        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

        # Remember the pair so the next request skips the disk load.
        MODELS_CACHE[lang_key] = {'model': model, 'tokenizer': tokenizer}

    # Move the requested model onto the target device if it is elsewhere.
    if model.device.type != device.type:
        print(f"Moving '{lang_key}' model to {device.type.upper()}...")
        model.to(device)

    model.eval()
    print("...model is ready.")
    return model, tokenizer


def predict_and_explain(text, model, tokenizer, device, lang_key=None):
    """Classify ``text`` and save a SHAP waterfall plot explaining the result.

    Args:
        text: the string to classify.
        model: sequence-classification model producing one logit per class.
        tokenizer: tokenizer matching ``model``; also used as the SHAP masker.
        device: torch device the model lives on.
        lang_key: language code used in the plot file name. When omitted, the
            module-level ``lang_key`` global is used if it exists (this is
            how earlier callers supplied it), otherwise 'unknown'.

    Returns:
        (predicted_label, confidence, plot_path): the display label from
        ``ID2LABEL``, its probability, and the path of the saved plot.
    """
    if lang_key is None:
        # Previously this name was read as an implicit global, which raised
        # NameError whenever the function was called before it had been set.
        lang_key = globals().get('lang_key', 'unknown')

    # Prediction
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=-1).cpu().numpy()[0]
    prediction_idx = int(np.argmax(probabilities))
    predicted_label = ID2LABEL[prediction_idx]
    confidence = probabilities[prediction_idx]

    # SHAP Explanation
    print("\nGenerating SHAP explanation... (this may take a moment)")

    def predict_proba_for_shap(texts):
        """Map raw texts to class probabilities, as SHAP expects."""
        if isinstance(texts, np.ndarray):
            texts = texts.tolist()
        inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        return torch.softmax(outputs.logits, dim=-1).cpu().numpy()

    explainer = shap.Explainer(predict_proba_for_shap, shap.maskers.Text(tokenizer, mask_token="<unk>"), output_names=list(ID2LABEL.values()))
    shap_values = explainer([text])

    # Plot the token contributions towards the 'Machine-Generated' class.
    class_index_to_explain = LABEL2ID['Machine-Generated']
    plt.figure()
    # show=False keeps the figure open so it can be saved below.
    shap.plots.waterfall(shap_values[0, :, class_index_to_explain], show=False)

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    plot_filename = f"shap_explanation_{lang_key}_{timestamp}.png"
    plot_path = os.path.join(SHAP_PLOTS_DIR, plot_filename)

    plt.savefig(plot_path, bbox_inches='tight')
    plt.close()
    print(f"...explanation saved to {plot_path}")

    return predicted_label, confidence, plot_path


def main():
    """
    Run the interactive detector: repeatedly ask for a language and a text,
    print the prediction with its confidence, and report where the SHAP plot
    was saved. Stops when a model cannot be loaded or the user declines to
    continue.
    """
    # predict_and_explain reads this module-level name for the plot file name.
    global lang_key

    banner = "=" * 60
    print(banner)
    print(" Human vs. Machine Text Detector for English & isiZulu")
    print(banner)
    print(f"Running on device: {str(DEVICE).upper()}")

    language_by_choice = {'1': 'eng', '2': 'zul'}

    while True:
        # Keep asking until a valid menu entry is given.
        lang_choice = ""
        while lang_choice not in language_by_choice:
            lang_choice = input("\nChoose a language:\n  1: English\n  2: isiZulu\nEnter choice (1 or 2): ")
        lang_key = language_by_choice[lang_choice]

        # Load the model on demand; give up if it is unavailable.
        model, tokenizer = get_or_load_model(lang_key, DEVICE)
        if model is None:
            return

        input_text = input(f"\nPlease enter the {lang_key.upper()} text you want to analyze:\n> ")
        if not input_text.strip():
            print("Error: Input text cannot be empty.")
            continue

        print("\nAnalyzing text...")
        predicted_label, confidence, plot_path = predict_and_explain(
            input_text, model, tokenizer, DEVICE
        )

        rule = "-" * 25
        print("\n" + rule + " ANALYSIS RESULTS " + rule)
        print(f"  Prediction: The text is likely {predicted_label.upper()}")
        print(f"  Confidence: {confidence:.2%}")
        print(f"\nAn explanation plot has been saved here: {plot_path}")
        print("-" * 70)

        another = input("\nWould you like to analyze another text? (y/n): ").lower()
        if another == 'y':
            continue
        print("\nThank you for using the detector. Goodbye!")
        return

# Start the interactive detector when this code runs as the main module.
if __name__ == "__main__":
    main()

In [None]:
# SECOND TEST: classify free text with a transformers pipeline around the
# fine-tuned AfroXLMR model.
# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Load the fine-tuned model and tokenizer saved by the training cell.
model_path = "./afroxlmr_finetuned/model"
tokenizer_path = "./afroxlmr_finetuned/tokenizer"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

# Create a text classification pipeline.
# NOTE(review): return_all_scores asks for a score per label rather than only
# the top one; it is reported as deprecated in recent transformers releases
# (top_k=None is the suggested replacement, but it changes the output
# nesting) -- confirm against the installed version before switching.
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
    return_all_scores=True
)

# Class index -> readable label.
label_mapping = {0: "human", 1: "machine"}

# Function to predict and explain
def predict_and_explain(text):
    """Classify ``text`` with the module-level ``classifier`` and show a SHAP text plot.

    Returns:
        dict with 'text', 'predicted_label' ('human' or 'machine'),
        'human_probability', 'machine_probability' and 'shap_values' (the
        SHAP values for the predicted class).
    """
    raw = classifier(text)
    # The pipeline may return either [[{label, score}, ...]] or a flat
    # [{label, score}, ...] for a single string; accept both.
    scores = raw[0] if isinstance(raw[0], list) else raw

    # Index the scores by class id (labels look like "LABEL_<id>"). The
    # entries are not guaranteed to be sorted by score, so the first entry
    # must not be taken as the prediction.
    prob_by_class = {int(item["label"].split("_")[1]): item["score"] for item in scores}
    human_prob = prob_by_class[0]
    machine_prob = prob_by_class[1]

    # The predicted class is the one with the highest probability.
    predicted_class_idx = max(prob_by_class, key=prob_by_class.get)
    predicted_label = label_mapping[predicted_class_idx]

    # SHAP Explanation
    explainer = shap.Explainer(classifier)
    shap_values = explainer([text])

    # SHAP values for the predicted class only.
    shap_values_for_predicted_class = shap_values[0, :, predicted_class_idx]

    # Format results
    result = {
        "text": text,
        "predicted_label": predicted_label,
        "human_probability": human_prob,
        "machine_probability": machine_prob,
        "shap_values": shap_values_for_predicted_class
    }

    # Print results
    print(f"\nInput Text: {text}")
    print(f"Predicted Label: {predicted_label}")
    print(f"Human Probability: {human_prob:.4f}")
    print(f"Machine Probability: {machine_prob:.4f}")
    print("\nSHAP Explanation:")
    shap.plots.text(shap_values[0, :, :])

    return result

# Interactive input loop
def main():
    """Prompt for texts in a loop and classify each one until the user types 'exit'.

    Blank input is rejected with a reminder; any other input is passed to
    ``predict_and_explain``.
    """
    print("Text Classification and SHAP Explanation Tool")
    print("Enter text to classify as human or machine-generated. Type 'exit' to quit.")

    while True:
        entered = input("\nEnter text: ").strip()

        if entered.lower() == "exit":
            print("Exiting...")
            return

        if entered:
            # Predict and explain
            predict_and_explain(entered)
        else:
            print("Please enter some text.")

# Start the interactive classification loop when this code runs as the main module.
if __name__ == "__main__":
    main()