<a href="https://colab.research.google.com/github/Amanda9805/Detecting-Machine-Generated-Texts/blob/train-model/train_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade transformers adapters datasets fsspec evaluate shap nltk lime textstat

# **Importing Libraries and Initial Setup**
Here, we import the required Python libraries for data manipulation, model development, and analysis. This step also includes initial setup tasks and setting random seeds to ensure the reproducibility of our experiments.

In [None]:
import os
import re
import json
import gc
import warnings
import torch
import nltk
import evaluate
import shap
import lime
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from nltk.tokenize import word_tokenize
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold
from lime.lime_text import LimeTextExplainer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from peft import LoraConfig, get_peft_model
from textstat import flesch_reading_ease, automated_readability_index
from huggingface_hub import login
from datasets import load_dataset, Dataset, ClassLabel, concatenate_datasets
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback,
    pipeline
)

# Initial setup
# Download the NLTK 'punkt_tab' resource (presumably needed by the
# `word_tokenize` import above — TODO confirm it is still used).
nltk.download('punkt_tab')


# Seed the PyTorch and NumPy random number generators with a fixed value so
# repeated runs of the notebook draw the same random numbers.
torch.manual_seed(42)
np.random.seed(42)
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')



# **Data Collection and Preprocessing Functions**
This cell defines a set of helper functions to load, clean, and prepare the datasets, corresponding to Phase 1 of our methodology. This includes cleaning text by removing URLs and non-alphabetic characters, loading data from different sources, and preparing balanced datasets for human- vs. machine-generated text.

In [None]:
def clean_text(text):
    """Normalize a raw text sample before it is fed to the classifiers.

    The text is stripped of URLs, reduced to ASCII letters and whitespace,
    lowercased, and has runs of whitespace collapsed to single spaces.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string. It may be empty when the input contained no
        ASCII letters.
    """
    # Remove URLs. The escape must be a single backslash inside the raw
    # string: the previous pattern used ``\\S`` which matches a literal
    # backslash followed by "S", so real URLs were never removed and ended up
    # as letter soup (e.g. "httpsexamplecom") after the next step.
    text = re.sub(r'http\S+|www\S+', '', text)
    # Remove non-alphabetic characters
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Convert to lowercase
    text = text.lower()
    # Remove extra whitespace
    return ' '.join(text.split())

def load_machine_text(file_path):
    """Load machine-generated samples from a JSON Lines file.

    Each non-blank line of the file is parsed as one JSON document and becomes
    one row of the result. When a row's ``text`` value is a list of strings it
    is joined with single spaces; string values are left untouched.

    Args:
        file_path: Path to the ``.jsonl`` file.

    Returns:
        A DataFrame with one row per JSON line. An empty DataFrame is returned
        (after printing an error) when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        # Open and read the machine-generated text file line by line
        with open(file_path, 'r', encoding='utf-8') as file:
            records = [json.loads(line) for line in file if line.strip()]
    except FileNotFoundError:
        print(f"Error: Machine-generated text file not found at {file_path}")
        return pd.DataFrame()

    # Convert list of dicts to DataFrame
    df = pd.DataFrame(records)
    if 'text' in df.columns:
        # Decide per row whether joining is needed. Looking only at the first
        # row (as before) mangled string rows that followed a list row
        # ("abc" -> "a b c") and left list rows unjoined after a string row.
        df['text'] = df['text'].apply(
            lambda value: ' '.join(value) if isinstance(value, list) else value
        )
    return df

def prepare_dataset(lang_code, human_data_source, machine_data_path, output_csv_path):
    """Build, balance and save a human-vs-machine dataset for one language.

    Human-written text (label 0) is loaded from the ``train`` split of a
    Hugging Face dataset, machine-generated text (label 1) from a JSON Lines
    file. Both are cleaned with ``clean_text``, down-sampled to the size of the
    smaller class, shuffled and written to ``output_csv_path``.

    Args:
        lang_code: Dataset configuration name; also stored in the
            ``language`` column.
        human_data_source: Hugging Face dataset identifier for the human text.
        machine_data_path: Path to the machine-generated ``.jsonl`` file.
        output_csv_path: Where the balanced dataset is saved as CSV.

    Returns:
        The balanced DataFrame with columns ``text``, ``label`` and
        ``language``, or ``None`` when no usable machine-generated data was
        found or one of the classes has no text left after cleaning.
    """
    print(f"Preparing dataset for language: {lang_code.upper()}")

    # Load human-written dataset from HuggingFace Datasets
    human_dataset = load_dataset(human_data_source, lang_code)

    human_df = human_dataset['train'].to_pandas()
    human_df = human_df[['text']].dropna()
    human_df['label'] = 0
    human_df['language'] = lang_code

    print(f"Loaded {len(human_df)} human-written data.")

    # Load machine-generated dataset
    machine_df = load_machine_text(machine_data_path)

    if machine_df.empty:
        return None

    machine_df = machine_df[['text']].dropna()
    machine_df['label'] = 1
    machine_df['language'] = lang_code

    print(f"Loaded {len(machine_df)} machine-generated data.")

    # Combine human and machine data
    combined_df = pd.concat([human_df, machine_df], ignore_index=True)
    combined_df['text'] = combined_df['text'].apply(clean_text)
    # Drop rows with missing text
    combined_df.dropna(subset=['text'], inplace=True)
    # clean_text never produces NaN, but it can reduce a sample (e.g. one made
    # only of digits or a URL) to the empty string. Such rows carry no signal
    # and would otherwise survive into the balanced dataset.
    combined_df = combined_df[combined_df['text'].str.len() > 0]

    if combined_df['label'].nunique() < 2:
        print(f"Error: no usable text left for one of the classes ({lang_code}).")
        return None

    # Balance the dataset by sampling the same number of samples from each class
    min_class_size = combined_df['label'].value_counts().min()
    balanced_df = combined_df.groupby('label').sample(n=min_class_size, random_state=42)
    # Shuffle the balanced dataset
    balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)

    print(f"Created a balanced dataset with {len(balanced_df)} total samples ({min_class_size} per class).")
    # Save the balanced dataset to CSV
    balanced_df.to_csv(output_csv_path, index=False, encoding='utf-8')
    print(f"Balanced dataset saved to {output_csv_path}")
    return balanced_df



# **Creating and Balancing the Datasets**
In this step, we execute the data preparation functions to create balanced training and testing datasets for both isiZulu and English.

We load human-written text from the `vukuzenzele` dataset and combine it with machine-generated text samples to form the basis for our model training and evaluation.

In [None]:
# Create small example machine-generated text files if they don't exist.
# The files are written as UTF-8 explicitly because load_machine_text reads
# them back with encoding='utf-8'; relying on the platform default encoding
# would make the round trip platform-dependent.
_SAMPLE_MACHINE_TEXT_FILES = {
    'zulu_mg_text.jsonl': (
        '{"text": "Lokhu umbhalo owenziwe ngomshini mayelana namasiko akwaZulu."}\n',
        '{"text": "UNogwaja noFudu babengabangani abakhulu ehlathini."}\n',
    ),
    'eng_mg_data.jsonl': (
        '{"text": "This is machine-generated text about South African culture."}\n',
        '{"text": "The quick brown fox jumps over the lazy dog in Johannesburg."}\n',
    ),
}

for _sample_path, _sample_lines in _SAMPLE_MACHINE_TEXT_FILES.items():
    if not os.path.exists(_sample_path):
        with open(_sample_path, 'w', encoding='utf-8') as f:
            f.writelines(_sample_lines)

# Prepare balanced datasets for isiZulu and English
zulu_dataset = prepare_dataset('zul', 'dsfsi/vukuzenzele-monolingual', 'zulu_mg_text.jsonl', 'balanced_zulu_texts.csv')
english_dataset = prepare_dataset('eng', 'dsfsi/vukuzenzele-monolingual', 'eng_mg_data.jsonl', 'balanced_english_texts.csv')

# Display a sample of each dataset for verification
# (prepare_dataset returns None on failure; pd.DataFrame(None) then prints as
# an empty frame instead of raising).
print("\nZulu Dataset Sample:")
print(pd.DataFrame(zulu_dataset).head())
print("\nEnglish Dataset Sample:")
print(pd.DataFrame(english_dataset).head())

# **Model Architecture: The Text Classifier Class**
The `ZuluTextClassifier` class (aliased as `TextClassifier`) defines the core architecture for our model. It handles loading a pre-trained transformer model (such as AfriBERTa), preparing data splits, setting up Low-Rank Adaptation (LoRA) for efficient fine-tuning, and running training and evaluation, aligning with Phase 2 of our project plan.

In [None]:
class ZuluTextClassifier:
    """Transformer classifier for human- vs. machine-written text detection.

    Wraps a Hugging Face sequence-classification model (AfriBERTa by default)
    together with its tokenizer and provides helpers for splitting and
    tokenizing data, LoRA fine-tuning with the Hugging Face ``Trainer``,
    evaluation, and single-text prediction.
    """

    # Position in this list == integer class label (0 = human, 1 = machine).
    CLASS_NAMES = ['Human', 'Machine']

    def __init__(self, model_name='castorini/afriberta_base', max_length=512):
        """Load the tokenizer and a two-label classification model.

        Args:
            model_name: Model identifier or local directory handed to
                ``from_pretrained``.
            max_length: Maximum sequence length in tokens; longer inputs are
                truncated.
        """
        # Initialize the tokenizer and model for sequence classification
        print(f"Loading model: {model_name}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
        self.max_length = max_length
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        # Tracks whether setup_lora() has already wrapped self.model.
        self._lora_enabled = False

        print(f"Model initialized on {self.device}")

    # Prepare train and test splits from a DataFrame
    def prepare_data(self, df):
        """Split a DataFrame into stratified 80/20 train and test sets.

        Args:
            df: DataFrame with ``text`` and ``label`` columns.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` where the X values are
            lists of strings and the y values are encoded integer labels.
        """
        X = df['text'].tolist()
        y = LabelEncoder().fit_transform(df['label'])
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

        return X_train, X_test, y_train, y_test

    # Tokenize and encode the train and test data
    def create_datasets(self, X_train, X_test, y_train, y_test):
        """Tokenize both splits and wrap them as ``Dataset`` objects.

        Returns:
            ``(train_dataset, test_dataset)``, each holding ``input_ids``,
            ``attention_mask`` and ``labels``.
        """
        train_encodings = self.tokenizer(X_train, truncation=True, padding=True, max_length=self.max_length)
        test_encodings = self.tokenizer(X_test, truncation=True, padding=True, max_length=self.max_length)

        train_data = {
            'input_ids': train_encodings['input_ids'],
            'attention_mask': train_encodings['attention_mask'],
            'labels': y_train,
        }
        test_data = {
            'input_ids': test_encodings['input_ids'],
            'attention_mask': test_encodings['attention_mask'],
            'labels': y_test,
        }

        train_dataset = Dataset.from_dict(train_data)
        test_dataset = Dataset.from_dict(test_data)

        return train_dataset, test_dataset

    # enable Low-Rank Adaptation for efficient fine-tuning
    def setup_lora(self, use_lora=True):
        """Wrap the model with LoRA adapters on the attention projections.

        Args:
            use_lora: When false the model is left untouched.
        """
        if not use_lora:
            return
        if getattr(self, '_lora_enabled', False):
            # train() calls this on every run; wrapping an already wrapped
            # model again would stack a second adapter on top of the first.
            print("LoRA already enabled; skipping re-initialisation.")
            return

        print("Setting up LoRA configuration...")
        lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["query", "key", "value"], lora_dropout=0.1, bias="none", task_type="SEQ_CLS")
        self.model = get_peft_model(self.model, lora_config)
        self._lora_enabled = True

        # Count only parameters that will receive gradients. The previous
        # message printed the model's total parameter count under the label
        # "Trainable parameters".
        trainable = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in self.model.parameters())
        print(f"LoRA enabled. Trainable parameters: {trainable} of {total}")

    # Train the model using HuggingFace Trainer
    def train(self, train_dataset, eval_dataset, output_dir):
        """Fine-tune the model with LoRA and save it to ``output_dir``.

        Args:
            train_dataset: Tokenized training dataset.
            eval_dataset: Tokenized dataset evaluated every 50 steps; the
                checkpoint with the lowest ``eval_loss`` is restored at the end.
            output_dir: Directory for checkpoints, the final model and the
                tokenizer.

        Returns:
            The ``Trainer`` instance used for training.
        """
        self.setup_lora(use_lora=True)
        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=5,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            warmup_steps=100,
            weight_decay=0.01,
            learning_rate=5e-5,
            logging_steps=50,
            eval_strategy="steps",
            eval_steps=50,
            save_strategy="steps",
            save_steps=50,
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            fp16=torch.cuda.is_available(),
            report_to="none",
        )
        # NOTE(review): recent transformers releases rename the `tokenizer`
        # argument to `processing_class` — confirm against the installed version.
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            tokenizer=self.tokenizer,
        )

        # Print where a specific model will be stored and save
        print(f"Starting training... Results will be saved to {output_dir}")

        trainer.train()
        # NOTE(review): the model is LoRA-wrapped at this point, so this
        # presumably writes adapter weights rather than a full checkpoint —
        # confirm that reloading from output_dir restores the classifier head.
        trainer.save_model()
        self.tokenizer.save_pretrained(output_dir)

        return trainer

    # Evaluate the model and return metrics
    def evaluate(self, test_dataset, trainer=None):
        """Predict on ``test_dataset`` and compute classification metrics.

        Args:
            test_dataset: Tokenized dataset including ``labels``.
            trainer: Optional ``Trainer`` to predict with; a default one is
                built around ``self.model`` when omitted.

        Returns:
            Dict with overall ``accuracy``, weighted ``precision``/``recall``/
            ``f1``, the per-class arrays (index 0 = human, 1 = machine),
            ``support``, and the raw ``y_true``/``y_pred`` arrays.
        """
        if trainer is None:
            trainer = Trainer(model=self.model)

        predictions = trainer.predict(test_dataset)

        y_pred = np.argmax(predictions.predictions, axis=1)
        y_true = predictions.label_ids

        accuracy = accuracy_score(y_true, y_pred)

        # Pin the label set so the per-class arrays always have one entry per
        # class, even when a class is absent from both y_true and y_pred
        # (possible on very small evaluation sets).
        label_ids = list(range(len(self.CLASS_NAMES)))
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average='weighted', labels=label_ids, zero_division=0
        )
        precision_per_class, recall_per_class, f1_per_class, support = precision_recall_fscore_support(
            y_true, y_pred, average=None, labels=label_ids, zero_division=0
        )

        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'precision_per_class': precision_per_class,
            'recall_per_class': recall_per_class,
            'f1_per_class': f1_per_class,
            'support': support,
            'y_true': y_true,
            'y_pred': y_pred,
        }

    # Print formatted evaluation results
    def print_evaluation_results(self, results):
        """Print the metrics dict produced by ``evaluate`` in readable form."""
        print("\n" + "="*50 + "\nEVALUATION RESULTS\n" + "="*50)
        print(f"Overall Accuracy: {results['accuracy']:.4f}\nWeighted Precision: {results['precision']:.4f}\nWeighted Recall: {results['recall']:.4f}\nWeighted F1-Score: {results['f1']:.4f}\n\nPer-Class Results:")

        class_names = self.CLASS_NAMES

        for i, class_name in enumerate(class_names):
            print(f"{class_name}:\n  Precision: {results['precision_per_class'][i]:.4f}\n  Recall: {results['recall_per_class'][i]:.4f}\n  F1-Score: {results['f1_per_class'][i]:.4f}\n  Support: {results['support'][i]}")

        # labels= keeps target_names aligned with the label ids when one
        # class does not occur in the evaluated data.
        report = classification_report(
            results['y_true'],
            results['y_pred'],
            labels=list(range(len(class_names))),
            target_names=class_names,
            zero_division=0,
        )
        print("\nClassification Report:\n", report)

    # Predict the class of a single text string
    def predict_single_text(self, text):
        """Classify one text string.

        Args:
            text: The text to classify.

        Returns:
            Dict with ``predicted_class`` ('Human' or 'Machine'),
            ``confidence`` (probability of the predicted class) and
            ``probabilities`` for both classes.
        """
        self.model.eval()
        # A single sequence needs no padding; padding it to max_length only
        # added masked positions for the model to process.
        encoding = self.tokenizer(text, truncation=True, max_length=self.max_length, return_tensors='pt')
        encoding = {k: v.to(self.device) for k, v in encoding.items()}

        with torch.no_grad():
            outputs = self.model(**encoding)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
            predicted_class = torch.argmax(predictions, dim=-1).item()
            confidence = predictions.max().item()

        class_names = self.CLASS_NAMES
        return {
            'predicted_class': class_names[predicted_class],
            'confidence': confidence,
            'probabilities': {
                'Human': predictions[0][0].item(),
                'Machine': predictions[0][1].item(),
            },
        }

# **Explainability Framework: LIME and Linguistic Analysis**
The `TextExplainabilityAnalyzer` class implements our explainability framework. It uses LIME (Local Interpretable Model-agnostic Explanations) to analyze feature importance, performs linguistic feature analysis (lexical diversity, sentence length) and conducts error analysis on misclassified examples to address our core research questions.

In [None]:
# Explainability analysis for machine vs human text classification
class TextExplainabilityAnalyzer:
    """Explainability analysis for the human-vs-machine text classifier.

    Combines LIME word-level explanations, simple linguistic statistics and
    an analysis of misclassified samples for a fine-tuned model.
    """

    # Initialize with model, tokenizer, device, and class names
    def __init__(self, model, tokenizer, device, max_length=512, batch_size=32):
        """Store the model handles and create the LIME explainer.

        Args:
            model: Sequence-classification model returning ``.logits``.
            tokenizer: Tokenizer matching ``model``.
            device: Torch device the model lives on.
            max_length: Maximum token length used when encoding texts.
            batch_size: Number of texts scored per forward pass for LIME.
        """
        self.model, self.tokenizer, self.device = model, tokenizer, device
        self.class_names = ['Human', 'Machine']
        self.max_length = max_length
        self.batch_size = batch_size
        self.lime_explainer = LimeTextExplainer(class_names=self.class_names)

    # Predict class probabilities for a list of texts for LIME
    def predict_proba_for_lime(self, texts):
        """Return class probabilities for ``texts`` as LIME expects them.

        Args:
            texts: Iterable of strings.

        Returns:
            Array with one row per text and one column per class.
        """
        texts = list(texts)
        probabilities = []
        self.model.eval()

        # LIME scores hundreds of perturbed texts per explanation; scoring
        # them in batches avoids one forward pass (padded to the maximum
        # length) per text.
        with torch.no_grad():
            for start in range(0, len(texts), self.batch_size):
                batch = texts[start:start + self.batch_size]
                encoding = self.tokenizer(batch, truncation=True, padding=True, max_length=self.max_length, return_tensors='pt')
                encoding = {k: v.to(self.device) for k, v in encoding.items()}
                logits = self.model(**encoding).logits
                probabilities.extend(torch.nn.functional.softmax(logits, dim=-1).cpu().numpy())

        return np.array(probabilities)

    # Generate LIME explanation for a single text
    def explain_with_lime(self, text, num_features=10):
        """Explain one text with LIME using 500 perturbed samples."""
        return self.lime_explainer.explain_instance(text, self.predict_proba_for_lime, num_features=num_features, num_samples=500)

    @staticmethod
    def _features_supporting_label(features, true_label):
        """Return the tokens whose LIME weight pushes toward ``true_label``.

        ``features`` is a list of ``(token, weight)`` pairs. The explanations
        are requested with LIME's defaults, which explain class index 1
        ('Machine'): a positive weight raises the Machine probability and a
        negative weight raises the Human probability.
        """
        if true_label == 0:
            return [token for token, weight in features if weight < 0]
        return [token for token, weight in features if weight > 0]

    @staticmethod
    def _stratified_sample_indices(labels, sample_ratio):
        """Pick a label-stratified subset of indices covering ``sample_ratio``."""
        if sample_ratio >= 1:
            # train_test_split rejects an empty complement, so use everything.
            return list(range(len(labels)))
        sample_indices, _ = train_test_split(range(len(labels)), test_size=1-sample_ratio, stratify=labels, random_state=42)
        return sample_indices

    # Analyze most important features for each class using LIME
    def analyze_feature_importance(self, test_texts, test_labels, sample_ratio=0.3):
        """Collect the words LIME associates with each class.

        Args:
            test_texts: List of texts.
            test_labels: Integer labels aligned with ``test_texts``.
            sample_ratio: Fraction of the data to explain.

        Returns:
            Dict with the raw ``human_features``/``machine_features`` token
            lists and ``human_patterns``/``machine_patterns``, the 20 most
            frequent tokens of each list with their counts.
        """
        results = {
            'human_features': [],
            'machine_features': [],
        }

        sample_indices = self._stratified_sample_indices(test_labels, sample_ratio)
        num_samples = len(sample_indices)
        print(f"Analyzing {num_samples} samples ({sample_ratio*100:.0f}%)")

        for idx in tqdm(sample_indices, desc="Generating LIME explanations"):
            text, true_label = test_texts[idx], test_labels[idx]

            if not text or text.isspace():
                continue
            try:
                features = self.explain_with_lime(text).as_list()
            except Exception as e:
                # Best effort: one failing explanation must not abort the run.
                print(f"LIME Error on index {idx}: {e}")
                continue

            # Previously positive weights were collected for both classes,
            # so the "human" list actually held words pushing toward Machine.
            bucket = 'human_features' if true_label == 0 else 'machine_features'
            results[bucket].extend(self._features_supporting_label(features, true_label))

        results['human_patterns'] = Counter(results['human_features']).most_common(20)
        results['machine_patterns'] = Counter(results['machine_features']).most_common(20)

        return results

    # Compute basic linguistic features for sampled texts
    def linguistic_feature_analysis(self, texts, labels, sample_ratio=0.3):
        """Compute simple linguistic statistics per class on a sample.

        For each sampled text the average sentence length (in words), the
        lexical diversity (unique words / words) and a repetition score
        (repeated word occurrences / words) are computed.

        Returns:
            ``(human_features, machine_features)``: two dicts mapping each
            feature name to the list of values for that class.
        """
        sample_indices = self._stratified_sample_indices(labels, sample_ratio)
        sampled_texts = [texts[i] for i in sample_indices]
        sampled_labels = [labels[i] for i in sample_indices]
        print(f"Running linguistic analysis on {len(sampled_texts)} samples ({sample_ratio*100:.0f}%)")

        features = {'avg_sentence_length': [], 'lexical_diversity': [], 'repetition_score': []}
        for text in tqdm(sampled_texts, desc="Computing linguistic features"):
            # NOTE(review): texts that went through clean_text no longer
            # contain '.', '!' or '?', so each text presumably counts as a
            # single sentence here — confirm this is intended.
            sentence_lengths = [len(s.split()) for s in re.split(r'[.!?]+', text) if s.strip()]
            avg_sent_len = np.mean(sentence_lengths) if sentence_lengths else 0
            features['avg_sentence_length'].append(avg_sent_len)

            words = text.lower().split()
            lexical_div = len(set(words)) / len(words) if words else 0
            features['lexical_diversity'].append(lexical_div)

            word_counts = Counter(words)
            repetition = sum(count - 1 for count in word_counts.values()) / len(words) if words else 0
            features['repetition_score'].append(repetition)

        human_features = {k: [v[i] for i, label in enumerate(sampled_labels) if label == 0] for k, v in features.items()}
        machine_features = {k: [v[i] for i, label in enumerate(sampled_labels) if label == 1] for k, v in features.items()}

        return human_features, machine_features

    # Plot histograms for each linguistic feature for both classes
    def plot_feature_distributions(self, human_features, machine_features):
        """Draw one overlaid human/machine histogram per linguistic feature."""
        fig, axes = plt.subplots(1, 3, figsize=(18, 5))
        axes = axes.ravel()
        feature_names = list(human_features.keys())

        for ax, feature in zip(axes, feature_names):
            sns.histplot(human_features[feature], ax=ax, color='skyblue', label='Human', kde=True)
            sns.histplot(machine_features[feature], ax=ax, color='salmon', label='Machine', kde=True)

            ax.set_title(feature.replace('_', ' ').title(), fontsize=14)
            ax.set_xlabel('Value')
            ax.set_ylabel('Frequency')
            ax.legend()

        plt.tight_layout()
        plt.show()

    # Analyze misclassified examples and generate LIME explanations for a sample
    def error_analysis(self, test_texts, test_labels, predictions, sample_ratio=0.3):
        """Summarise misclassifications and explain a few of them with LIME.

        Returns:
            Dict with ``total_errors``, ``false_positives`` (human predicted
            as machine), ``false_negatives`` (machine predicted as human), up
            to 10 ``examples`` and up to 5 LIME ``explanations``.
        """
        all_errors = [
            {
                'text': test_texts[i],
                'true_label': self.class_names[true_label],
                'predicted_label': self.class_names[pred_label],
            }
            for i, (true_label, pred_label) in enumerate(zip(test_labels, predictions))
            if true_label != pred_label
        ]

        if all_errors:
            num_errors_to_analyze = min(max(1, int(len(all_errors) * sample_ratio)), len(all_errors))
            # Sample positions rather than the dicts themselves so NumPy does
            # not have to coerce a list of dicts into an array.
            chosen = np.random.choice(len(all_errors), num_errors_to_analyze, replace=False)
            errors_to_analyze = [all_errors[i] for i in chosen]
        else:
            errors_to_analyze = []

        print(f"Found {len(all_errors)} total errors, analyzing {len(errors_to_analyze)} in detail")
        error_analysis_res = {
            'total_errors': len(all_errors),
            'false_positives': len([e for e in all_errors if e['true_label'] == 'Human']),
            'false_negatives': len([e for e in all_errors if e['true_label'] == 'Machine']),
            'examples': all_errors[:10],
        }
        error_explanations = []

        # At most five errors are explained to keep the LIME cost bounded.
        for error in tqdm(errors_to_analyze[:5], desc="Generating error explanations"):
            try:
                explanation = self.explain_with_lime(error['text'])
                error_explanations.append({'error': error, 'explanation': explanation.as_list()})
            except Exception as e:
                print(f"Error generating explanation: {e}")

        error_analysis_res['explanations'] = error_explanations

        return error_analysis_res

    # Run all explainability analyses and print a summary report
    def generate_explanation_report(self, test_texts, test_labels, predictions, sample_ratio=0.3):
        """Run all analyses, print a report and return the raw results.

        Returns:
            Dict with ``feature_analysis``, ``linguistic_analysis`` (a
            ``(human_features, machine_features)`` tuple) and
            ``error_analysis``.
        """
        print("="*60 + f"\nMODEL EXPLAINABILITY ANALYSIS REPORT (Analyzing {sample_ratio*100:.0f}% of data)\n" + "="*60)
        print("\n1. FEATURE IMPORTANCE ANALYSIS (LIME)\n" + "-" * 40)

        feature_analysis = self.analyze_feature_importance(test_texts, test_labels, sample_ratio)
        print("Top features indicating HUMAN text:")
        for f, c in feature_analysis['human_patterns'][:10]:
            print(f"  - '{f}': {c}")
        print("\nTop features indicating MACHINE text:")
        for f, c in feature_analysis['machine_patterns'][:10]:
            print(f"  - '{f}': {c}")

        print("\n2. LINGUISTIC FEATURE ANALYSIS\n" + "-" * 40)
        human_features, machine_features = self.linguistic_feature_analysis(test_texts, test_labels, sample_ratio)
        for name in human_features:
            print(f"{name.replace('_', ' ').title()}:\n  Human avg: {np.mean(human_features[name]):.3f}, Machine avg: {np.mean(machine_features[name]):.3f}")

        print("\n3. ERROR ANALYSIS\n" + "-" * 40)
        error_analysis_res = self.error_analysis(test_texts, test_labels, predictions, sample_ratio)
        print(f"Total classification errors: {error_analysis_res['total_errors']}\nFalse positives (Human → Machine): {error_analysis_res['false_positives']}\nFalse negatives (Machine → Human): {error_analysis_res['false_negatives']}")
        print("\nExample misclassifications:")
        for i, ex in enumerate(error_analysis_res['examples'][:3]):
            print(f"  {i+1}. True: {ex['true_label']}, Predicted: {ex['predicted_label']}\n     Text: {ex['text'][:100]}...")

        return {'feature_analysis': feature_analysis, 'linguistic_analysis': (human_features, machine_features), 'error_analysis': error_analysis_res}


# Run explainability analysis and plot feature distributions
def add_explainability_to_classifier(classifier, test_texts, test_labels, predictions, sample_ratio=0.3):
    """Produce the explainability report for a classifier and plot its linguistic features.

    Args:
        classifier: Object exposing ``model``, ``tokenizer`` and ``device``.
        test_texts: Texts the report is computed on.
        test_labels: Integer labels aligned with ``test_texts``.
        predictions: Predicted labels aligned with ``test_texts``.
        sample_ratio: Fraction of the data each analysis samples.

    Returns:
        ``(explainer, report)``: the analyzer instance and the dict returned
        by its ``generate_explanation_report``.
    """
    analyzer = TextExplainabilityAnalyzer(
        classifier.model,
        classifier.tokenizer,
        classifier.device,
    )
    full_report = analyzer.generate_explanation_report(
        test_texts, test_labels, predictions, sample_ratio
    )

    print("\n4. LINGUISTIC FEATURE DISTRIBUTION PLOTS\n" + "-" * 40)
    # 'linguistic_analysis' holds the (human, machine) feature dicts in order.
    analyzer.plot_feature_distributions(*full_report['linguistic_analysis'])

    return analyzer, full_report

# **Experiment Orchestration: Defining Training Pipelines**
This cell contains the high-level functions `run_experiment` and `run_zero_shot_experiment`. These functions orchestrate the entire training, evaluation and explainability pipeline for a given model and language dataset, enabling systematic testing of our hypotheses regarding cross-lingual transfer and model performance.

In [None]:
# alias for the main classifier class
class TextClassifier(ZuluTextClassifier):
  """Language-neutral name for `ZuluTextClassifier`; adds no behaviour of its own."""
  pass

# Run a full experiment: train, evaluate, and explain
def run_experiment(model_name, dataset_df, language_name, run_explainability=False):
    """Fine-tune and evaluate one model on one language dataset.

    Args:
        model_name: Model identifier handed to ``TextClassifier``.
        dataset_df: DataFrame with ``text`` and ``label`` columns.
        language_name: Language tag used in log output, the output directory
            name and the returned summary.
        run_explainability: When true, also run the explainability analysis
            on the held-out split.

    Returns:
        Dict with ``model_name``, ``language``, ``accuracy``, ``f1_score``,
        ``precision`` and ``recall``.
    """
    rule = "=" * 80
    print("\n" + rule + f"\nSTARTING EXPERIMENT: Model='{model_name}', Language='{language_name}'\n" + rule)
    output_dir = f"./fineTunedModes/results_{model_name.replace('/', '-')}_{language_name}"

    clf = TextClassifier(model_name=model_name)

    # splits == (X_train, X_test, y_train, y_test)
    splits = clf.prepare_data(dataset_df)
    train_ds, test_ds = clf.create_datasets(*splits)

    # NOTE(review): the held-out split doubles as the evaluation set during
    # training and as the final test set — confirm this is intended.
    fitted_trainer = clf.train(train_ds, test_ds, output_dir=output_dir)

    metrics = clf.evaluate(test_ds, fitted_trainer)
    clf.print_evaluation_results(metrics)

    if run_explainability:
        print("\nRunning Full Explainability Analysis")
        _, held_out_texts, _, held_out_labels = splits
        add_explainability_to_classifier(
            clf, held_out_texts, held_out_labels, metrics['y_pred'], sample_ratio=0.3
        )

    # Drop the large objects before asking CUDA and the GC to reclaim memory.
    del clf, fitted_trainer, train_ds, test_ds
    torch.cuda.empty_cache()
    gc.collect()

    summary = {
        "model_name": model_name,
        "language": language_name,
        "accuracy": metrics['accuracy'],
        "f1_score": metrics['f1'],
        "precision": metrics['precision'],
        "recall": metrics['recall'],
    }
    return summary

# Train on source language then evaluate on target language
def run_zero_shot_experiment(model_name, source_df, target_df):
    """Fine-tune on the source-language data and evaluate on the target language.

    The model is trained on a split of ``source_df`` and then scored on the
    held-out 20% split of ``target_df`` without seeing any target-language
    training data.

    Args:
        model_name: Model identifier handed to ``TextClassifier``.
        source_df: Training-language DataFrame (``text``/``label`` columns).
        target_df: Evaluation-language DataFrame (``text``/``label`` columns).

    Returns:
        Dict with ``model_name`` (suffixed with " (Zero-Shot)"), ``language``
        ("zul"), ``accuracy``, ``f1_score``, ``precision`` and ``recall``.
    """
    rule = "=" * 80
    print("\n" + rule + f"\nSTARTING ZERO-SHOT EXPERIMENT: Model='{model_name}', Source='English', Target='isiZulu'\n" + rule)
    output_dir = f"./fineTunedModes/results_{model_name.replace('/', '-')}_eng-for-zeroshot"

    clf = TextClassifier(model_name=model_name)

    # Source language: build the train/eval datasets and fine-tune on them.
    source_train_ds, source_eval_ds = clf.create_datasets(*clf.prepare_data(source_df))

    print("\nTraining on source language (English)")
    fitted_trainer = clf.train(source_train_ds, source_eval_ds, output_dir=output_dir)

    # Target language: only the held-out texts and labels are needed.
    print("\nEvaluating on target language (isiZulu)")
    target_splits = clf.prepare_data(target_df)
    target_texts, target_labels = target_splits[1], target_splits[3]
    encoded = clf.tokenizer(target_texts, truncation=True, padding=True, max_length=clf.max_length)
    target_ds = Dataset.from_dict(
        {
            'input_ids': encoded['input_ids'],
            'attention_mask': encoded['attention_mask'],
            'labels': target_labels,
        }
    )

    metrics = clf.evaluate(target_ds, fitted_trainer)
    clf.print_evaluation_results(metrics)

    # Release the model and trainer before reclaiming memory.
    del clf, fitted_trainer
    torch.cuda.empty_cache()
    gc.collect()

    summary = {
        "model_name": f"{model_name} (Zero-Shot)",
        "language": "zul",
        "accuracy": metrics['accuracy'],
        "f1_score": metrics['f1'],
        "precision": metrics['precision'],
        "recall": metrics['recall'],
    }
    return summary



# **Defining the Full Project Pipeline**
The `full_project_pipeline` function defines the complete experimental workflow. It iterates through the specified models (AfriBERTa, XLM-Roberta) and languages (isiZulu, English), runs both standard fine-tuning and zero-shot experiments, and generates a final comparative analysis report with visualizations to summarize our findings.

After training, if you want to use the trained models, download the zipped archive (`fineTunedModes.zip`) and extract it on the server that you use for prediction.

In [None]:
# Run all experiments and print comparative results
def full_project_pipeline():
    """Run every experiment, report the results and package the models.

    Reads the module-level ``zulu_dataset`` and ``english_dataset``. Each
    model is fine-tuned on each language, then trained on English and
    evaluated zero-shot on isiZulu. The collected metrics are printed and
    plotted, and the ``./fineTunedModes`` directory is zipped if it exists.
    """
    model_ids = ['castorini/afriberta_base', 'xlm-roberta-base']
    frames_by_lang = {"zul": zulu_dataset, "eng": english_dataset}
    collected = []

    # Standard fine-tuning: every model on every language. Explainability is
    # only run for the primary experiment (AfriBERTa on isiZulu).
    for lang_code, frame in frames_by_lang.items():
        for model_id in model_ids:
            explain = lang_code == 'zul' and model_id == 'castorini/afriberta_base'
            collected.append(run_experiment(model_id, frame, lang_code, run_explainability=explain))

    # Zero-shot transfer: train on English, test on isiZulu.
    for model_id in model_ids:
        collected.append(run_zero_shot_experiment(model_id, frames_by_lang['eng'], frames_by_lang['zul']))

    # Final comparative report.
    print("\n\n" + "#"*80 + "\n" + " " * 20 + "FINAL COMPARATIVE ANALYSIS REPORT\n" + "#"*80)
    results_df = pd.DataFrame(collected)
    print("\n--- Performance Metrics Across All Experiments ---\n", results_df)

    # Grouped bar chart of F1 per language, one bar per model/strategy.
    plt.style.use('seaborn-v0_8-whitegrid')
    _, axis = plt.subplots(figsize=(12, 7))
    f1_by_language = results_df.pivot(index='language', columns='model_name', values='f1_score')
    f1_by_language.plot(kind='bar', ax=axis, width=0.8)
    axis.set_title('Comparative F1-Scores: Model Performance on English vs. isiZulu', fontsize=16)
    axis.set_ylabel('F1-Score', fontsize=12)
    axis.set_xlabel('Language', fontsize=12)
    axis.tick_params(axis='x', rotation=0)
    axis.legend(title='Model & Training Strategy')
    plt.tight_layout()
    plt.show()

    print("\n" + "="*80 + "\nPACKAGING TRAINED MODELS FOR DEPLOYMENT\n" + "="*80)

    dir_path = "./fineTunedModes"
    if not os.path.exists(dir_path):
        print(f"⚠️ Directory not found, skipping zip: '{dir_path}'")
        return

    try:
        shutil.make_archive(base_name=dir_path, format='zip', root_dir=dir_path)
    except Exception as e:
        print(f"❌ Failed to zip '{dir_path}'. Error: {e}")
    else:
        print(f"✅ Zipped '{dir_path}' to '{dir_path}.zip'")


# **Executing All Experiments**
This is the main execution cell. It calls the `full_project_pipeline` function to run all defined experiments, generate performance metrics, create comparison plots, and package the fine-tuned models for deployment, delivering the primary outcomes of the project.

In [None]:
# Checking that the names exist is not enough: prepare_dataset returns None
# when data preparation fails, and the names are still bound in that case.
# Look at the values instead so the failure message is actually reachable.
if globals().get('zulu_dataset') is not None and globals().get('english_dataset') is not None:
  full_project_pipeline()
else:
  print("Data preparation failed or was skipped. Cannot run the main pipeline.")

# **Interactive Demonstration with the Fine-Tuned Zulu Model**

Note: ***This is for quick testing only, just after training. The main prediction will be done in the web interface provided in the project***

This final cell provides an interactive command-line interface to test the best-performing model for isiZulu (`castorini/afriberta_base`). It allows a user to input Zulu text and receive a real-time classification of whether it is likely human- or machine-generated, demonstrating a practical application of the research.

In [None]:
if __name__ == "__main__":
    print("\nStarting interactive session with the fine-tuned AfriBERTa Zulu model...")
    interactive_model_path = "./fineTunedModes/results_castorini-afriberta_base_zul"
    interactive_classifier = None

    if os.path.exists(interactive_model_path):
        # Only model loading is guarded here, so the "model loading" message
        # below is never printed for an unrelated failure later on.
        # NOTE(review): this directory is written after LoRA fine-tuning —
        # confirm that loading it this way restores the trained classifier head.
        try:
            # Load the fine-tuned classifier for interactive use
            interactive_classifier = TextClassifier(model_name=interactive_model_path)
        except Exception as e:
            print(f"\nCould not start interactive session. An error occurred during model loading: {e}")
    else:
        print(f"\nCould not start interactive session. Model directory not found at: {interactive_model_path}")

    if interactive_classifier is not None:
        print("\nZulu Text Classification Tool")
        print("Enter Zulu text to classify. Type 'exit' to quit.")

        while True:
            try:
                user_input = input("\nEnter Zulu text: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or an interrupt ends the session cleanly.
                print("\nExiting...")
                break

            if user_input.lower() == "exit":
                print("Exiting...")
                break
            if not user_input:
                print("Please enter some text.")
                continue

            try:
                prediction = interactive_classifier.predict_single_text(user_input)
            except Exception as e:
                # One failing input should not end the whole session.
                print(f"Prediction failed: {e}")
                continue

            print(f"\nPrediction for '{user_input}':")
            print(f"  Predicted: {prediction['predicted_class']} (Confidence: {prediction['confidence']:.4f})")
            print(f"  Probabilities: Human - {prediction['probabilities']['Human']:.4f}, Machine - {prediction['probabilities']['Machine']:.4f}")
            print("-" * 50)