In [None]:
# Mount Google Drive at /content/drive; the evaluation cells further down read their
# saved model artifacts from /content/drive/MyDrive.
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install torch transformers scikit-learn pandas

In [None]:
import os

import numpy as np
import pandas as pd
import torch
from datasets import Dataset as HFDataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
class HuggingFaceTransformer:
    """Two-label sequence classifier wrapped around a Hugging Face checkpoint.

    The wrapper owns a tokenizer and an ``AutoModelForSequenceClassification``
    (``num_labels=2``), places the model on CUDA when available, and offers
    ``train`` / ``predict`` / ``predict_proba`` / ``save`` / ``load``.
    """

    # Token budget used for both training and inference so that the model sees
    # identically truncated inputs in both phases.
    MAX_LENGTH = 128

    def __init__(self, model_name='roberta-base'):
        """Load tokenizer and model for ``model_name`` and move the model to the device."""
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

    def train(self, texts, labels, epochs=3, batch_size=8,
              save_path="models/hf_roberta", random_state=None):
        """Fine-tune the model and save it.

        Args:
            texts: Training documents.
            labels: Integer class label per document (0 or 1).
            epochs: Number of training epochs.
            batch_size: Per-device batch size for training and evaluation.
            save_path: Directory the fine-tuned model and tokenizer are written to.
            random_state: Seed for the 80/20 train/validation split. ``None``
                (the default) keeps the split unseeded.
        """
        train_texts, val_texts, train_labels, val_labels = train_test_split(
            texts, labels, test_size=0.2, random_state=random_state
        )

        train_dataset = HFDataset.from_dict({'text': train_texts, 'label': train_labels})
        val_dataset = HFDataset.from_dict({'text': val_texts, 'label': val_labels})

        def tokenize(example):
            # Fixed-length padding so every training batch has the same shape.
            return self.tokenizer(
                example['text'],
                truncation=True,
                padding='max_length',
                max_length=self.MAX_LENGTH,
            )

        train_dataset = train_dataset.map(tokenize, batched=True, num_proc=1)
        val_dataset = val_dataset.map(tokenize, batched=True, num_proc=1)

        tensor_columns = ['input_ids', 'attention_mask', 'label']
        train_dataset.set_format(type='torch', columns=tensor_columns)
        val_dataset.set_format(type='torch', columns=tensor_columns)

        training_args = TrainingArguments(
            output_dir='./results_hf',
            num_train_epochs=epochs,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            eval_strategy="epoch",
            save_strategy="epoch",
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_dir='./logs_hf',
            logging_steps=10,
            save_total_limit=1,
            fp16=torch.cuda.is_available(),
            report_to="none"
        )

        def compute_metrics(eval_pred):
            # Metrics are reported for the positive class (label 1).
            logits, labels = eval_pred
            preds = logits.argmax(-1)
            precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
            acc = accuracy_score(labels, preds)
            return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics
        )
        trainer.train()
        self.save(save_path)

    def _batch_probabilities(self, texts, batch_size):
        """Yield one CPU tensor of per-class softmax probabilities per batch of texts."""
        # A bare string would otherwise be sliced into character chunks; other
        # iterables (tuple, pandas Series, ...) are materialised into a list.
        texts = [texts] if isinstance(texts, str) else list(texts)

        self.model.eval()
        for start in range(0, len(texts), batch_size):
            batch_texts = texts[start:start + batch_size]
            inputs = self.tokenizer(
                batch_texts,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=self.MAX_LENGTH,
            ).to(self.device)
            with torch.no_grad():
                logits = self.model(**inputs).logits
            yield torch.nn.functional.softmax(logits, dim=-1).cpu()

    def predict(self, texts, batch_size=16):
        """Return a list with the arg-max class index for every text."""
        all_preds = []
        for probs in self._batch_probabilities(texts, batch_size):
            all_preds.extend(torch.argmax(probs, dim=1).numpy())
        return all_preds

    def predict_proba(self, texts, batch_size=16):
        """Return a 1-D array with the probability of class 1 for every text."""
        all_probs = []
        for probs in self._batch_probabilities(texts, batch_size):
            all_probs.extend(probs[:, 1].numpy())
        return np.array(all_probs)

    def save(self, path):
        """Write model and tokenizer to ``path`` (created if missing)."""
        os.makedirs(path, exist_ok=True)
        self.model.save_pretrained(path)
        self.tokenizer.save_pretrained(path)

    @classmethod
    def load(cls, path):
        """Restore an instance from a directory written by ``save``.

        ``__init__`` is bypassed on purpose: calling it would first download and
        build the default base checkpoint only to throw it away.
        """
        instance = cls.__new__(cls)
        instance.model_name = path
        instance.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        instance.tokenizer = AutoTokenizer.from_pretrained(path, local_files_only=True)
        model = AutoModelForSequenceClassification.from_pretrained(path, local_files_only=True)
        instance.model = model.to(instance.device)
        return instance

In [None]:
from collections import Counter
import spacy
import re
from typing import List, Dict
from sklearn.base import BaseEstimator, TransformerMixin
from tqdm import tqdm

from sklearn.base import BaseEstimator, TransformerMixin
from collections import Counter
from typing import List, Dict
import numpy as np
import spacy
from tqdm import tqdm

class StylometricFeatureExtractor(BaseEstimator, TransformerMixin):
    """Turn raw texts into a fixed-width matrix of hand-crafted style features.

    Each text is parsed with spaCy's ``en_core_web_sm`` pipeline and described
    by lexical, syntactic and surface-statistics features. Columns of the
    matrix returned by ``transform`` are ordered by feature name (alphabetical
    sort), as listed in ``FEATURE_NAMES``.
    """

    # POS tags whose relative frequency is emitted as a ``pos_<TAG>`` feature.
    COMMON_POS_TAGS = ('NOUN', 'VERB', 'ADJ', 'ADV', 'PRON')

    # Names of the emitted features in column order. `transform` sorts each
    # per-document feature dict by key, which yields exactly this order.
    FEATURE_NAMES = tuple(sorted((
        'avg_word_length', 'type_token_ratio', 'function_word_ratio', 'content_word_ratio',
        'pos_NOUN', 'pos_VERB', 'pos_ADJ', 'pos_ADV', 'pos_PRON',
        'clause_count',
        'avg_sentence_length', 'exclamation_ratio', 'question_ratio', 'capitalized_ratio',
    )))

    def __init__(self):
        self.nlp = spacy.load('en_core_web_sm')

    def get_lexical_features(self, doc) -> Dict[str, float]:
        """Word-level features; ratios are relative to the non-punctuation token count."""
        words = [token.text.lower() for token in doc if not token.is_punct]
        word_lengths = [len(word) for word in words]
        unique_words = set(words)
        total_words = len(words)

        # "Function" words are spaCy stop words; "content" words are everything
        # that is neither a stop word nor punctuation.
        function_words = [token.text.lower() for token in doc if token.is_stop]
        content_words = [token.text.lower() for token in doc if not token.is_stop and not token.is_punct]

        return {
            'avg_word_length': np.mean(word_lengths) if word_lengths else 0,
            'type_token_ratio': len(unique_words) / total_words if total_words else 0,
            'function_word_ratio': len(function_words) / total_words if total_words else 0,
            'content_word_ratio': len(content_words) / total_words if total_words else 0,
        }

    def get_syntactic_features(self, doc) -> Dict[str, float]:
        """Relative frequency of common POS tags plus the sentence count.

        The ``clause_count`` key actually holds the number of sentences
        (``doc.sents``). The key is kept as-is because column order is derived
        from the key names and renaming it would reorder the feature matrix.
        """
        total_tokens = len(doc)
        pos_counts = Counter([token.pos_ for token in doc])

        pos_features = {
            f'pos_{pos}': pos_counts.get(pos, 0) / total_tokens if total_tokens else 0
            for pos in self.COMMON_POS_TAGS
        }

        clause_count = sum(1 for _ in doc.sents)

        return {
            **pos_features,
            'clause_count': clause_count
        }

    def get_statistical_features(self, doc) -> Dict[str, float]:
        """Surface statistics computed on the raw text and whitespace-split words."""
        sentences = list(doc.sents)
        text = doc.text
        words = text.split()

        sent_lengths = [len(sent.text.split()) for sent in sentences]
        capitalized_words = sum(1 for word in words if word and word[0].isupper())

        return {
            'avg_sentence_length': np.mean(sent_lengths) if sent_lengths else 0,
            # Punctuation ratios are per character, not per word.
            'exclamation_ratio': text.count('!') / len(text) if text else 0,
            'question_ratio': text.count('?') / len(text) if text else 0,
            'capitalized_ratio': capitalized_words / len(words) if words else 0
        }

    def fit(self, X, y=None):
        """No-op: the extractor has nothing to learn."""
        return self

    def transform(self, texts: List[str]) -> np.ndarray:
        """Return an array of shape ``(len(texts), len(FEATURE_NAMES))``.

        ``texts`` may be any iterable of strings; it is materialised first so
        the progress bar total is known even for generators.
        """
        texts = list(texts)
        if not texts:
            # Keep the result 2-D so downstream code sees a consistent column count.
            return np.empty((0, len(self.FEATURE_NAMES)))

        features_list = []

        for doc in tqdm(self.nlp.pipe(texts, batch_size=50), total=len(texts), desc="Extracting features"):
            features = {}
            features.update(self.get_lexical_features(doc))
            features.update(self.get_syntactic_features(doc))
            features.update(self.get_statistical_features(doc))

            # Sorting by key fixes the column order independently of dict insertion order.
            feature_values = [value for key, value in sorted(features.items())]
            features_list.append(feature_values)

        return np.array(features_list)


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import joblib

class StylometricClassifier(BaseEstimator, TransformerMixin):
    """Logistic-regression classifier over standardised stylometric features.

    Pipeline: ``StylometricFeatureExtractor`` -> ``StandardScaler`` ->
    ``LogisticRegression``. Only the scaler and the classifier are persisted
    by ``save``; the feature extractor is rebuilt on ``load``.
    """

    def __init__(self):
        """
        Initialize the stylometric classifier using logistic regression
        """
        self.classifier = LogisticRegression(
            max_iter=1000,
            random_state=42,
            # Add L2 regularization to prevent overfitting
            C=1.0,
            # Use balanced class weights for potentially imbalanced datasets
            class_weight='balanced',
            verbose=1
        )
        self.feature_extractor = StylometricFeatureExtractor()
        self.scaler = StandardScaler()

    def fit(self, texts, labels):
        """
        Extract features, fit the scaler on them and train the classifier.

        Returns:
            self, to allow call chaining.
        """
        X = self.feature_extractor.transform(texts)
        X_scaled = self.scaler.fit_transform(X)

        self.classifier.fit(X_scaled, labels)
        return self

    def predict_proba(self, texts):
        """
        Return a 1-D array with the probability of class 1 for every text.
        """
        X = self.feature_extractor.transform(texts)
        if X.shape[0] == 0:
            # Nothing to score; the scaler would reject an empty matrix.
            return np.empty(0)
        X_scaled = self.scaler.transform(X)

        return self.classifier.predict_proba(X_scaled)[:, 1]

    def predict(self, texts):
        """
        Return binary predictions (1 when the class-1 probability is >= 0.5).
        """
        return (self.predict_proba(texts) >= 0.5).astype(int)

    def get_feature_importance(self):
        """
        Return ``{"feature_<i>": |coefficient_i|}`` sorted from most to least important.

        Index ``i`` refers to the i-th column produced by the feature extractor.

        Raises:
            ValueError: if the classifier has not been fitted yet.
        """
        if not hasattr(self.classifier, 'coef_'):
            raise ValueError("Model has not been trained yet")

        # The coefficient vector already has one entry per feature, so there is
        # no need to run the extractor on a dummy text just to count columns.
        importance = np.abs(self.classifier.coef_[0])
        feature_importance = {
            f"feature_{i}": importance[i]
            for i in range(len(importance))
        }

        return dict(sorted(
            feature_importance.items(),
            key=lambda item: item[1],
            reverse=True
        ))

    def save(self, path):
        """
        Persist the fitted classifier and scaler to ``path`` with joblib.
        """
        joblib.dump({
            'classifier': self.classifier,
            'scaler': self.scaler
        }, path)

    @classmethod
    def load(cls, path):
        """
        Rebuild a model from a file written by ``save``.

        SECURITY: ``joblib.load`` unpickles the file and can execute arbitrary
        code, so only load artifacts from a trusted source.
        """
        components = joblib.load(path)
        model = cls()
        model.classifier = components['classifier']
        model.scaler = components['scaler']
        return model

In [None]:
# Development set: a JSON-lines file in which every row has a `human_text`
# and a `machine_text` column.
url = "https://raw.githubusercontent.com/skgabriel/cs162-final-dev/refs/heads/main/german_wikipedia.jsonl"
dev_df = pd.read_json(url, lines=True)

human_texts, machine_texts = (
    dev_df[column].tolist() for column in ('human_text', 'machine_text')
)

# Label convention used throughout the notebook: 0 = human, 1 = machine.
texts = [*human_texts, *machine_texts]
labels = [0 for _ in human_texts] + [1 for _ in machine_texts]

# url = "https://raw.githubusercontent.com/skgabriel/cs162-final-dev/refs/heads/main/toefl.json"
# dev_df = pd.read_json(url)
# human_texts = dev_df['document'].tolist()
# machine_texts = []

# texts = human_texts
# labels = [0] * len(human_texts)


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Score the fine-tuned RoBERTa checkpoint on its own; class 1 is the positive class.
roberta_model = HuggingFaceTransformer.load("/content/drive/MyDrive/hf_roberta_new")
preds = roberta_model.predict(texts)

accuracy = accuracy_score(labels, preds)
precision, recall, f1 = (
    scorer(labels, preds, pos_label=1)
    for scorer in (precision_score, recall_score, f1_score)
)

print(
    f"AI Accuracy:  {accuracy:.3f}\n"
    f"AI Precision: {precision:.3f}\n"
    f"AI Recall:    {recall:.3f}\n"
    f"AI F1 Score:  {f1:.3f}"
)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Score the stylometric logistic-regression model on its own; class 1 is the positive class.
stylometric_model = StylometricClassifier.load("/content/drive/MyDrive/stylometric_model.pkl")
preds = stylometric_model.predict(texts)

accuracy = accuracy_score(labels, preds)
precision = precision_score(labels, preds, pos_label=1)
recall = recall_score(labels, preds, pos_label=1)
f1 = f1_score(labels, preds, pos_label=1)

# Blank lines separate the report from the feature-extraction progress bar output.
print("\n")
print("\n".join([
    f"AI Accuracy:  {accuracy:.3f}",
    f"AI Precision: {precision:.3f}",
    f"AI Recall:    {recall:.3f}",
    f"AI F1 Score:  {f1:.3f}",
]))

In [None]:
def predict_ensemble(texts, hf_model, stylometric_model, weight_hf=0.5, weight_stylo=0.5, threshold=0.5):
    """Combine two models' class-1 probabilities by weighted average.

    Args:
        texts: Documents to classify.
        hf_model: Object whose ``predict_proba(texts)`` returns one class-1
            probability per text.
        stylometric_model: Second model with the same ``predict_proba`` contract.
        weight_hf: Non-negative weight of ``hf_model``.
        weight_stylo: Non-negative weight of ``stylometric_model``.
        threshold: Averaged probability at or above which a text is labelled 1.

    Returns:
        ``(final_preds, final_probs)``: integer 0/1 predictions and the
        averaged probabilities, both as 1-D arrays.

    Raises:
        ValueError: if a weight is negative, both weights are zero, or the two
            models return a different number of probabilities.
    """
    if weight_hf < 0 or weight_stylo < 0:
        raise ValueError("Ensemble weights must be non-negative")
    total_weight = weight_hf + weight_stylo
    if total_weight <= 0:
        raise ValueError("At least one ensemble weight must be positive")
    # Without re-scaling, weights such as (1, 1) would yield "probabilities" up
    # to 2 and make the threshold meaningless. Weights that already sum to 1
    # are left untouched so existing calls give unchanged results.
    if not np.isclose(total_weight, 1.0):
        weight_hf = weight_hf / total_weight
        weight_stylo = weight_stylo / total_weight

    # np.asarray lets either model return a plain list.
    probs_roberta = np.asarray(hf_model.predict_proba(texts))
    probs_stylo = np.asarray(stylometric_model.predict_proba(texts))

    # Guard against silent broadcasting when the models disagree on the sample count.
    if probs_roberta.shape != probs_stylo.shape:
        raise ValueError(
            f"Model outputs differ in shape: {probs_roberta.shape} vs {probs_stylo.shape}"
        )

    final_probs = weight_hf * probs_roberta + weight_stylo * probs_stylo
    final_preds = (final_probs >= threshold).astype(int)

    return final_preds, final_probs

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report
# Ensemble weights: edit here only. Both the prediction call and the report title read them.
WEIGHT_HF = 0.3
WEIGHT_STYLO = 0.7

# Dataset label derived from the file name in `url` (set in the data-loading cell),
# so the report cannot show a stale, hand-edited dataset name.
dataset_name = url.rsplit("/", 1)[-1].rsplit(".", 1)[0]

stylometric_model = StylometricClassifier.load("/content/drive/MyDrive/stylometric_model.pkl")
roberta_model = HuggingFaceTransformer.load("/content/drive/MyDrive/hf_roberta_new")

# Bind the probabilities to a real name: assigning to `_` would shadow
# IPython's "last output" variable.
preds, ensemble_probs = predict_ensemble(
    texts, roberta_model, stylometric_model,
    weight_hf=WEIGHT_HF, weight_stylo=WEIGHT_STYLO,
)

accuracy = accuracy_score(labels, preds)
precision = precision_score(labels, preds, pos_label=1)
recall = recall_score(labels, preds, pos_label=1)
f1 = f1_score(labels, preds, pos_label=1)
print("\n")

print(f"========== Ensemble Evaluation Summary (Weighted: {WEIGHT_HF} RoBERTa / {WEIGHT_STYLO} Stylometric) ==========\n")
print(f"Dataset: {dataset_name}")
print(f"Total Samples: {len(labels)}")
print(f"Human Samples: {len(human_texts)} | AI Samples: {len(machine_texts)}\n")

print("Overall Performance (AI Class = 1):")
print(f"  Accuracy : {accuracy:.3f}")
print(f"  Precision: {precision:.3f} (How many predicted AIs were actually AI)")
print(f"  Recall   : {recall:.3f} (How many actual AIs were correctly detected)")
print(f"  F1 Score : {f1:.3f} (Harmonic mean of precision and recall)\n")

print("Classification Report (per class):")
print(classification_report(labels, preds, target_names=["Human", "AI"]))
