# Imports & Installation

In [None]:
# Mount Google Drive so the dataset and output directories under
# /content/drive used by later cells are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (

In [None]:
import inspect
import json
import os

import numpy as np
import pandas as pd
import torch
from datasets import load_from_disk, load_dataset, Dataset
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
)
from sklearn.pipeline import Pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification, EarlyStoppingCallback
from transformers import Trainer, TrainingArguments

In [None]:
# Fetch NLTK's stopword corpus; the bag-of-words vectorizer further down
# reads stopwords.words('english') from it.
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

# Read in Data
Assumes the data has already been saved to disk as three separate Arrow datasets (train, validation, and test) that `load_from_disk` can read.

In [None]:
# Load datasets (non-tokenized)
# 'text' is list of strings
# 'labels' is list of integers, positive = 1 = arxiv
# train, val, & test all have 50/50 distribution of arxiv and vixra examples
DATA_DIR = '/content/drive/MyDrive/sp25/paper-moderation-SP25/'

# Join with relative file names: DATA_DIR already ends in '/', so prepending
# another '/' to the file name would produce a doubled separator.
train_dataset = load_from_disk(os.path.join(DATA_DIR, "physics_subset_train.arrow"))
val_dataset = load_from_disk(os.path.join(DATA_DIR, "physics_subset_val.arrow"))
test_dataset = load_from_disk(os.path.join(DATA_DIR, "physics_subset_test.arrow"))

# Bag of Words Baseline

In [None]:
# Number of leading examples taken from each split for the bag-of-words baseline
train_size = 10000
val_size = 2000

# Slice rows first, then pick columns: indexing a whole column and slicing it
# afterwards materialises every example of the split just to keep the first N.
train_subset = train_dataset[:train_size]
val_subset = val_dataset[:val_size]

X_train, y_train = train_subset['text'], train_subset['labels']
# NOTE: despite the *_test names, these hold the VALIDATION split; the test
# split is not used by the bag-of-words baseline.
X_test, y_test = val_subset['text'], val_subset['labels']

In [None]:
# Bag-of-words baseline: unigram + bigram counts (English stopwords removed,
# terms appearing in fewer than 5% of documents dropped) fed to logistic regression.
text_clf = Pipeline([
    ('vect', CountVectorizer(stop_words=list(stopwords.words('english')), ngram_range=(1, 2), min_df=0.05)),
    ('clf', LogisticRegression(random_state=42, max_iter=1000))
])
# Train the model
text_clf.fit(X_train, y_train)

# Make predictions (X_test / y_test hold the validation subset selected above)
y_pred = text_clf.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
# confusion_matrix must be imported from sklearn.metrics alongside the other
# metrics; without that import this line raises NameError on a fresh kernel.
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.956

Confusion Matrix:
[[1065   37]
 [  51  847]]

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.97      0.96      1102
           1       0.96      0.94      0.95       898

    accuracy                           0.96      2000
   macro avg       0.96      0.95      0.96      2000
weighted avg       0.96      0.96      0.96      2000



In [None]:
# Inspect which n-grams carry the most weight in the fitted linear classifier.
# Skipped entirely when the estimator exposes no per-feature coefficients.
fitted_clf = text_clf['clf']
if hasattr(fitted_clf, 'coef_'):
    feature_names = text_clf['vect'].get_feature_names_out()
    coefs = fitted_clf.coef_[0]

    # One row per vocabulary entry, ranked from most positive to most negative weight
    feature_importance = pd.DataFrame(
        {'feature': feature_names, 'coefficient': coefs}
    ).sort_values('coefficient', ascending=False)

    top_positive = feature_importance.head(10)
    # Re-sort the bottom slice ascending so the most negative weight is listed first
    top_negative = feature_importance.tail(10).sort_values('coefficient')

    print("Top positive features (most predictive of positive class):")
    print(top_positive.to_string(index=False))

    print("\nTop negative features (most predictive of negative class):")
    print(top_negative.to_string(index=False))

Top positive features (most predictive of positive class):
   feature  coefficient
     video     0.779024
       cm2     0.488253
       sys     0.433185
      tion     0.429000
      view     0.423504
       von     0.387996
   unknown     0.380963
references     0.374462
    define     0.372134
       los     0.370336

Top negative features (most predictive of negative class):
 feature  coefficient
     eld    -0.734190
abstract    -0.683912
    2020    -0.556857
   chaos    -0.445842
     org    -0.375466
     pdf    -0.368675
     etc    -0.363122
   email    -0.361040
      dr    -0.323459
  planck    -0.306777


# RoBERTa Baseline

In [None]:
# TODO: change this path to your Google Drive Folder
# Configuration for the RoBERTa experiment cells below.

# TODO: change this path to your Google Drive Folder
# NOTE: this re-binds DATA_DIR from the "Read in Data" cell (same folder, here
# without the trailing slash).
DATA_DIR = '/content/drive/MyDrive/sp25/paper-moderation-SP25'

# Output directory for results
OUTPUT_DIR = '/content/drive/MyDrive/sp25/paper-moderation-SP25/seg-baseline-results/'

# RoBERTa model to use
MODEL_NAME = "roberta-base"

# Whether to apply text segmentation
# NOTE(review): not read by the driver cell shown in this notebook, which
# passes use_segmentation=False literally - confirm whether this is still needed.
USE_SEGMENTATION = False

# Run both with and without segmentation for comparison
# NOTE(review): not referenced by any cell shown in this notebook - confirm.
RUN_BOTH = True

# Training parameters
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_EPOCHS = 2

# Random seed for reproducibility
SEED = 42

In [None]:
def compute_metrics(pred):
    """
    Turn a Trainer prediction output into macro-averaged classification metrics.

    Args:
        pred: Object exposing `label_ids` (gold labels) and `predictions`
            (per-class scores; the arg-max over the last axis is taken as
            the predicted class).

    Returns:
        dict: 'accuracy', 'f1', 'precision' and 'recall'. The last three are
        macro-averaged, with undefined ratios reported as 0.
    """
    y_true = pred.label_ids
    y_hat = pred.predictions.argmax(-1)

    accuracy = accuracy_score(y_true, y_hat)
    # The fourth element (support) is not needed for the summary.
    macro_precision, macro_recall, macro_f1, _support = precision_recall_fscore_support(
        y_true, y_hat, average='macro', zero_division=0
    )

    return dict(
        accuracy=accuracy,
        f1=macro_f1,
        precision=macro_precision,
        recall=macro_recall,
    )

In [None]:
def run_experiment(data_dir, output_dir, model_name, use_segmentation,
                  batch_size, learning_rate, num_epochs, seed=42):
    """
    Fine-tune a sequence classifier on the train split, select the best
    checkpoint on the validation split, and report metrics on the test split.

    Args:
        data_dir: Directory containing `train.arrow`, `val.arrow` and
            `test.arrow` datasets saved with `save_to_disk`. Each must have a
            'text' column and a 'labels' column.
        output_dir: Directory under which a per-experiment folder is created
            for checkpoints, the final model and `results.txt`.
        model_name: Hugging Face model identifier (e.g. "roberta-base").
        use_segmentation: Selects the experiment folder suffix and the wording
            of log messages. NOTE(review): no segmentation is applied to the
            text anywhere in this function - confirm whether that step is
            still to be implemented.
        batch_size: Per-device batch size for both training and evaluation.
        learning_rate: Optimizer learning rate.
        num_epochs: Number of training epochs.
        seed: Seed forwarded to `TrainingArguments` (default 42, which is also
            the library default, so existing callers are unaffected).

    Returns:
        dict: Test-set 'accuracy', 'precision', 'recall' and 'f1'
        (as produced by `compute_metrics`).
    """
    variant = "segmented" if use_segmentation else "baseline"

    # Create experiment directory
    exp_name = f"roberta_{model_name.split('/')[-1]}"
    exp_name += "_with_segmentation" if use_segmentation else "_baseline"
    exp_dir = os.path.join(output_dir, exp_name)
    os.makedirs(exp_dir, exist_ok=True)

    print(f"Running {variant} experiment")

    # Load datasets (non-tokenized)
    # 'text' is list of strings
    # 'labels' is list of integers, positive = 1 = arxiv
    # train, val, & test all have 50/50 distribution of arxiv and vixra examples
    # NOTE(review): the bag-of-words section loads physics_subset_*.arrow from
    # the same folder; confirm that train/val/test.arrow are the intended files.
    train_dataset = load_from_disk(os.path.join(data_dir, "train.arrow"))
    val_dataset = load_from_disk(os.path.join(data_dir, "val.arrow"))
    test_dataset = load_from_disk(os.path.join(data_dir, "test.arrow"))

    print(f"Loaded datasets - Train: {len(train_dataset)}, "
              f"Validation: {len(val_dataset)}, Test: {len(test_dataset)}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize datasets: every example is truncated/padded to the model's
    # maximum length so batches have a fixed shape.
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True
        )

    print("Tokenizing datasets.")
    train_ds = train_dataset.map(tokenize_function, batched=True)
    val_ds = val_dataset.map(tokenize_function, batched=True)
    test_ds = test_dataset.map(tokenize_function, batched=True)

    # Expose only the model inputs and labels, as torch tensors
    model_columns = ["input_ids", "attention_mask", "labels"]
    for split in (train_ds, val_ds, test_ds):
        split.set_format(type="torch", columns=model_columns)

    # `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
    # releases and the old keyword was later dropped; use whichever name the
    # installed version accepts.
    eval_strategy_arg = (
        "eval_strategy"
        if "eval_strategy" in inspect.signature(TrainingArguments).parameters
        else "evaluation_strategy"
    )

    use_cuda = torch.cuda.is_available()

    # Set up training arguments
    training_args = TrainingArguments(
        output_dir=os.path.join(exp_dir, "checkpoints"),
        save_strategy="epoch",           # Save after each epoch (needed for load_best_model_at_end)
        save_total_limit=1,              # Limit how many checkpoints are kept on disk
        metric_for_best_model="f1",      # Use f1 to determine best model
        greater_is_better=True,          # Higher f1 is better
        logging_strategy="steps",        # Print logs during training
        logging_steps=200,               # Print metrics every 200 steps
        logging_dir=None,                # Don't save logs to disk
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_epochs,
        weight_decay=0.01,
        load_best_model_at_end=True,     # Load the best model at the end of training
        fp16=use_cuda,                   # Mixed precision needs a GPU; fall back to fp32 on CPU
        report_to=["none"],              # Don't report to any tracking systems
        seed=seed,
        **{eval_strategy_arg: "epoch"},  # Evaluate after each epoch (must match save_strategy)
    )

    # Initialize model
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
    )
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)

    # Create trainer
    # NOTE: with per-epoch evaluation, a patience of 2 can only stop training
    # early when num_epochs is greater than 2.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # Train model
    print(f"Training {variant} model")
    trainer.train()
    trainer.save_model(os.path.join(exp_dir, "final_model"))

    # Evaluate model: one pass over the test set yields both the raw
    # predictions and the aggregate metrics. The "eval" prefix keeps the
    # metric keys identical to what trainer.evaluate() would return.
    print(f"Evaluating {variant} model")
    predictions = trainer.predict(test_ds, metric_key_prefix="eval")
    eval_results = predictions.metrics

    # Get detailed classification report
    preds = predictions.predictions.argmax(-1)
    labels = predictions.label_ids
    class_report = classification_report(labels, preds)

    # Log and save results
    results_str = (
        f"{variant.capitalize()} Model Results:\n"
        f"Accuracy: {eval_results['eval_accuracy']:.4f}\n"
        f"Precision: {eval_results['eval_precision']:.4f}\n"
        f"Recall: {eval_results['eval_recall']:.4f}\n"
        f"F1 Score: {eval_results['eval_f1']:.4f}\n"
        f"Classification Report:\n{class_report}\n"
    )

    print(results_str)

    # Save results
    with open(os.path.join(exp_dir, 'results.txt'), 'w') as f:
        f.write(results_str)

    return {
        'accuracy': eval_results['eval_accuracy'],
        'precision': eval_results['eval_precision'],
        'recall': eval_results['eval_recall'],
        'f1': eval_results['eval_f1']
    }

In [None]:
# Seed the numpy and torch global RNGs before anything stochastic runs
np.random.seed(SEED)
torch.manual_seed(SEED)

# Make sure the results folder exists before anything is written to it
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Metrics of each experiment, keyed by experiment name
results = {}

# Settings for the baseline (no segmentation) run
baseline_config = dict(
    data_dir=DATA_DIR,
    output_dir=OUTPUT_DIR,
    model_name=MODEL_NAME,
    use_segmentation=False,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    num_epochs=NUM_EPOCHS,
)
results['baseline'] = run_experiment(**baseline_config)
print("Experiment(s) completed")

# Persist the collected metrics next to the per-experiment folders
results_path = os.path.join(OUTPUT_DIR, 'results.json')
with open(results_path, 'w') as f:
    f.write(json.dumps(results, indent=4))
print(results)