# Load your dataset to Argilla

In [None]:
# Install the Argilla SDK and the Hugging Face `datasets` library
# (this cell needs `uv` on PATH; if it is unavailable, use `%pip install argilla datasets` instead)
!uv pip install argilla datasets

In [None]:
# Set up Argilla client connection
# Replace the placeholders with your actual Argilla instance details
import argilla as rg

# Hugging Face token. Leave empty for public Spaces; fill it in only when the
# Argilla instance runs in a private Space.
HF_TOKEN = ""

# Initialize the Argilla client with your workspace credentials.
# The Authorization header is attached only when HF_TOKEN is set, so a public
# instance is never sent a placeholder bearer token.
client = rg.Argilla(
    api_url="...",  # URL of your Argilla instance (e.g., "https://your-space.hf.space")
    api_key="...",  # Your Argilla API key for authentication
    headers={"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {},
)

In [None]:
# Load the AG News dataset for text classification
# This dataset contains news articles categorized into 4 classes: World, Sports, Business, Sci/Tech
from datasets import load_dataset

# Only the "train" split is requested; this `data` object is what later cells upload.
data = load_dataset("SetFit/ag_news", split="train")
# Inspect the dataset structure to understand what fields are available.
# As the last expression in the cell, its value is displayed by the notebook.
# Later cells read the "label_text" column (and presumably "text" — confirm it is listed here).
data.features

In [None]:
# Describe the annotation task: the text annotators read and the questions
# they answer about it.

# Question 1: single-label classification. The choices are the distinct
# values of the dataset's "label_text" column.
label_question = rg.LabelQuestion(
    name="label",
    title="Classify the text:",
    labels=data.unique("label_text"),
)

# Question 2: span annotation (named entities) over the "text" field,
# limited to the four entity types listed below.
entity_question = rg.SpanQuestion(
    name="entities",
    title="Highlight all the entities in the text:",
    labels=["PERSON", "ORG", "LOC", "EVENT"],
    field="text",
)

settings = rg.Settings(
    fields=[rg.TextField(name="text")],  # the input shown to annotators
    questions=[label_question, entity_question],
)

In [None]:
# Define a dataset named "ag_news" that uses the `settings` object built earlier in this notebook.
# No workspace is passed here — presumably the client's default workspace applies; confirm.
dataset = rg.Dataset(name="ag_news", settings=settings)

# Create the dataset on the Argilla server
# NOTE(review): re-running this cell likely fails if "ag_news" already exists — confirm.
dataset.create()

In [None]:
# Upload the data to Argilla for annotation
# Every row of `data` is passed in, i.e. the whole split loaded earlier in this notebook.
# The mapping routes the dataset's "label_text" column to Argilla's "label" question;
# other columns are presumably matched to fields/questions by name — confirm.
dataset.records.log(data, mapping={"label_text": "label"})