
### Batch annotation script for the complete coding scheme.
#### Annotates articles with both the standard and the extended coding scheme, across all layers and tagsets.


In [3]:
import os
import sys
import json
import argparse
from pathlib import Path
from typing import List, Optional

# Add parent directory to path to import our modules
sys.path.append('..')

# Import our custom modules
from src.agent.utils import (
    get_project_root, 
    get_articles_paths,
    create_extended_coding_scheme
)
from src.agent.annotation import annotate_article

# Load environment variables
from dotenv import load_dotenv
load_dotenv()


def create_extended_scheme_if_not_exists() -> bool:
    """
    Make sure the "Coding_Scheme_Extended" coding scheme is available.

    The scheme is probed by loading it. If loading raises FileNotFoundError,
    the scheme is generated with create_extended_coding_scheme
    (min_occurrences=1, max_examples=10). Any other error raised while
    loading propagates to the caller unchanged.

    Returns:
        bool: True if the scheme already existed or was created successfully,
        False if creating it raised an exception (the error is printed).
    """
    try:
        # Imported here rather than at module level, as in the original cell.
        from src.agent.utils import load_coding_scheme

        # Loading is only an existence probe; the loaded scheme is not needed.
        load_coding_scheme(scheme_name="Coding_Scheme_Extended")
    except FileNotFoundError:
        print("Creating extended coding scheme...")
        try:
            create_extended_coding_scheme(
                output_name="Coding_Scheme_Extended",
                min_occurrences=1,
                max_examples=10,
            )
        except Exception as e:
            # Best-effort: report the failure and let the caller decide.
            print(f"Error creating extended coding scheme: {e}")
            return False
        else:
            print("Extended coding scheme created successfully.")
            return True
    else:
        print("Extended coding scheme already exists.")
        return True


def annotate_single_article(article_id: str, verbose: bool = True) -> tuple:
    """
    Annotate a single article with both the standard and the extended scheme.

    Each scheme is run over all layers and tagsets with ``num_examples=0``.
    The results are written as JSON into the article's directory under
    ``<project root>/data/03b_processed_to_json/<article_id>``:

    * ``Generated_Annotations_Standard.json`` (scheme "Coding_Scheme")
    * ``Generated_Annotations_Extended.json`` (scheme "Coding_Scheme_Extended")

    The standard pass runs first. If any step raises, the remaining work is
    skipped and the failure is reported through the return value instead of
    being propagated.

    Args:
        article_id: ID of the article to annotate (also its directory name).
        verbose: Whether to print progress and error messages.

    Returns:
        tuple: (success: bool, error_message: str). ``error_message`` is the
        empty string on success.
    """
    # One entry per pass: (label, scheme_name, use_extended_scheme, output file).
    # Both passes share every other annotate_article argument.
    passes = (
        ("standard", "Coding_Scheme", False,
         "Generated_Annotations_Standard.json"),
        ("extended", "Coding_Scheme_Extended", True,
         "Generated_Annotations_Extended.json"),
    )

    try:
        if verbose:
            print(f"\nAnnotating {article_id}...")

        # Get article directory
        root_dir = get_project_root()
        article_dir = os.path.join(root_dir, 'data', '03b_processed_to_json', article_id)

        if not os.path.exists(article_dir):
            error_msg = f"Article directory not found: {article_dir}"
            if verbose:
                print(f"  ERROR: {error_msg}")
            return False, error_msg

        for label, scheme_name, use_extended, file_name in passes:
            if verbose:
                print(f"  Annotating with {label} scheme...")

            annotations = annotate_article(
                article_id=article_id,
                layers=None,  # Use all layers
                tagsets=None,  # Use all tagsets
                num_examples=0,  # No few-shot examples
                save_result=False,  # Don't save as default name; saved below
                use_extended_scheme=use_extended,
                scheme_name=scheme_name,
            )

            # Save under a scheme-specific name so the two passes don't collide.
            output_path = os.path.join(article_dir, file_name)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(annotations, f, indent=2)

            if verbose:
                print(f"    Saved {len(annotations)} annotations")

        return True, ""

    except Exception as e:
        # Batch boundary: turn any failure into a result tuple so that one bad
        # article does not abort the caller's run.
        error_msg = f"Error annotating {article_id}: {e}"
        if verbose:
            print(f"  ERROR: {error_msg}")
        return False, error_msg

In [5]:
 # Run the annotator on one article with progress messages disabled
 # (verbose=False); the call returns a (success, error_msg) tuple.
 success, error_msg =annotate_single_article(article_id ="EU_32019R0631_Title_0_Chapter_0_Section_0_Article_17", verbose = False) 

  - Policydesigncharacteristics/Time/Time_PolDuration: 'for a period of six years from 15 may 2019'
  - Policydesigncharacteristics/Reference/Ref_Strategy_Agreement: 'the interinstitutional agreement of 13 april 2016 on better law-making'
  - Policydesigncharacteristics/Time/Time_PolDuration: 'period of six years from 15 may 2019'
  - Policydesigncharacteristics/Reference/Ref_Strategy_Agreement: 'interinstitutional agreement of 13 april 2016 on better law-making'
