# In [8]:
import csv

def read_ann_file(ann_file):
    """
    Read an .ann file and return the label and text of each annotation.

    Only lines whose first tab-separated field starts with 'T' are used;
    every other line (relations, blank lines, ...) is skipped.  A used line
    is expected to look like ``ID<TAB>LABEL offsets<TAB>TEXT``
    (presumably the brat standoff layout -- confirm against the data source).

    Args:
        ann_file: Path to the UTF-8 encoded .ann file.

    Returns:
        A list of ``(label, text)`` tuples in file order.  ``text`` is the
        empty string when the line carries no text field.
    """
    annotations = []
    with open(ann_file, 'r', encoding='utf-8') as f:
        for line in f:
            # Split into at most three fields (ID, "LABEL offsets", TEXT) so
            # that a tab inside the annotated text does not truncate it.
            parts = line.strip().split("\t", 2)
            # Process only annotation lines (ignore relations and blanks).
            if len(parts) < 2 or not parts[0].startswith('T'):
                continue
            label = parts[1].split(" ")[0]  # First token of the second field
            # With no third field there is no annotated text; taking the last
            # field instead would wrongly return "LABEL offsets" as the text.
            text = parts[2] if len(parts) > 2 else ""
            annotations.append((label, text))
    return annotations

def create_csv_from_ann(text_file, ann_file, output_csv):
    """
    Extract the annotations of an .ann file and write them to a CSV file.

    The CSV starts with a ``Text,Label`` header row, followed by one row per
    annotation in the order returned by ``read_ann_file``.

    Args:
        text_file: Path to the text file the annotations belong to.  It is
            not read: the rows are built from the .ann file alone.  The
            parameter is kept so existing callers keep working.
        ann_file: Path to the .ann file, parsed with ``read_ann_file``.
        output_csv: Path of the CSV file to write (opened in 'w' mode, so an
            existing file is overwritten).
    """
    # Read the annotations from the .ann file
    annotations = read_ann_file(ann_file)

    # newline='' lets the csv module control line endings itself.
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Text', 'Label'])  # CSV headers
        # read_ann_file yields (label, text); the CSV column order is reversed.
        writer.writerows(
            (text_content, label) for label, text_content in annotations
        )

# Example usage: export the annotations of document A01 to output.csv.
# text_file is the text file that corresponds to the .ann file.
text_file, ann_file = 'A01.txt', 'A01.ann'
output_csv = 'output.csv'  # Destination CSV file

create_csv_from_ann(
    text_file=text_file,
    ann_file=ann_file,
    output_csv=output_csv,
)
