## ELAN remove tiers

remove all tiers from ELAN files which are not wanted

1. load the files
2. identify tiers to remove
3. remove the tiers
4. save the files

In [6]:
# find all tiers

import os
import xml.etree.ElementTree as ET


def extract_tier_ids(folder_path):
    tier_ids = set()  # Use a set to avoid duplicate values

    for filename in os.listdir(folder_path):
        if filename.endswith(".eaf"):
            file_path = os.path.join(folder_path, filename)
            #print (file_path)
            try:
                # Parse the XML file
                tree = ET.parse(file_path)
                root = tree.getroot()
                for tier in root.findall(".//TIER"):
                    attributes = tier.attrib
                    tier_ids.add(attributes['TIER_ID'])
                

            except Exception as e:
                print(f"Error processing file {file_path}: {e}")

    return list(tier_ids)

# Example usage
folder_path = './Elan_edited/'
tier_ids_list = extract_tier_ids(folder_path)

# Print the list of unique "TIER_ID" values
print("List of TIER_ID values:")
print(tier_ids_list)
for each in sorted(tier_ids_list):
    print (each)


List of TIER_ID values:
['PNT: Dennis', 'PNT: Dorothy', 'PNT: Jeanie', 'Me_possesive', 'Pacific_PNT: Amelia', 'Pacific_PNT: Cameron', 'LaB-CATT_Corrections', 'PNT: Nigel', 'PNT: Jocelyn', 'PNT: Sandra', 'Pacific_PNT: Nadia', 'SP: Male2', 'PNT: Maruccia', 'PNT: Alena', 'INT: Angela', 'Pacific_PNT: Renata', 'LaBB-CAT_PNT: Demitrius', 'PNT: Janice', 'LaBB-CAT_PNT: Jacqueline', 'Pacific_PNT: Nate', 'INT: Lucy', 'PNT: Liliana', 'LaBB-CAT_PNT: Aida', 'Pacific_PNT: Fabio', 'LaBB-CAT_PNT: Fred', 'INT: Beth', 'PNT: Alonso', 'PNT: Kim', 'LaBB-CAT_PNT: Emma', 'Pacific_PNT: Jaden', 'Pacific_PNT: Carlotta', 'PNT: Geoff', 'LaBB-CAT_PNT: Tammy', 'Pacific_PNT: Roseta', 'LaBB-CAT_PNT: Mable', 'LaBB-CAT_PNT: Naomi', 'LaBB-CAT_PNT: Donatella', 'PNT: Brittany', 'SP: Paul', 'Pacific_PNT: Nathaniel', 'PNT: Carmel', 'Pacific_PNT: Emilio', 'Pacific_INT: Brett', 'PNT: Alison', 'PNT: Darlene', 'Pacific_PNT: Bridget', 'LaBB-CAT_PNT: Lily', 'PNT: Danielle', 'LaBB-CAT_PNT: Callum', 'PNT: Pete', 'LaBB-CAT_PNT: Dieg

In [3]:
import os
import xml.etree.ElementTree as ET

removeList = ["Demographic_info", "Word_list", "Comments", "Section to edit", "Australian_English", "Sections",
             "Comments CT", "Comments KG", "Social_info" ]

startList = ["Pacific"]

def remove_tags_by_attribute(xml_file_path, output_folder, remove_list):
    # Parse the XML file
    tree = ET.parse(xml_file_path)
    root = tree.getroot()

    for each in removeList: 
        # Find and remove specific "TIER" tags based on the "TIER_ID" attribute
        for tier in root.findall(".//TIER[@TIER_ID='" + each + "']"):
            root.remove(tier)
    
    for wordBeginning in startList: 
        # Find and remove specific "TIER" tags based on the "TIER_ID" attribute
        
        for tier in root.findall(".//TIER"):
            attributes = tier.attrib
            if attributes['TIER_ID'].startswith(wordBeginning):
                print(f"Attributes for TIER tag: {attributes}")
                root.remove(tier)
    

    # Save the modified XML to the output folder
    output_file_path = os.path.join(output_folder, os.path.basename(xml_file_path))
    tree.write(output_file_path, encoding="utf-8", xml_declaration=True)

# Example usage
input_folder_path = 'C:\\Users\\barth\\Documents\\LDACA\\AusESL\\edited_elan\\'
output_folder_path = 'C:\\Users\\barth\\Documents\\LDACA\\AusESL\\edited_elan\\'
#attribute_value_to_remove = 'your_target_value'

# Create the output folder if it doesn't exist
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Process each XML file in the input folder
for xml_file_path in os.listdir(input_folder_path):
    if xml_file_path.endswith(".eaf"):
        
        xml_file_path_full = os.path.join(input_folder_path, xml_file_path)
        #print (xml_file_path_full)
        remove_tags_by_attribute(xml_file_path_full, output_folder_path, removeList)
        #print(f"File {xml_file_path_full} processed and saved to {output_folder_path}")

print ("+++ DONE +++")

Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Stella '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Sylvie'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Sylvie'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Tanya '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Kelsey '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Sylvie'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Sylvie'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Joyce '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LIN

Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Katya'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Rita '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Yelena'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Victoria '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Yelena'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Elena '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_INT: Yelena'}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'us', 'LINGUISTIC_TYPE_REF': 'default', 'TIER_ID': 'Pacific_PNT: Valerie '}
Attributes for TIER tag: {'DEFAULT_LOCALE': 'es', 'LI