## ELAN remove tiers

Remove all unwanted tiers from ELAN (`.eaf`) files.

1. load the files
2. identify tiers to remove
3. remove the tiers
4. save the files

<br>

<div class="warning" style='padding:0.1em; background-color: #FDAE44; color:#51247a; border-style: solid; border-color: #CC5500 '>
<span>
<p style='margin-top:1em; text-align:center'>
<b>Never use this script on your main files. Always use it on a copy of your files!</b>
<br>
</p>
<p style='margin-left:1em;'></p></span>
</div>



In [None]:
# find all tiers

import os
import xml.etree.ElementTree as ET


def extract_tier_ids(folder_path):
    """Collect the distinct TIER_ID values used by the .eaf files in a folder.

    Only files directly inside ``folder_path`` whose name ends in ".eaf" are
    read. A file that cannot be read or parsed is reported on stdout and
    skipped, so one bad file does not abort the whole scan.

    Args:
        folder_path: Directory to scan (sub-folders are not searched).

    Returns:
        A sorted list of the unique TIER_ID attribute values found on TIER
        elements.
    """
    tier_ids = set()  # a set collapses IDs that occur in several files

    for filename in os.listdir(folder_path):
        if not filename.endswith(".eaf"):
            continue
        file_path = os.path.join(folder_path, filename)

        # Best effort per file: report the problem and carry on with the rest.
        try:
            root = ET.parse(file_path).getroot()
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            continue

        for tier in root.findall(".//TIER"):
            tier_id = tier.get("TIER_ID")
            # A TIER without an ID is skipped on its own, so the remaining
            # tiers of the same file are still collected.
            if tier_id is not None:
                tier_ids.add(tier_id)

    # Sorted so that the result is the same from one run to the next.
    return sorted(tier_ids)

# Example usage: scan the folder holding the .eaf files to inspect.
folder_path = "./Elan_edited/"
tier_ids_list = extract_tier_ids(folder_path)

# Show the raw list first, then the same IDs one per line in sorted order
# (handy for copying into the removal lists).
print("List of TIER_ID values:")
print(tier_ids_list)
for each in sorted(tier_ids_list):
    print(each)


In [None]:
import os
import xml.etree.ElementTree as ET


# Tiers to delete by exact name: edit this list so it holds every TIER_ID
# you want to remove.
removeList = [
    "Demographic_info",
    "Word_list",
    "Comments",
    "Section to edit",
    "Australian_English",
    "Sections",
    "Comments CT",
    "Comments KG",
    "Social_info",
]

# Tiers to delete by prefix: any tier whose TIER_ID starts with one of the
# strings in this list is removed as well.
startList = ["Pacific"]

def remove_tags_by_attribute(xml_file_path, output_folder, remove_list, start_list=None):
    """Strip unwanted TIER elements from one file and save the result.

    A TIER is removed when its TIER_ID is exactly one of ``remove_list`` or
    starts with one of ``start_list``. The modified document is written to
    ``output_folder`` under the same file name as the input, as UTF-8 with an
    XML declaration. If ``output_folder`` is the folder the input lives in,
    the input file is overwritten.

    Args:
        xml_file_path: Path of the XML (.eaf) file to read.
        output_folder: Existing directory the modified file is written to.
        remove_list: TIER_ID values to remove (exact match).
        start_list: TIER_ID prefixes to remove. When omitted, the
            module-level ``startList`` is used.

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        OSError: If the input cannot be read or the output cannot be written.
    """
    if start_list is None:
        # Fall back to the notebook-level prefix list so that existing
        # three-argument calls keep working unchanged.
        start_list = startList

    exact_ids = set(remove_list)
    prefixes = tuple(start_list)  # str.startswith accepts a tuple of options

    tree = ET.parse(xml_file_path)
    root = tree.getroot()

    # Visit every element as a potential parent so that each TIER is removed
    # from the element that actually contains it. The element list and the
    # child list are both snapshotted because the tree is modified in the loop.
    for parent in list(root.iter()):
        for tier in list(parent.findall("TIER")):
            tier_id = tier.get("TIER_ID")
            if tier_id is None:
                continue  # nothing to match against
            # IDs are compared in Python rather than inside an XPath string,
            # so IDs containing quote characters are handled correctly.
            if tier_id in exact_ids:
                parent.remove(tier)
            elif tier_id.startswith(prefixes):
                print(f"Attributes for TIER tag: {tier.attrib}")
                parent.remove(tier)

    # Save the modified XML to the output folder
    output_file_path = os.path.join(output_folder, os.path.basename(xml_file_path))
    tree.write(output_file_path, encoding="utf-8", xml_declaration=True)

# Example usage: the folder the .eaf files are read from and the folder the
# stripped files are written to. Both point at the same folder here, so every
# file is overwritten in place.
input_folder_path = "C:\\Users\\barth\\Documents\\LDACA\\AusESL\\edited_elan\\"
output_folder_path = "C:\\Users\\barth\\Documents\\LDACA\\AusESL\\edited_elan\\"

# Make sure there is a folder to write into.
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Run the tier removal on every .eaf file found directly in the input folder.
for xml_file_path in os.listdir(input_folder_path):
    if not xml_file_path.endswith(".eaf"):
        continue
    xml_file_path_full = os.path.join(input_folder_path, xml_file_path)
    remove_tags_by_attribute(xml_file_path_full, output_folder_path, removeList)

print("+++ DONE +++")