In [1]:
# The first feedback item:

# IATI Validator Error:
 # The budget status is invalid.
 # "" is not a valid value for attribute @status, in element <budget>

# IATI CoVE Error: Sorry your data contains values in embedded codelists that do not exist in the standard. 
# Error location: iati-activity/budget/@status

# According to VERSION 2.02 of IATI Standard Reference /activity-standard/iati-activities/iati-activity/budget @status attribute for xml element Budget: 
  # Must be of type xsd:string.
  # Must be on the following BudgetStatus codelist:
   # Code: 1, Name: Indicative, Description: A non-binding estimate for the described budget
   # Code: 2, Name: Committed, Description: A binding agreement for the described budget
 # If the @status attribute is not present, the budget is assumed to be Indicative: Code 1.

In [2]:
# Importing necessary libraries
import xml.etree.ElementTree as ET

# Load the activity file whose <budget> elements were reported with an
# empty @status attribute.
file_path = "data/iatiActivity_Albania.xml"
tree = ET.parse(file_path)
root = tree.getroot()

# The invalid value quoted by the validator: an empty string.
invalid_status_value = ""

# Select only the <budget> elements whose @status equals the invalid value and
# set them to '1' (Indicative). Budgets without a @status attribute are not
# matched by this XPath predicate and are left untouched.
for budget_elem in root.findall(".//budget[@status='{}']".format(invalid_status_value)):
    budget_elem.set("status", "1")

# Save the corrected copy. ElementTree.write() defaults to US-ASCII output with
# no XML declaration, which rewrites every non-ASCII character as a numeric
# character reference; write UTF-8 with an explicit declaration instead so the
# text content stays readable and the encoding is stated in the file.
tree.write(
    "data/updated_albania_activities.xml",
    encoding="utf-8",
    xml_declaration=True,
)

In [3]:
# Verify the budget fix: re-read the saved file and report every <budget>
# element whose @status attribute is present but empty.

# Loading the updated XML file
updated_file_path = "data/updated_albania_activities.xml"
updated_tree = ET.parse(updated_file_path)
updated_root = updated_tree.getroot()

for budget_elem in updated_root.findall(".//budget"):
    # get() returns None when the attribute is absent. An absent @status is
    # valid (it is assumed to be Indicative), so only a present-but-empty
    # value is reported; defaulting the lookup to "" would flag valid budgets.
    status_value = budget_elem.get("status")
    if status_value == "":
        print(f"Empty status value found in budget element: {ET.tostring(budget_elem).decode()}")

# If nothing is printed, no budget element has an empty @status value.


In [16]:
# The second feedback item:

# IATI Validator Error:
 # The activity scope code is invalid.
 # "Yes" is not a valid value for attribute @code, in element <activity-scope>

# IATI CoVE Error: Sorry your data contains values in embedded codelists that do not exist in the standard.
# Error location: iati-activity/activity-scope/@code

# According to VERSION 2.02 of IATI Standard Reference /activity-standard/iati-activities/iati-activity/activity-scope/ @code attribute for xml element activity-scope:
 # This attribute is required.
 # This value must be of type xsd:string.
 # This value must be on the ActivityScope codelist: (Code 1 to Code 8)

In [5]:
# Valid activity scope codes based on the guidance documentation (codes 1-8).
valid_activity_scope_codes = {"1", "2", "3", "4", "5", "6", "7", "8"}

# Walk every <activity-scope> that carries a @code attribute and overwrite any
# value that is not on the codelist.
# NOTE(review): every invalid value is replaced with '1' (Global) regardless of
# the activity's real scope — confirm with the data owner that this default is
# acceptable before publishing.
for activity_scope_elem in updated_root.findall(".//activity-scope[@code]"):
    code_value = activity_scope_elem.get("code", "")
    if code_value not in valid_activity_scope_codes:
        activity_scope_elem.set("code", "1")

# Save the corrected tree back to the working copy. ElementTree.write()
# defaults to US-ASCII with no XML declaration; write UTF-8 with an explicit
# declaration so non-ASCII text is not rewritten as character references.
updated_tree.write(
    "data/updated_albania_activities.xml",
    encoding="utf-8",
    xml_declaration=True,
)


In [6]:
# Verify the activity-scope fix: re-read the saved file and report every
# <activity-scope> element whose @code is missing or not on the codelist.

# Loading the updated XML file
updated_tree = ET.parse("data/updated_albania_activities.xml")
updated_root = updated_tree.getroot()

# Valid activity scope codes based on the guidance documentation (codes 1-8).
valid_activity_scope_codes = {"1", "2", "3", "4", "5", "6", "7", "8"}

# Inspect every <activity-scope>, not only those that already have @code:
# the attribute is required, so an element without it is invalid as well and
# must not slip through the check.
for activity_scope_elem in updated_root.findall(".//activity-scope"):
    code_value = activity_scope_elem.get("code")
    if code_value is None:
        print("Activity-Scope element is missing the required @code attribute")
    elif code_value not in valid_activity_scope_codes:
        print(f"Invalid Activity-Scope Code: {code_value}")

# If nothing is printed, every activity-scope element has a valid code.


In [7]:
# The third feedback item:

# Ruleset Error: Elements must use a valid format

# Rule: identifier/text() should match the regex [^\:\&\|\?]+

# At: https://iatistandard.org/en/guidance/standard-overview/preparing-your-data/activity-information/creating-iati-identifiers/


# According to the IATI documentation rules and guidance on creating IATI activity identifiers:

 # Each IATI identifier you publish is unique. This covers all of your activities even if they are in separate files.
 # Each IATI identifier should not start or end with whitespace.
 # Only use numbers, letters and dashes to form your IATI Identifiers.
 # Once you have published an activity its IATI identifier must not change.

In [14]:
import re

# Loading the updated XML file
updated_tree = ET.parse("data/updated_albania_activities.xml")
updated_root = updated_tree.getroot()

# Target format: groups of letters/digits separated by single dashes, with no
# dash at either end.
identifier_regex_pattern = re.compile(r'^[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*$')

# Rewrite every <iati-identifier> whose text does not match the target format.
# NOTE(review): removing characters can make two different identifiers
# collide, and published identifiers must not change — review the rewritten
# values before publishing.
for identifier_elem in updated_root.findall(".//iati-identifier"):
    current_text = identifier_elem.text
    if current_text is None or identifier_regex_pattern.fullmatch(current_text):
        continue

    # Drop everything that is not a letter, digit or dash.
    updated_text = re.sub(r'[^a-zA-Z0-9-]', '', current_text)
    # Dropping characters can leave doubled or leading/trailing dashes
    # (e.g. "A:-:-B" -> "A--B"), which still violate the target format;
    # collapse dash runs and trim the ends so the result really matches.
    updated_text = re.sub(r'-{2,}', '-', updated_text).strip('-')

    # Never write an empty identifier: if nothing usable is left, keep the
    # original text so it is still reported by a later validity check.
    if updated_text:
        identifier_elem.text = updated_text

# Save the corrected tree back to the working copy. ElementTree.write()
# defaults to US-ASCII with no XML declaration; write UTF-8 with an explicit
# declaration so non-ASCII text is not rewritten as character references.
updated_tree.write(
    "data/updated_albania_activities.xml",
    encoding="utf-8",
    xml_declaration=True,
)



In [15]:
# Verify the identifier fix: re-read the saved file and report every
# <iati-identifier> whose text is missing or does not match the target format.

# Loading the updated XML file
updated_tree = ET.parse("data/updated_albania_activities.xml")
updated_root = updated_tree.getroot()

# Target format: groups of letters/digits separated by single dashes.
identifier_regex_pattern = re.compile(r'^[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*$')

# Collect invalid identifier values. An element with no text has text None;
# passing None to fullmatch() raises TypeError, so such elements are treated
# as invalid up front instead of crashing the check.
invalid_identifiers = [
    elem.text
    for elem in updated_root.findall(".//iati-identifier")
    if elem.text is None or not identifier_regex_pattern.fullmatch(elem.text)
]

# Printing invalid identifier values (an element without text prints as None)
for invalid_identifier in invalid_identifiers:
    print(f"Invalid IATI Identifier Value: {invalid_identifier}")
"" is not a valid value for attribute @status, in element <budget>