In [3]:
import pandas as pd
import json

# Load the annotation spreadsheet into a DataFrame.
df = pd.read_excel('equality_act_annotations.xlsx')

# Empty spreadsheet cells are loaded as NaN, and json.dump would write those as the
# bare token NaN, which is not valid JSON. Casting to object dtype first lets the
# missing values be replaced by None, which is serialised as null.
json_data = df.astype(object).where(df.notna(), None).to_dict(orient='records')

# Write one JSON object per spreadsheet row.
# NOTE(review): the output name is spelled 'equalty' (no 'i'); it is left unchanged
# because other cells/files may refer to that exact name — confirm before renaming.
with open('equalty_act_annotations.json', 'w', encoding='utf-8') as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)

In [3]:
    def get_neighbours_from_title_tree(file_name="title_tree.json", target_id=None):
        """
        Find an element of the title tree and return it with its document-order neighbours.

        The tree is flattened depth-first (every element is immediately followed by its
        descendants), so "previous" and "next" are the entries adjacent to the target in
        that flat ordering. They may therefore sit at a different nesting level than the
        target itself.

        Args:
            file_name (str): Path to the title tree JSON file. It must contain a list of
                dicts, each with "id" and "title" and, optionally, a "children" list.
            target_id (str): The ID of the element whose neighbours are wanted.

        Returns:
            dict or None: {"previous": ..., "current": ..., "next": ...}, where each value
            is an {"id": ..., "title": ...} dict; "previous" / "next" are None at the very
            start / end of the flattened tree. None is returned when no element has
            ``target_id``.

        Raises:
            KeyError: If a visited element lacks "id", or a reported one lacks "title".
        """
        with open(file_name, "r", encoding="utf-8") as f:
            title_tree = json.load(f)

        def flatten_tree(elements):
            """Return all elements of the tree in depth-first (pre-order) order."""
            flat_list = []
            for element in elements:
                flat_list.append(element)
                # A missing or null "children" entry means the element is a leaf.
                flat_list.extend(flatten_tree(element.get("children") or []))
            return flat_list

        def summarise(element):
            """Reduce an element to its id/title pair; pass None (no neighbour) through."""
            if not element:
                return None
            return {"id": element["id"], "title": element["title"]}

        flat_list = flatten_tree(title_tree)

        for i, element in enumerate(flat_list):
            if element["id"] == target_id:
                prev_elem = flat_list[i - 1] if i > 0 else None
                next_elem = flat_list[i + 1] if i + 1 < len(flat_list) else None
                return {
                    "previous": summarise(prev_elem),
                    "current": summarise(element),
                    "next": summarise(next_elem),
                }

        # The target ID does not occur anywhere in the tree.
        return None


    # Example usage: look up one section and print the entries on either side of it.
    # The ID is held in a variable and passed through, so changing it here actually
    # changes which element is looked up.
    target_id = "schedule-3-part-9"
    neighbors = get_neighbours_from_title_tree("title_tree.json", target_id)
    print("Neighbors:", neighbors)

Neighbors: {'previous': {'id': 'schedule-3-part-8', 'title': 'Untitled'}, 'current': {'id': 'schedule-3-part-9', 'title': 'Untitled'}, 'next': {'id': 'schedule-3-part-10', 'title': 'Untitled'}}


In [1]:
import json
import pandas as pd
import os
# Tallies for the report printed at the end of this cell.
no_conditions = 0
matching_sections = 0
non_matching = []

# Read JSON file
current_dir = os.getcwd()

# Construct paths relative to current directory
annotated_file = os.path.join(current_dir, 'outputs', 'Police_Act_2022.json')
with open(annotated_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Put every annotation into exactly one of three buckets:
# no conditions / own section appears among its conditions / it does not.
for annotation in data:
    # `or []` also covers an explicit null in the JSON, which .get() would hand back
    # as None and which has no length.
    conditions = annotation.get('conditions') or []
    act_section = annotation.get('act_section')
    # The section each condition points to (None when a condition has no 'section').
    condition_sections = [condition.get('section') for condition in conditions]

    if not conditions:
        no_conditions += 1
    elif act_section in condition_sections:
        matching_sections += 1
    else:
        non_matching.append(annotation)

# Print report
print(f"Annotations without condition_sections: {no_conditions}")
print(f"Annotations where act_section matches a condition: {matching_sections}")
print("\nAnnotations where act_section is not in condition_sections:")
for item in non_matching:
    # .get mirrors the lookup used while classifying, so an annotation without an
    # 'act_section' key is reported as None instead of raising KeyError.
    print(f"Act section: {item.get('act_section')}, Conditions: {item['conditions']}")


Annotations without condition_sections: 136
Annotations where act_section matches a condition: 136

Annotations where act_section is not in condition_sections:
Act section: https://www.legislation.gov.uk/ukpga/2022/32/section/101, Conditions: [{'type': 'WHEN/IF/WHERE', 'text': 'the diversionary caution is given to a relevant foreign offender.', 'section': 'https://www.legislation.gov.uk/ukpga/2022/32/section/103'}]
Act section: https://www.legislation.gov.uk/ukpga/2022/32/section/115, Conditions: [{'type': 'WHEN/IF/WHERE', 'text': 'a diversionary caution is given', 'section': 'https://www.legislation.gov.uk/ukpga/2022/32/section/103'}, {'type': 'ONLY IF', 'text': 'the offender fails without reasonable excuse to comply with any of the conditions attached to the caution.', 'section': 'https://www.legislation.gov.uk/ukpga/2022/32/section/105'}]
Act section: https://www.legislation.gov.uk/ukpga/2022/32/section/115, Conditions: [{'type': 'WHEN/IF/WHERE', 'text': 'a community caution is give

In [9]:
import json
import pandas as pd
import os
# Tallies for the report printed at the end of this cell.
no_conditions = 0
matching_sections = 0
non_matching = []

# Read JSON file
current_dir = os.getcwd()

# Construct paths relative to current directory
annotated_file = os.path.join(current_dir, 'outputs', 'Equality_Act_2010.json')
with open(annotated_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Put every annotation into exactly one of three buckets:
# no conditions / own section appears among its conditions / it does not.
for annotation in data:
    # `or []` also covers an explicit null in the JSON, which .get() would hand back
    # as None and which has no length.
    conditions = annotation.get('conditions') or []
    # In this file the annotation's own section is stored under 'main_section'.
    act_section = annotation.get('main_section')
    # The section each condition points to (None when a condition has no 'section').
    condition_sections = [condition.get('section') for condition in conditions]

    if not conditions:
        no_conditions += 1
    elif act_section in condition_sections:
        matching_sections += 1
    else:
        non_matching.append(annotation)

# Print report
print(f"Annotations without condition_sections: {no_conditions}")
print(f"Annotations where act_section matches a condition: {matching_sections}")
print("\nAnnotations where act_section is not in condition_sections:")
for item in non_matching:
    # .get mirrors the lookup used while classifying, so an annotation without a
    # 'main_section' key is reported as None instead of raising KeyError.
    print(f"Act section: {item.get('main_section')}, Conditions: {item['conditions']}")

Annotations without condition_sections: 241
Annotations where act_section matches a condition: 146

Annotations where act_section is not in condition_sections:
Act section: https://www.legislation.gov.uk/ukpga/2010/15/section/101, Conditions: [{'type': 'UNLESS', 'text': 'the protected characteristic is marriage or civil partnership, or it is a case of discrimination, harassment or victimisation— (a) that is prohibited by Part 3 (services and public functions), Part 4 (premises), Part 5 (work) or Part 6 (education), or\\ (b) that would be so prohibited but for an express exception. or a person is acting in accordance with selection arrangements.', 'section': 'https://www.legislation.gov.uk/ukpga/2010/15/section/100'}]
Act section: https://www.legislation.gov.uk/ukpga/2010/15/section/101, Conditions: [{'type': 'UNLESS', 'text': 'the protected characteristic is marriage or civil partnership, or it is a case of discrimination, harassment or victimisation— (a) that is prohibited by Part 3 (

In [7]:
import json
import pandas as pd
# Running totals and the list of mismatches reported at the end of the cell.
no_conditions = 0
matching_sections = 0
non_matching = []

# Load the annotation records.
with open('annotations_police.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Classify each record; every record lands in exactly one bucket.
for record in data:
    linked_sections = record.get('condition_sections', [])
    own_section = record.get('act_section')

    # Bucket 1: nothing recorded under 'condition_sections'.
    if not linked_sections:
        no_conditions += 1
        continue

    # Bucket 2: the record's own section is one of the mapped condition sections.
    if own_section in linked_sections.values():
        matching_sections += 1
        continue

    # Bucket 3: conditions exist, but none of them points at the record's own section.
    non_matching.append(record)

# Summary counts, then one line per mismatching record.
print(f"Annotations without condition_sections: {no_conditions}")
print(f"Annotations where act_section matches a condition: {matching_sections}")
print()
print("Annotations where act_section is not in condition_sections:")
for entry in non_matching:
    section, linked = entry['act_section'], entry['condition_sections']
    print(f"Act section: {section}, Conditions: {linked}")


Annotations without condition_sections: 138
Annotations where act_section matches a condition: 154

Annotations where act_section is not in condition_sections:


In [12]:
import json
import pandas as pd
# Running totals and the list of mismatches reported at the end of the cell.
no_conditions = 0
matching_sections = 0
non_matching = []

# Load the annotation records.
with open('equalty_act_annotations2.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Classify each record; every record lands in exactly one bucket.
for record in data:
    linked_sections = record.get('condition_sections', [])
    own_section = record.get('act_section')

    # Bucket 1: nothing recorded under 'condition_sections'.
    if not linked_sections:
        no_conditions += 1
        continue

    # Bucket 2: the record's own section is one of the mapped condition sections.
    if own_section in linked_sections.values():
        matching_sections += 1
        continue

    # Bucket 3: conditions exist, but none of them points at the record's own section.
    non_matching.append(record)

# Summary counts, then one line per mismatching record.
print(f"Annotations without condition_sections: {no_conditions}")
print(f"Annotations where act_section matches a condition: {matching_sections}")
print()
print("Annotations where act_section is not in condition_sections:")
for entry in non_matching:
    section, linked = entry['act_section'], entry['condition_sections']
    print(f"Act section: {section}, Conditions: {linked}")


Annotations without condition_sections: 243
Annotations where act_section matches a condition: 150

Annotations where act_section is not in condition_sections:
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-101, Conditions: {'unless_0': 'section-100'}
Act section: section-102, Conditions: {'unless_0': 'section-100'}
Act section: section-102, Conditions: {'unless_0': 'section-100'}
Act section: section-102, Conditions: {'unless_0': 'section-100'}
Act section: section-102, Conditions: {'unless_0': 'section-100'}
Act section: section-102, Conditions: {'unless_0': 'section-100'}
Act section: section-108, Conditions: {'unless_0

In [6]:
def get_leaf_ids_from_title_tree(file_name="title_tree.json"):
    """
    Reads the title_tree.json file and retrieves IDs of leaf nodes (nodes with no children).

    An element whose "children" entry is missing, null or empty counts as a leaf.
    An element whose ID is 'unknown' is skipped together with its whole subtree:
    its children are not visited.

    Args:
        file_name (str): Path to the title tree JSON file. It must contain a list of
            dicts, each with an "id" and, optionally, a "children" list.

    Returns:
        list: IDs of the leaf nodes in depth-first order, excluding 'unknown'.

    Raises:
        KeyError: If a visited element has no "id".
    """
    with open(file_name, "r", encoding="utf-8") as f:
        title_tree = json.load(f)

    def get_leaf_nodes(elements):
        """Collect the leaf IDs found below ``elements``, depth-first."""
        leaf_ids = []
        for element in elements:
            if element["id"] == "unknown":
                # Neither the placeholder node nor anything beneath it is reported.
                continue
            # Tolerate leaves that omit "children" or store it as null.
            children = element.get("children") or []
            if children:
                leaf_ids.extend(get_leaf_nodes(children))
            else:
                leaf_ids.append(element["id"])
        return leaf_ids

    return get_leaf_nodes(title_tree)

# Collect the IDs of all leaf nodes in the title tree.
leaf_ids = get_leaf_ids_from_title_tree("title_tree.json")

# Persist them as an indented JSON array, keeping non-ASCII characters as they are.
with open('section_list.json', mode='w', encoding='utf-8') as out_file:
    json.dump(leaf_ids, out_file, indent=4, ensure_ascii=False)

print("Leaf node IDs saved to section_list.json")


Leaf node IDs saved to section_list.json
