<a href="https://colab.research.google.com/github/EskimoBrew33/Colab/blob/main/CSS2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import xml.etree.ElementTree as ET
import os
import csv

def flatten_xml(element, parent_path='', flattened_data=None):
  """Flattens an XML element and its children into a list of paths.

  Args:
    element: The XML element to flatten.
    parent_path: The path of the parent element.
    flattened_data: A list to store the flattened data.

  Returns:
    A list of tuples, where each tuple contains the flattened path, the text content of the element, and the tag of the element.
  """
  if flattened_data is None:
    flattened_data = []
  path = f'{parent_path}/{element.tag}' if parent_path else element.tag
  if element.text and element.text.strip():
    flattened_data.append((path, element.text.strip(), element.tag))
  for child in element:
    flatten_xml(child, path, flattened_data)
  return flattened_data

def analyze_acord_files():
  """Analyzes ACORD XML files in the current Colab session and generates a consolidated CSV file."""
  all_data = []
  for filename in os.listdir():
    if filename.endswith('.xml'):
      product = filename[:-4]  # Remove '.xml' extension
      try:
        tree = ET.parse(filename)
        root = tree.getroot()
        flattened_data = flatten_xml(root)
        for path, text, tag in flattened_data:
          all_data.append((path, text, product))
      except ET.ParseError:
        print(f'Error parsing XML file: {filename}')
      except FileNotFoundError:
        print(f'XML file not found: {filename}')

  # Sort data by Product
  all_data.sort(key=lambda x: x[2])

  # Write data to CSV file
  with open('acord_consolidated.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Path', 'Value', 'Product'])
    writer.writerows(all_data)

# Analyze ACORD files in the current Colab session
analyze_acord_files()

In [15]:
from IPython import get_ipython
from IPython.display import display
# %%
import xml.etree.ElementTree as ET
import os
import csv

def preprocess_schema(schema_root):
  """Creates a dictionary of element descriptions from the schema."""
  descriptions = {}
  for element in schema_root.iter():
    if element.tag.endswith('element') and 'name' in element.attrib:
      tag = element.get('name')
      description = None
      for annotation in element.iterfind('.//{http://www.w3.org/2001/XMLSchema}annotation'):
        for documentation in annotation.iterfind('.//{http://www.w3.org/2001/XMLSchema}documentation'):
          description = documentation.text.strip() if documentation.text else None
          break
      descriptions[tag] = description
  return descriptions

def flatten_xml(element, parent_path='', flattened_data=None, descriptions=None):
  """Flattens an XML element and its children into a list of paths (optimized)."""
  if flattened_data is None:
    flattened_data = []
  path = f'{parent_path}/{element.tag}' if parent_path else element.tag
  if element.text and element.text.strip():
    description = descriptions.get(element.tag)
    flattened_data.append((path, element.text.strip(), element.tag, description))
  for child in element:
    flatten_xml(child, path, flattened_data, descriptions)
  return flattened_data

def analyze_acord_files():
  """Analyzes ACORD XML files and generates a consolidated CSV file (optimized)."""
  all_data = []
  schema_tree = ET.parse('TXLife2.45.00fulldoc.xsd')  # Replace with actual file name if different
  schema_root = schema_tree.getroot()
  descriptions = preprocess_schema(schema_root)

  for filename in os.listdir():
    if filename.endswith('.xml'):
      product = filename[:-4]
      try:
        tree = ET.parse(filename)
        root = tree.getroot()
        flattened_data = flatten_xml(root, descriptions=descriptions)
        for path, text, tag, description in flattened_data:
          all_data.append((path, text, product, tag, description))
      except ET.ParseError:
        print(f'Error parsing XML file: {filename}')
      except FileNotFoundError:
        print(f'XML file not found: {filename}')

  # Sort data by Product
  all_data.sort(key=lambda x: x[2])

  # Write data to CSV file
  with open('acord_consolidated.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Path', 'Value', 'Product', 'Tag', 'Description'])
    writer.writerows(all_data)

# Analyze ACORD files in the current Colab session
analyze_acord_files()

In [16]:
import xml.etree.ElementTree as ET

schema_tree = ET.parse('TXLife2.45.00fulldoc.xsd')  # Replace with your actual schema file name
schema_root = schema_tree.getroot()

descriptions = []
for element in schema_root.iter():
  if element.tag.endswith('element') and 'name' in element.attrib:
    for annotation in element.iterfind('.//{http://www.w3.org/2001/XMLSchema}annotation'):
      for documentation in annotation.iterfind('.//{http://www.w3.org/2001/XMLSchema}documentation'):
        descriptions.append(documentation.text.strip() if documentation.text else None)
        if len(descriptions) >= 20:
          break
      if len(descriptions) >= 20:
        break
    if len(descriptions) >= 20:
      break

for description in descriptions:
  print(description)























In [17]:
from IPython import get_ipython
from IPython.display import display
# %%
import xml.etree.ElementTree as ET
import os
import csv

def preprocess_schema(schema_root):
  """Creates a dictionary of element descriptions from the schema."""
  descriptions = {}
  for element in schema_root.iter():
    if element.tag.endswith('}element') and 'name' in element.attrib:  # Handle namespaces in element tag
      tag = element.get('name')
      description = None
      for annotation in element.iterfind('.//{http://www.w3.org/2001/XMLSchema}annotation'):
        for documentation in annotation.iterfind('.//{http://www.w3.org/2001/XMLSchema}documentation'):
          description = documentation.text.strip() if documentation.text else None
          break
      descriptions[tag] = description
  return descriptions

def flatten_xml(element, parent_path='', flattened_data=None, descriptions=None):
  """Flattens an XML element and its children into a list of paths (optimized)."""
  if flattened_data is None:
    flattened_data = []
  path = f'{parent_path}/{element.tag}' if parent_path else element.tag
  if element.text and element.text.strip():
    description = descriptions.get(element.tag)
    flattened_data.append((path, element.text.strip(), element.tag, description))
  for child in element:
    flatten_xml(child, path, flattened_data, descriptions)
  return flattened_data

def analyze_acord_files():
  """Analyzes ACORD XML files and generates a consolidated CSV file (optimized)."""
  all_data = []
  schema_tree = ET.parse('TXLife2.45.00fulldoc.xsd')
  schema_root = schema_tree.getroot()
  descriptions = preprocess_schema(schema_root)

  for filename in os.listdir():
    if filename.endswith('.xsd'):  # Analyze the XSD file first
      try:
        tree = ET.parse(filename)
        root = tree.getroot()
        # You might not need to flatten the XSD schema - depends on your needs
        # flattened_data = flatten_xml(root, descriptions=descriptions)
        # ... process the XSD file as needed ...
      except ET.ParseError:
        print(f'Error parsing XSD file: {filename}')
      except FileNotFoundError:
        print(f'XSD file not found: {filename}')

    if filename.endswith('.xml'):
      product = filename[:-4]
      try:
        tree = ET.parse(filename)
        root = tree.getroot()
        flattened_data = flatten_xml(root, descriptions=descriptions)
        for path, text, tag, description in flattened_data:
          all_data.append((path, text, product, tag, description))
      except ET.ParseError:
        print(f'Error parsing XML file: {filename}')
      except FileNotFoundError:
        print(f'XML file not found: {filename}')

  # Sort data by Product
  all_data.sort(key=lambda x: x[2])

  # Write data to CSV file
  with open('acord_consolidated.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Path', 'Value', 'Product', 'Tag', 'Description'])
    writer.writerows(all_data)

# Analyze ACORD files in the current Colab session
analyze_acord_files()

In [22]:
import xml.etree.ElementTree as ET

def extract_descriptions(schema_file, num_elements=5):
  """Extracts descriptions of elements from an XSD schema file."""
  descriptions = []
  tree = ET.parse(schema_file)
  root = tree.getroot()
  namespaces = {
      'acord': 'http://ACORD.org/Standards/Life/2',  # Default namespace
      'xsd': 'http://www.w3.org/2001/XMLSchema'
  }

  for element in root.iter():
    if element.tag == '{http://www.w3.org/2001/XMLSchema}element' and 'name' in element.attrib:
      for annotation in element.iterfind('.//xsd:annotation', namespaces):
        for documentation in annotation.iterfind('.//xsd:documentation', namespaces):
          description = documentation.text.strip() if documentation.text else None
          if description:
            descriptions.append((element.get('name'), description))
            if len(descriptions) >= num_elements:
              return descriptions
  return descriptions

# Replace 'TXLife2.45.00fulldoc.xsd' with your actual schema file name
schema_file = 'TXLife2.45.00fulldoc.xsd'
descriptions = extract_descriptions(schema_file)

for element_name, description in descriptions:
  print(f"Element: {element_name}\nDescription: {description}\n")

In [23]:
import xml.etree.ElementTree as ET

def extract_descriptions(schema_file, num_elements=5):
  """Extracts descriptions of elements from an XSD schema file."""
  descriptions = []
  tree = ET.parse(schema_file)
  root = tree.getroot()
  namespaces = {
      'acord': 'http://ACORD.org/Standards/Life/2',
      'xsd': 'http://www.w3.org/2001/XMLSchema'
  }

  for element in root.iter():
    if element.tag == '{http://www.w3.org/2001/XMLSchema}element' and 'name' in element.attrib:
      for annotation in element.iterfind('.//xsd:annotation', namespaces):
        for documentation in annotation.iterfind('.//xsd:documentation', namespaces):
          for description_tag in documentation.iterfind('.//Description', namespaces): # Look for <Description> tag
            description = description_tag.text.strip() if description_tag.text else None
            if description:
              descriptions.append((element.get('name'), description))
              if len(descriptions) >= num_elements:
                return descriptions
  return descriptions

schema_file = 'TXLife2.45.00fulldoc.xsd' # Replace with your actual schema file name
descriptions = extract_descriptions(schema_file)

for element_name, description in descriptions:
  print(f"Element: {element_name}\nDescription: {description}\n")

In [26]:
import xml.etree.ElementTree as ET

def get_element_description(schema_file, element_name):
  """Retrieves the description of an element from an ACORD schema."""
  tree = ET.parse(schema_file)
  root = tree.getroot()
  namespaces = {
      'acord': 'http://ACORD.org/Standards/Life/2',  # Replace with actual namespace if different
      'xsd': 'http://www.w3.org/2001/XMLSchema'
  }

  for element in root.iterfind(f'.//xsd:element[@name="{element_name}"]', namespaces):
    for annotation in element.iterfind('.//xsd:annotation', namespaces):
      for documentation in annotation.iterfind('.//xsd:documentation', namespaces):
        # Adjust this part based on how descriptions are structured in your schema
        for description_tag in documentation.iterfind('.//Description', namespaces):
          description = description_tag.text.strip() if description_tag.text else None
          return description

  return None

schema_file = 'TXLife2.45.00fulldoc.xsd'  # Replace with your schema file name
element_name = 'ACORD_PRIVATE_CODE'  # Replace with the element you want to find

description = get_element_description(schema_file, element_name)

if description:
  print(f"Description of element '{element_name}': {description}")
else:
  print(f"No description found for element '{element_name}'")

No description found for element 'ACORD_PRIVATE_CODE'


In [27]:
import xml.etree.ElementTree as ET

def get_element_description(schema_file, element_name):
  """Retrieves the description of an element from an ACORD schema."""
  tree = ET.parse(schema_file)
  root = tree.getroot()
  namespaces = {
      'acord': 'http://ACORD.org/Standards/Life/2',
      'xsd': 'http://www.w3.org/2001/XMLSchema'
  }

  for element in root.iterfind(f'.//xsd:element[@name="{element_name}"]', namespaces):
    for annotation in element.iterfind('.//xsd:annotation', namespaces):
      for documentation in annotation.iterfind('.//xsd:documentation', namespaces):
        for description_tag in documentation.findall('.//Description', namespaces):
          description = description_tag.text.strip() if description_tag.text else None
          if description:
            return description
        for definition_tag in documentation.findall('.//Definition', namespaces):
          definition = definition_tag.text.strip() if definition_tag.text else None
          if definition:
            return definition
  return None

# Example usage
schema_file = 'TXLife2.45.00fulldoc.xsd'  # Replace with your schema file name
element_name = 'ACORD_PRIVATE_CODE'  # Example element

description = get_element_description(schema_file, element_name)

if description:
  print(f"Description of element '{element_name}': {description}")
else:
  print(f"No description found for element '{element_name}'")

No description found for element 'ACORD_PRIVATE_CODE'


In [30]:
import xml.etree.ElementTree as ET

def get_type_description(xsd_file, type_name):
    # Parse the XSD file
    tree = ET.parse(xsd_file)
    root = tree.getroot()

    # Define the namespaces used in the XSD file
    namespaces = {
        'xsd': 'http://www.w3.org/2001/XMLSchema'
    }

    # Find the simpleType or complexType with the given name
    type_element = root.find(f".//xsd:simpleType[@name='{type_name}']", namespaces)
    if type_element is None:
        type_element = root.find(f".//xsd:complexType[@name='{type_name}']", namespaces)

    if type_element is None:
        return f"Type '{type_name}' not found in the XSD file."

    # Find the annotation/documentation for the type
    documentation = type_element.find(".//xsd:documentation", namespaces)

    if documentation is not None and documentation.text:
        return documentation.text.strip()
    else:
        return f"No documentation found for type '{type_name}'."

# Example usage
xsd_file = 'TXLife2.45.00fulldoc.xsd'
type_name = 'ACORD_PRIVATE_CODE'

description = get_type_description(xsd_file, type_name)
print(description)





In [33]:
import xml.etree.ElementTree as ET

def get_inner_text(element):
    """ Recursively get text from an XML element, including its children. """
    text = element.text or ''
    for child in element:
        text += get_inner_text(child)
    text += element.tail or ''
    return text

def get_type_description(xsd_file, type_name):
    # Parse the XSD file
    tree = ET.parse(xsd_file)
    root = tree.getroot()

    # Define the namespaces used in the XSD file
    namespaces = {
        'xsd': 'http://www.w3.org/2001/XMLSchema'
    }

    # Find the simpleType or complexType with the given name
    type_element = root.find(f".//xsd:simpleType[@name='{type_name}']", namespaces)
    if type_element is None:
        type_element = root.find(f".//xsd:complexType[@name='{type_name}']", namespaces)

    if type_element is None:
        return f"Type '{type_name}' not found in the XSD file."

    # Find the annotation/documentation for the type
    documentation = type_element.find(".//xsd:documentation", namespaces)

    if documentation is not None:
        return get_inner_text(documentation).strip()
    else:
        return f"No documentation found for type '{type_name}'."

# Example usage
xsd_file_path = '/content/TXLife2.45.00fulldoc.xsd'
type_name = 'ACORD_PRIVATE_CODE'

description = get_type_description(xsd_file_path, type_name)
description


'Refer to Section 4.4 "How to handle typecodes" and Section 4.5 "How to handle type code assignment" for information on defining ACORD_PRIVATE_CODE.'

In [38]:
def get_type_description_with_name(xsd_file, type_name):
    # Parse the XSD file
    tree = ET.parse(xsd_file)
    root = tree.getroot()

    # Define the namespaces used in the XSD file
    namespaces = {
        'xsd': 'http://www.w3.org/2001/XMLSchema'
    }

    # Find the simpleType or complexType with the given name
    type_element = root.find(f".//xsd:simpleType[@name='{type_name}']", namespaces)
    if type_element is None:
        type_element = root.find(f".//xsd:complexType[@name='{type_name}']", namespaces)

    if type_element is None:
        return f"Type '{type_name}' not found in the XSD file."

    # Find the annotation/documentation for the type
    documentation = type_element.find(".//xsd:documentation", namespaces)

    if documentation is not None:
        description = get_inner_text(documentation).strip()
        return f"{type_name}: {description}"
    else:
        return f"No documentation found for type '{type_name}'."

# Example usage
xsd_file_path = '/content/TXLife2.45.00fulldoc.xsd'  # replace with your XSD file path
type_name = 'ProductVersionCode'
description_with_name = get_type_description_with_name(xsd_file_path, type_name)
print(description_with_name)


Type 'ProductVersionCode' not found in the XSD file.


In [37]:
import xml.etree.ElementTree as ET

def get_inner_text(element):
    """ Recursively get text from an XML element, including its children. """
    text = element.text or ''
    for child in element:
        text += get_inner_text(child)
    text += element.tail or ''
    return text

def get_type_description(type_element, namespaces):
    """ Get the description from the type element. """
    documentation = type_element.find(".//xsd:documentation", namespaces)
    if documentation is not None:
        return get_inner_text(documentation).strip()
    else:
        return "No documentation found."

def print_first_five_descriptions(xsd_file):
    # Parse the XSD file
    tree = ET.parse(xsd_file)
    root = tree.getroot()

    # Define the namespaces used in the XSD file
    namespaces = {
        'xsd': 'http://www.w3.org/2001/XMLSchema'
    }

    # Find all simpleType and complexType elements
    elements = root.findall(".//xsd:simpleType", namespaces) + root.findall(".//xsd:complexType", namespaces)

    # Print the description for the first 5 elements
    for type_element in elements[:5]:
        type_name = type_element.get('name')
        description = get_type_description(type_element, namespaces)
        print(f"{type_name}: {description}")

# Example usage
xsd_file_path = '/content/TXLife2.45.00fulldoc.xsd'  # replace with your XSD file path
print_first_five_descriptions(xsd_file_path)


ACORD_PRIVATE_CODE: Refer to Section 4.4 "How to handle typecodes" and Section 4.5 "How to handle type code assignment" for information on defining ACORD_PRIVATE_CODE.
ACORD_TYPE_CODE: Basis for all lookups. This is usually further restricted to the allowable set of enumerations.
BANDINGTYPE_TYPE_TC: Banded with a simple break
                        
                        
                           Simple Banding
                        
                        
                           E.g. 5% if premium is less than $100,000; 4% if premium is greater than $100,000.
None: Banded with a simple break
                        
                        
                           Simple Banding
                        
                        
                           E.g. 5% if premium is less than $100,000; 4% if premium is greater than $100,000.
None: No documentation found.
