<a href="https://colab.research.google.com/github/DiegoCarrillo19/Absenteeism-Random-Forest/blob/main/Transformar_tree_rules_txt_a_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import re
import csv

# Leaf lines contain "class: <label>". The label is captured verbatim so that
# float ("1.0") and string ("yes", "<=50K") labels are not truncated or missed;
# \b keeps names that merely end in "class" (e.g. "Pclass: ...") from matching.
_LEAF_RE = re.compile(r'\bclass: (\S.*)')

# Split lines look like "<feature> <operator> <threshold>". The feature part is
# matched lazily so names with spaces or punctuation are accepted, and the
# threshold may carry a sign and an exponent.
_SPLIT_RE = re.compile(r'(.+?)\s*([<>=]+)\s*([-+]?[\d.]+(?:[eE][-+]?\d+)?)')


def parse_tree_rules(rules_text):
    """Parse an indented text dump of a decision tree into flat row dicts.

    The expected layout is one node per line, where each nesting level is
    marked by a leading ``'|   '`` and the node text follows ``'|--- '``
    (this looks like the output of scikit-learn's ``export_text`` --
    presumably the source of the input; confirm against the producer).

    Args:
        rules_text: The full text of the tree dump.

    Returns:
        A list with one dict per non-blank line, with the keys ``level``,
        ``feature``, ``operator``, ``threshold``, ``class``, ``rule_text``
        and ``full_rule``:

        * split lines fill ``feature``/``operator``/``threshold`` (strings);
        * leaf lines fill ``class`` and ``full_rule``, the latter being the
          enclosing split conditions joined with ``" AND "`` followed by
          ``" => class: <label>"``;
        * any other line keeps only ``level`` and ``rule_text``.

        Empty or whitespace-only input yields an empty list.
    """
    data = []
    stack = []  # stack[i] is the split condition currently active at level i

    for line in rules_text.strip().splitlines():
        level = line.count('|   ')
        line_clean = line.replace('|   ', '').replace('|--- ', '').strip()
        if not line_clean:
            # Blank lines carry no node; do not emit an all-empty row.
            continue

        row = {
            'level': level,
            'feature': None,
            'operator': None,
            'threshold': None,
            'class': None,
            'rule_text': line_clean,
            'full_rule': None,
        }

        # Leaves are checked first: a string label such as "<=50K" would
        # otherwise be mistaken for a split condition.
        leaf_match = _LEAF_RE.search(line_clean)
        if leaf_match:
            class_label = leaf_match.group(1)
            row['class'] = class_label
            # The complete rule is every condition above this leaf plus the
            # predicted class.
            row['full_rule'] = (
                " AND ".join(stack[:level]) + f" => class: {class_label}"
            )
        else:
            split_match = _SPLIT_RE.match(line_clean)
            if split_match:
                feature, operator, threshold = split_match.groups()
                row['feature'] = feature
                row['operator'] = operator
                row['threshold'] = threshold
                # Drop conditions from deeper or sibling branches and record
                # this one as the active condition for its level.
                stack[level:] = [f"{feature} {operator} {threshold}"]

        data.append(row)

    return data

# --- USAGE EXAMPLE ---
# Read the exported tree rules. The encoding is pinned so that non-ASCII
# feature names decode the same way on every platform instead of depending
# on the locale's default.
with open('reglas_arbol.txt', 'r', encoding='utf-8') as f:
    rules_text = f.read()

parsed_rules = parse_tree_rules(rules_text)

# Column order of the CSV; these are the keys of each parsed row dict.
keys = ['level', 'feature', 'operator', 'threshold', 'class', 'rule_text', 'full_rule']
# newline='' lets the csv module control line endings itself.
with open('tree_rules_parsed_full.csv', 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    dict_writer.writerows(parsed_rules)

print("Archivo 'tree_rules_parsed_full.csv' creado con éxito.")



Archivo 'tree_rules_parsed_full.csv' creado con éxito.
