In [2]:
import os

def print_directory_structure(root_dir, indent="", exclude_dirs=None):
    """
    Imprime la estructura del directorio excluyendo carpetas específicas de forma recursiva.

    Parameters:
    - root_dir: str, el directorio raíz.
    - indent: str, la indentación para la estructura (usada en llamadas recursivas).
    - exclude_dirs: list, lista de directorios a excluir de la estructura.
    """
    exclude_dirs = exclude_dirs or []  # Lista de exclusión predeterminada

    for item in os.listdir(root_dir):
        path = os.path.join(root_dir, item)
        
        # Omitir directorios en la lista de exclusión
        if os.path.isdir(path) and item in exclude_dirs:
            continue

        if os.path.isdir(path):
            print(f"{indent}├── {item}/")
            print_directory_structure(path, indent + "│   ", exclude_dirs)
        else:
            if item not in exclude_dirs:
                print(f"{indent}├── {item}")

# Cambia `root_dir` por el path de tu proyecto y ajusta `exclude_dirs`
root_dir_backend = "."

exclude_dirs = [
    "mlruns", ".git/",
    ".git", "__pycache__", ".next", "node_modules",
    ".pytest_cache", '__init__.py',
    'effycentai_framework_core_llm',
    'effycentai_framework_llm_openai',
    'effycentai_framework_preprocessing_documents',
    'README_dev.md', 'temp', 'bash_git_0.sh'
    'dev.ipynb', '.flake8', '.github', '.python-version',
    'dev_utils', 'frontend-websockets-test', '.ruff_cache',
    '.venv', 'logs/', 'unsloth_compiled_cache/',
    'unsloth_compiled_cache'
]  # Agrega aquí las carpetas que quieres excluir
print_directory_structure(root_dir_backend, exclude_dirs=exclude_dirs)


├── .env
├── .env.example
├── .gitignore
├── .vscode/
│   ├── launch.json
│   ├── settings.json
├── app_backend.py
├── app_frontend_vtuber_studio.py
├── assets/
│   ├── score_2025-04-26.png
├── datasets/
│   ├── curated/
│   │   ├── mirai/
│   │   │   ├── parts/
│   │   │   │   ├── dataset_about_mirai.jsonl
│   │   │   │   ├── dataset_funny_questions.jsonl
│   │   │   ├── test/
│   │   │   │   ├── dataset.jsonl
│   │   │   ├── train/
│   │   │   │   ├── dataset.jsonl
│   │   │   │   ├── MirAI-Dataset - Foundations - test.csv
│   │   │   │   ├── MirAI-Dataset - Foundations.csv
├── dataset_consolidator.ipynb
├── dev/
│   ├── prompting/
│   │   ├── directory_util.ipynb
│   │   ├── README_general_prompt.md
│   ├── qlora/
│   │   ├── dev_mirAI_v0_qlora_instruct.ipynb
│   │   ├── outputs/
│   │   │   ├── runs/
│   │   │   │   ├── May05_21-22-46_magod/
│   │   │   │   │   ├── events.out.tfevents.1746498167.magod.85476.0
│   ├── rlhf/
│   │   ├── dev_dpo.ipynb
│   ├── testing/
│   ├── tts/
│  

In [1]:
from quantum_backend.utils.lib.agents.LLMHighChartsAgent import LLMHighChartsAgent
import re
import xml.etree.ElementTree as ET

def extract_xml_data(response: str) -> dict:
        """
        Parse the LLM response as XML. Expected XML structure:

        <response>
            <chart_category>...</chart_category>
            <chart_type>...</chart_type>
            <sql><![CDATA[...]]></sql>
        </response>

        Returns a dict with keys 'chart_category', 'chart_type', and 'sql' if successful; otherwise None.

        This improved version:
        - Removes markdown code fences (e.g., ```xml ... ```).
        - Strips extraneous whitespace.
        - Logs detailed error information if parsing fails.
        """
        # Trim whitespace from the response
        cleaned_response = response.strip()

        # Remove markdown code fences if present (handles ``` or ```xml)
        if cleaned_response.startswith("```"):
            # Remove starting fence (optional language specifier)
            cleaned_response = re.sub(r"^```(?:xml)?\s*", "", cleaned_response)
            # Remove trailing fence
            cleaned_response = re.sub(r"\s*```$", "", cleaned_response)

        try:
            root = ET.fromstring(cleaned_response)
        except ET.ParseError as e:
            print(
                f"Error parsing XML: {e}. Response content: {cleaned_response}"
            )
            return None

        chart_category = (root.findtext("chart_category") or "value").strip().lower()
        chart_type = (root.findtext("chart_type") or "value").strip().lower()
        sql_text = (root.findtext("sql") or "").strip()

        # Clean the SQL text by removing CDATA markers if present.
        sql_text = LLMHighChartsAgent.clean_sql_text(sql_text)
        sql_query = LLMHighChartsAgent.extract_sql_from_response(sql_text)

        return {
            "chart_category": chart_category,
            "chart_type": chart_type,
            "sql": sql_query,
        }

extract_xml_data("""<response>
    <sql><![CDATA[
        WITH monthly_totals AS (
            SELECT
                DATE_TRUNC('month', order_date) AS month_date,
                SUM(revision_count) AS total_revisions
            FROM
                silver.dp_purchase_orders_manufacters
            GROUP BY
                DATE_TRUNC('month', order_date)
        ),
        percentages AS (
            SELECT
                DATE_TRUNC('month', po.order_date) AS month_date,
                po.revision_count,
                CASE
                    WHEN mt.total_revisions = 0 THEN 0
                    ELSE (po.revision_count::float / NULLIF(mt.total_revisions, 0)) * 100
                END AS revision_percentage
            FROM
                silver.dp_purchase_orders_manufacters po
            JOIN
                monthly_totals mt ON DATE_TRUNC('month', po.order_date) = mt.month_date
        )
        SELECT
            TO_CHAR(month_date, 'YYYY-MM') AS category,
            AVG(revision_percentage) AS value
        FROM
            percentages
        GROUP BY
            month_date
        ORDER BY
            category;
    ]]></sql>
    <chart_category>categories</chart_category>
    <chart_type>bar</chart_type>
</response>
""")

{'chart_category': 'categories',
 'chart_type': 'bar',
 'sql': "WITH monthly_totals AS (\n            SELECT\n                DATE_TRUNC('month', order_date) AS month_date,\n                SUM(revision_count) AS total_revisions\n            FROM\n                silver.dp_purchase_orders_manufacters\n            GROUP BY\n                DATE_TRUNC('month', order_date)\n        ),\n        percentages AS (\n            SELECT\n                DATE_TRUNC('month', po.order_date) AS month_date,\n                po.revision_count,\n                CASE\n                    WHEN mt.total_revisions = 0 THEN 0\n                    ELSE (po.revision_count::float / NULLIF(mt.total_revisions, 0)) * 100\n                END AS revision_percentage\n            FROM\n                silver.dp_purchase_orders_manufacters po\n            JOIN\n                monthly_totals mt ON DATE_TRUNC('month', po.order_date) = mt.month_date\n        )\n        SELECT\n            TO_CHAR(month_date, 'YYYY-MM')