In [1]:
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
import xmltodict

In [2]:
# Location of the exported mind map. Set the HYPEX_WXML_PATH environment
# variable to point at another file; when it is unset or empty, the original
# machine-specific path below is used, so existing runs behave as before.
data_path = Path(
    os.environ.get("HYPEX_WXML_PATH")
    or r"C:\Users\dmatr\Downloads\data\Архитектура HypEx.wxml"
)

In [3]:
# Shape of a mind-map node (its "@shape" attribute) -> task type.
type_mapping = {
    "rectangle": "global",
    "rounded rectangle": "temporary",
}

# Background colour of a node (its "@bgColor" attribute) -> task status.
# NOTE(review): "planed" looks like a typo for "planned"; it is kept unchanged
# because the label is written to the exported spreadsheets.
status_mapping = {
    "#38761d": "complete",
    "#0b5394": "in_progress",
    "#525c61": "planed",
}

In [4]:
def parse_node(tasks: list, node: dict, area: str, parent: dict = None) -> None:
    """Flatten one mind-map node and all of its descendants into ``tasks``.

    A record with the keys ``task``, ``area``, ``type``, ``status`` and
    ``worker`` is appended to ``tasks`` for ``node``; afterwards every child
    found under the node's ``"topic"`` key is processed recursively.

    Args:
        tasks: Output list, extended in place.
        node: Parsed XML node; attributes are stored under ``"@..."`` keys.
        area: Value stored in the ``area`` field of every record of this subtree.
        parent: Record of the nearest ancestor that produced one. It supplies
            ``type`` / ``status`` when the node's own ``"@shape"`` /
            ``"@bgColor"`` is absent from ``type_mapping`` / ``status_mapping``.

    A node without ``"@text"`` is reported on stdout and produces no record,
    but its children are still visited.
    """
    parent = parent or {}
    result = None
    try:
        text = node["@text"]
    except KeyError as e:
        print(e)
        print(f"In node :\n{node}")
    else:
        # "eicon" may be a single dict, a list of dicts (several icons on one
        # node) or None (attribute-less element). Normalise to a list and take
        # the first icon id instead of failing on `.get`.
        icons = node.get("eicon") or []
        if not isinstance(icons, list):
            icons = [icons]
        worker = next(
            (
                icon["@id"]
                for icon in icons
                if isinstance(icon, dict) and icon.get("@id") is not None
            ),
            None,
        )

        result = {
            "task": text,
            "area": area,
            "type": type_mapping.get(node.get("@shape")) or parent.get("type"),
            "status": status_mapping.get(node.get("@bgColor")) or parent.get("status"),
            "worker": worker,
        }
        tasks.append(result)

    # A skipped node must not cut its children off from the type/status they
    # would inherit, so fall back to the record this node itself inherited from.
    inherited = result if result is not None else parent

    # A single child arrives as a bare dict rather than a one-element list.
    topic = node.get("topic", [])
    topic = topic if isinstance(topic, list) else [topic]
    for t in topic:
        parse_node(tasks, t, area, inherited)

def extract_tasks(data_path: Path) -> pd.DataFrame:
    """Read a mind-map XML file and return its nodes as a flat task table.

    The file is parsed with ``xmltodict``. Each direct child of the root topic
    (``map -> topic -> topic``) is treated as an area, and it and its whole
    subtree are flattened by ``parse_node`` with the area's ``"@text"`` as the
    ``area`` label.

    Args:
        data_path: Path to the UTF-8 encoded XML file.

    Returns:
        DataFrame with the columns ``task``, ``area``, ``type``, ``status`` and
        ``worker``; it has no rows when the root topic has no children.

    Raises:
        KeyError: if the document has no ``map -> topic`` root, or an area node
            has no ``"@text"`` attribute.
    """
    with open(data_path, "r", encoding="utf-8") as f:
        xml_data = xmltodict.parse(f.read())

    # A root with one child arrives as a bare dict and a root with no children
    # has no "topic" key at all; normalise both cases to a list.
    main_node = xml_data["map"]["topic"].get("topic") or []
    if not isinstance(main_node, list):
        main_node = [main_node]

    tasks = []
    for area in main_node:
        parse_node(tasks, area, area["@text"])

    if not tasks:
        # Keep the schema stable so column lookups downstream still work;
        # these names mirror the record keys written by parse_node.
        return pd.DataFrame(columns=["task", "area", "type", "status", "worker"])
    return pd.DataFrame(tasks)

def get_stat(tasks: pd.DataFrame) -> pd.DataFrame:
    """Summarise the open (not ``'complete'``) tasks as a one-row DataFrame.

    Args:
        tasks: Task table with at least the ``status`` and ``worker`` columns.

    Returns:
        A single-row DataFrame with the columns ``date`` (time of the call),
        ``opened tasks`` (number of rows whose status is not ``'complete'``)
        and one column per distinct worker, in order of first appearance,
        holding that worker's number of open tasks (0 if they have none).
    """
    is_open = tasks['status'] != 'complete'
    stat = {
        'date': datetime.now(),
        'opened tasks': int(is_open.sum()),
    }

    # Count open tasks per worker in one pass; value_counts() ignores missing
    # workers by default.
    open_per_worker = tasks.loc[is_open, 'worker'].value_counts()

    # dropna() removes both None and NaN. An `is not None` check alone lets NaN
    # through and produces a meaningless `nan` column.
    for w in tasks['worker'].dropna().unique():
        stat[w] = int(open_per_worker.get(w, 0))

    return pd.DataFrame([stat])

    

In [5]:
# Parse the mind map into a flat task table and display it.
tasks = extract_tasks(data_path=data_path)
tasks

Unnamed: 0,task,area,type,status,worker
0,Dataset,Dataset,,,
1,ExperimentData,Dataset,global,complete,
2,Функции управления и атрибуты,Dataset,global,complete,
3,Обёртка groupby в Dataset,Dataset,temporary,in_progress,🦊
4,Функции создания,Dataset,temporary,complete,
5,update,Dataset,temporary,in_progress,🦊
6,ID,Dataset,global,complete,
7,ID-Name,Dataset,global,complete,
8,Читаемый Hash,Dataset,global,complete,
9,Building indexing,Dataset,global,complete,


In [6]:
# Save the task table to the "tasks" sheet of tasks.xlsx, without the row index.
tasks.to_excel("tasks.xlsx", sheet_name="tasks", index=False)

In [7]:
# Build the one-row open-task summary, save it to the "stat" sheet of
# stat.xlsx without the row index, then display it.
stat = get_stat(tasks)
stat.to_excel("stat.xlsx", sheet_name="stat", index=False)
stat

Unnamed: 0,date,opened tasks,🦊,🤖
0,2024-03-10 18:57:35.754305,23,7,7


: 