In [20]:
f_t ="""
my_project/
│
├── README.md
├── requirements.txt
├── setup.py
├── .gitignore
├── pyproject.toml
│
├── src/
│   └── my_project/
│       ├── __init__.py
│       ├── main.py
│       ├── core/
│       │   ├── __init__.py
│       │   ├── processor.py
│       │   └── utils.py
│       └── data/
│           ├── __init__.py
│           ├── loader.py
│           └── preprocess.py
│
├── tests/
│   ├── __init__.py
│   ├── test_core.py
│   └── test_data.py
│
├── notebooks/
│   └── exploration.ipynb
│
├── data/
│   ├── raw/
│   └── processed/
│
└── docs/
    └── index.md
"""

In [None]:
class Folder:
    def __init__(self, name: str, parent_name: str = None):
        self.name = name
        self.contents = [] 
        self.parent_name = parent_name
        
    def add_item(self, item):
        self.contents.append(item)

In [31]:
class File:
    def __init__(self, name: str, parent_name: str):
        self.name = name
        self.parent_name = parent_name

In [6]:
slplit_f_t = f_t.split("\n")

In [11]:
import re 

all_names = []
for line in slplit_f_t:
    name = re.sub(r"^[│├└─\s]*──\s*", "", line).split(" #")[0].strip()
    if name:
        all_names.append(name)


print(all_names)

['project-name/', 'README.md', 'LICENSE', 'requirements.txt', 'requirements-dev.txt', 'setup.py', '.gitignore', '.env', 'Makefile', '│', 'config/', 'init.py', 'default.yaml', 'model_configs/', '│', 'src/', 'project_name/', 'init.py', 'main.py', '│ │', 'core/', 'init.py', 'dataset.py', 'model.py', '│ │', 'utils/', 'init.py', 'image.py', 'io.py', 'logging.py', '│ │', 'pipelines/', 'init.py', 'train.py', 'evaluate.py', 'inference.py', '│ │', 'cli/', 'init.py', 'commands.py', '│', 'data/', 'raw/', 'interim/', 'processed/', '│', 'models/', 'checkpoints/', 'final/', '│', 'notebooks/', '01_exploration.ipynb', '02_data_preparation.ipynb', '03_model_prototyping.ipynb', '│', 'reports/', 'figures/', 'results.md', '│', 'tests/', 'init.py', 'conftest.py', 'unit/', 'test_utils.py', 'test_dataset.py', 'integration/', 'test_pipeline.py', '│', 'logs/', 'docs/']


In [12]:
all_folder_names = [name for name in all_names if name.endswith('/')]
print(all_folder_names)

['project-name/', 'config/', 'model_configs/', 'src/', 'project_name/', 'core/', 'utils/', 'pipelines/', 'cli/', 'data/', 'raw/', 'interim/', 'processed/', 'models/', 'checkpoints/', 'final/', 'notebooks/', 'reports/', 'figures/', 'tests/', 'unit/', 'integration/', 'logs/', 'docs/']


In [None]:
structure = []
for line in slplit_f_t[1:]:
    if line.endswith('/'):
        name = re.sub(r"^[│├└─\s]*──\s*", "", line).split(" #")[0].strip()
        folder = Folder(name)
        structure.append(folder)
    else:
        if line != "":
            name = re.sub(r"^[│├└─\s]*──\s*", "", line).split(" #")[0].strip()
            if name != "│":
                file = File(name, parent_name=folder.name)
                folder.add_item(file)




In [41]:
print([structure.name for structure in structure])

['project-name', 'config', 'src', 'project_name', 'core', 'utils', 'pipelines', 'cli', 'data', 'notebooks', 'reports', 'tests', 'unit', 'integration', 'logs', 'docs']


In [47]:
print(structure[0].contents[8].name)

│


In [1]:
import os
import re
from pathlib import Path
from typing import List, Tuple, Optional


In [None]:
class StructureParser:
    """Парсер markdown структуры проекта"""
    
    def __init__(self):
        self.tree_symbols = ['├──', '└──', '│', '─', '├', '└', '|']
    
    def parse(self, text: str) -> List[Tuple[str, int]]:
        """
        Парсит markdown структуру и возвращает список (путь, уровень_вложенности)
        
        Args:
            text: Markdown строка со структурой проекта
            
        Returns:
            List[Tuple[str, int]]: Список кортежей (имя_файла_или_папки, уровень)
        """
        lines = text.split('\n')
        result = []
        
        for line in lines[1:]:
            line = re.sub(r'#.*$', '', line)
            line = line.split()
            indent_level = (len(line)-1)
            if len(line) != 0:
                if line[-1]!= '│' and line[-1] != '':
                    cleaned = line[-1]
                    result.append((cleaned, indent_level))
        
        return result
    
    def build_paths(self, parsed: List[Tuple[str, int]]) -> Tuple[Optional[str], List[Tuple[str, bool]]]:
        """
        Строит полные пути из распарсенной структуры
        
        Args:
            parsed: Список кортежей (имя, уровень)
            
        Returns:
            Tuple[Optional[str], List[Tuple[str, bool]]]: 
                (имя_корневой_папки, список_путей)
                где список_путей = [(полный_путь, является_директорией)]
        """
        if not parsed:
            return None, []
        
        # Первая строка - это корневая папка проекта
        root_name, root_level = parsed[0]
        root_name = root_name.rstrip('/')
        
        paths = []
        stack = []  # Стек для отслеживания текущего пути
        
        # Обрабатываем остальные элементы (начиная со второго)
        for name, level in parsed[2:]:
            # Корректируем стек под текущий уровень (относительно корня)
            adjusted_level = level - root_level
            while len(stack) > adjusted_level:
                stack.pop()
            
            # Определяем, является ли элемент директорией
            is_dir = name.endswith('/')
            clean_name = name.rstrip('/')
            
            # Строим полный путь (относительно корня проекта)
            if stack:
                full_path = os.path.join(*stack, clean_name)
            else:
                full_path = clean_name
            
            paths.append((full_path, is_dir))
            
            # Добавляем в стек, если это директория
            if is_dir:
                stack.append(clean_name)
        
        return root_name, paths

In [40]:
parser = StructureParser()
parsed_structure = parser.parse(f_t)
for item in parsed_structure:
    print(item)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\kadr8\AppData\Roaming\Python\Python313\site-packages\IPython\core\interactiveshell.py", line 3699, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
    ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\kadr8\AppData\Local\Temp\ipykernel_12684\1081307056.py", line 2, in <module>
    parsed_structure = parser.parse(f_t)
  File "C:\Users\kadr8\AppData\Local\Temp\ipykernel_12684\4238653550.py", line 17, in parse
    lines = text.split('\n').strip()
            ^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'list' object has no attribute 'strip'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\kadr8\AppData\Roaming\Python\Python313\site-packages\IPython\core\interactiveshell.py", line 2194, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
        etype, value, tb, tb_offset=tb_offset
    )
  File "C:\Users\kadr8\AppData\Roa

In [32]:
path_root, paths = parser.build_paths(parsed_structure)
print(f"Root folder: {path_root}")

Root folder: README.md


In [34]:
for pathw in paths:
    print(pathw)

('setup.py', False)
('.gitignore', False)
('pyproject.toml', False)
('src', True)
('src\\my_project', True)
('src\\__init__.py', False)
('src\\main.py', False)
('src\\core', True)
('src\\core\\__init__.py', False)
('src\\core\\processor.py', False)
('src\\core\\utils.py', False)
('src\\data', True)
('src\\__init__.py', False)
('src\\loader.py', False)
('src\\preprocess.py', False)
('tests', True)
('tests\\__init__.py', False)
('tests\\test_core.py', False)
('tests\\test_data.py', False)
('notebooks', True)
('notebooks\\exploration.ipynb', False)
('data', True)
('data\\raw', True)
('data\\processed', True)
('docs', True)
('index.md', False)


In [21]:
strip_f_t = f_t.split("\n")

In [37]:
for line in strip_f_t[1:]:
    line = re.sub(r'#.*$', '', line)
    line = line.split()
    print(f"{line} - {len(line)-1}")

['my_project/'] - 0
['│'] - 0
['├──', 'README.md'] - 1
['├──', 'requirements.txt'] - 1
['├──', 'setup.py'] - 1
['├──', '.gitignore'] - 1
['├──', 'pyproject.toml'] - 1
['│'] - 0
['├──', 'src/'] - 1
['│', '└──', 'my_project/'] - 2
['│', '├──', '__init__.py'] - 2
['│', '├──', 'main.py'] - 2
['│', '├──', 'core/'] - 2
['│', '│', '├──', '__init__.py'] - 3
['│', '│', '├──', 'processor.py'] - 3
['│', '│', '└──', 'utils.py'] - 3
['│', '└──', 'data/'] - 2
['│', '├──', '__init__.py'] - 2
['│', '├──', 'loader.py'] - 2
['│', '└──', 'preprocess.py'] - 2
['│'] - 0
['├──', 'tests/'] - 1
['│', '├──', '__init__.py'] - 2
['│', '├──', 'test_core.py'] - 2
['│', '└──', 'test_data.py'] - 2
['│'] - 0
['├──', 'notebooks/'] - 1
['│', '└──', 'exploration.ipynb'] - 2
['│'] - 0
['├──', 'data/'] - 1
['│', '├──', 'raw/'] - 2
['│', '└──', 'processed/'] - 2
['│'] - 0
['└──', 'docs/'] - 1
['└──', 'index.md'] - 1
[] - -1


In [None]:
for line in strip_f_t:
    line = re.sub(r'#.*$', '', line)
    line = line.split()
    print(f"{line} - {len(line)-1}")
    if line[:-1].endswith('/'):
        
        

In [38]:
test = ['│']
print(test[-1])

│
