# 获取所有文件夹工作路径。方便后续工作。 -- 这是一个工具代码

In [26]:
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List, Tuple
from dataclasses import dataclass
import yaml

@dataclass(frozen=True)
class PipelineInputConfig:
    """输入目录配置"""
    root_directory: str
    search_depth: int
    # 若之后想改 raw_data 名字，可以加字段；现在写死即可

@dataclass(frozen=True)
class PipelineConfig:
    """Pipeline I/O 配置"""
    input: PipelineInputConfig

@dataclass(frozen=True)
class AppConfig:
    """顶层配置，完整对应 config.yaml"""
    pipeline: PipelineConfig

    @staticmethod
    def load_from_yaml(config_path: Path) -> AppConfig:
        with config_path.open("r", encoding="utf-8") as file:
            raw: Dict[str, Any] = yaml.safe_load(file)

        pipeline_input = PipelineInputConfig(**raw["pipeline"]["input"])
        pipeline = PipelineConfig(input=pipeline_input)

        return AppConfig(
            pipeline=pipeline,
        )
    
def find_parent_directories_with_raw_data(
    root_directory: Path,
    search_depth: int,
    raw_data_directory_name: str = "raw_data",
) -> List[Path]:
    """
    在 root_directory 下（限制搜索层数）查找包含 raw_data 子目录的父目录。

    示例：
        root_directory = /.../00_test
        返回：[.../00_test/00_try, .../00_test/01_try, ...]
    """
    directories_to_visit: List[Tuple[Path, int]] = [(root_directory, 0)]
    parent_directories: List[Path] = []

    while directories_to_visit:
        current_directory, current_depth = directories_to_visit.pop()

        raw_data_directory: Path = current_directory / raw_data_directory_name
        if raw_data_directory.exists() and raw_data_directory.is_dir():
            parent_directories.append(current_directory)

        if current_depth >= search_depth:
            continue

        for child in current_directory.iterdir():
            if child.is_dir():
                directories_to_visit.append((child, current_depth + 1))

    return parent_directories



In [27]:
def run_pipeline(config_path: Path) -> None:
    app_config: AppConfig = AppConfig.load_from_yaml(config_path)

    pipeline_config: PipelineConfig = app_config.pipeline

    root_directory: Path = Path(pipeline_config.input.root_directory)
    search_depth: int = pipeline_config.input.search_depth

    # 这里仍然 print 一下基础信息，方便 Notebook 立刻看到
    print(f"Root directory: {root_directory}")
    print(f"Search depth: {search_depth}")

    parent_directories: List[Path] = find_parent_directories_with_raw_data(
        root_directory=root_directory,
        search_depth=search_depth,
        raw_data_directory_name="raw_data",
    )
    if not parent_directories:
        print("No parent directories containing 'raw_data' were found.")
        return

    print("Found the following parent directories with 'raw_data':")
    for directory in parent_directories:
        print(f"  - {directory}")

In [None]:
# ============================================================
# 入口
# ============================================================

if __name__ == "__main__":
    run_pipeline(config_path=Path("config/00_config.yaml"))

Root directory: /workspace/_ty/01_data/01_16mp_2024_pipeline_data
Search depth: 1
Found the following parent directories with 'raw_data':
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/sw2_0808-0823_04_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/ms1_0726-0809_11_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/ms2_0809-0823_10_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/lloyd_0715-0729_04_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/jeff_0613-0624_04_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/ms1_0605-0621_40_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/sw1_0711-0725_03_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/ms1_0710-0726_36_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/ms1_0809-0823_34_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/sw1_0605-0613_07_ok
  - /workspace/_ty/01_data/01_16mp_2024_pipeline_data/ms2_0726-0809_13_ok
