In [3]:
import os
import yaml
import re

def clean_yaml_front_matter(content):
    """Standardize the YAML front matter of a Markdown document.

    Front matter is recognized only when the first line is exactly ``---``
    and some later line is exactly ``---``. The text between the two
    delimiters is parsed, normalized and re-serialized:

    * double quotes inside a string ``title`` become single quotes;
    * ``date`` is converted to its string form;
    * ``author`` is set to ``"Unknown"`` when the key is missing.

    Args:
        content: Full text of the Markdown file.

    Returns:
        The document with its front matter rewritten. ``content`` is
        returned unchanged when it has no front matter or when the opening
        ``---`` is never closed. When the front matter cannot be parsed, or
        does not parse to a mapping, it is dropped and only the body that
        follows the closing ``---`` is returned.
    """
    lines = content.split("\n")
    if lines[0] != "---":
        return content  # No front matter at all.

    try:
        end_index = lines.index("---", 1)  # Line index of the closing ---
    except ValueError:
        # Opening delimiter without a closing one: there is no way to tell
        # where the metadata ends, so leave the document untouched.
        return content

    body = "\n".join(lines[end_index + 1:])

    try:
        metadata = yaml.safe_load("\n".join(lines[1:end_index]))
    except (yaml.YAMLError, ValueError):
        # ValueError covers scalars that look like timestamps but are out of
        # range (e.g. an impossible calendar date).
        return body  # Remove broken YAML front matter
    if not isinstance(metadata, dict):
        # Empty front matter, a bare scalar or a list: nothing to standardize.
        return body

    # Standardize YAML fields
    title = metadata.get("title")
    if isinstance(title, str):  # A numeric/null title has no quotes to fix.
        metadata["title"] = title.replace('"', "'")
    if "date" in metadata:
        metadata["date"] = str(metadata["date"])  # Ensure date is a string
    metadata.setdefault("author", "Unknown")  # Default author if missing

    # allow_unicode keeps non-ASCII text readable instead of \u escapes.
    dumped = yaml.dump(metadata, default_flow_style=False, allow_unicode=True)
    return "---\n" + dumped + "---\n" + body

def normalize_markdown_files(folder_path):
    """Normalize the YAML front matter of every ``.md`` file under a folder.

    Walks ``folder_path`` recursively, passes each Markdown file's text
    through ``clean_yaml_front_matter`` and stores the result in place. A
    ``Normalized: <path>`` line is printed for every processed file and also
    written to ``normalization_log.txt`` inside ``folder_path``; the log is
    overwritten on each run.

    A file is rewritten only when cleaning actually changed its text, so
    untouched documents keep their bytes and modification time. A file that
    is not valid UTF-8 is reported as skipped instead of aborting the rest
    of the walk.

    Args:
        folder_path: Root directory to scan.
    """
    log_file = os.path.join(folder_path, "normalization_log.txt")
    with open(log_file, "w", encoding="utf-8") as log:
        for root, _, files in os.walk(folder_path):
            for name in files:
                if not name.endswith(".md"):
                    continue
                file_path = os.path.join(root, name)

                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        content = f.read()
                except UnicodeDecodeError as err:
                    # One badly encoded file must not leave the remaining
                    # files of the tree unprocessed.
                    message = f"Skipped (not valid UTF-8): {file_path}: {err}"
                    log.write(message + "\n")
                    print(message)
                    continue

                cleaned_content = clean_yaml_front_matter(content)

                # Avoid needless rewrites of files the cleaner left alone.
                if cleaned_content != content:
                    with open(file_path, "w", encoding="utf-8") as f:
                        f.write(cleaned_content)

                log.write(f"Normalized: {file_path}\n")
                print(f"Normalized: {file_path}")

# Example usage:
# normalize_markdown_files("/path/to/markdown/files")

if __name__ == "__main__":
    # Guarded so that merely importing this module never rewrites files on
    # disk; running it as a script or notebook cell still performs the run.
    normalize_markdown_files("/home/nhac/git_dl/hoang-an-poetry/")

Normalized: /home/nhac/git_dl/hoang-an-poetry/README.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/02-02-2013-bùi-ngọ-tấn-chuyện-kể-năm-2000.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/10-02-2013-queen-of-heart-golden-moonchild.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/11-02-2013-is-the-world-that-small?!.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/03-01-2013-không-gian-buộc-bởi-thời-gian.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/17-01-2013-khuôn-viên-đông-giá.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/21-01-2013-mở-rộng-vòng-tay-tìm-nhau.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/31-01-2013-71-north-nevada.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/21-01-2013-tiếng-sét-ái-tình.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/03-01-2013-yêu-đời.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/05-11-2013-freedom-of-thought.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2013/11-12

Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/12-12-2012-winter-preparation.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/06-07-2012-dịch-thơ-của-thi-sĩ-thanh-trắc-nguyễn-văn-"đá-và--trái-tim".md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/28-05-2012-falls-a-white-petal-upon-spring-grass-greening.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/20-06-2012-mầu-xanh-địa-cầu.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/29-06-2012-gặp.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/19-06-2012-ân-đời-bao-la-nặng-sâu.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/12-07-2012-sự-bùng-nổ-thiên-tuyệt.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/01-06-2012-seashell-eyes-in-the-deep-blue-sea-seeing-love.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/22-05-2012-tự-do.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/24-05-2012-đam-mê.md
Normalized: /home/nhac/git_dl/hoang-an-poetry/2012/10-06-2012-ngôi-sao-ngày.md