In [1]:
import os
import re
import datetime

# --- CONFIGURATION ---
# The root folder where your Obsidian Vault files are located.
# NOTE: Replace this with your actual, correct path.
ROOT_FOLDER = r"C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain"

# The desired date format (DD-MM-YY)
DATE_FORMAT = "%d-%m-%y"

# The properties you want to ensure are in the YAML front matter.
# The values are set to defaults (date, empty lists/string).
YAML_TEMPLATE = {
    "Created": "",  # Will be populated with file creation date
    "Area": "[]",  # List for linking to other files
    "Tags": "[]",  # List for classifications
    "Description": '""',  # Text description
    "Sub Area": "[]"  # List for other files
}
# ---------------------


def get_file_creation_date(filepath):
    """
    Retrieves the file's creation timestamp and formats it.
    """
    try:
        # On Windows, 'st_ctime' is often the creation time.
        # Use 'st_mtime' (modification time) as a robust fallback/alternative
        # if creation time isn't reliably available or desired.
        timestamp = os.path.getctime(filepath)
        dt_object = datetime.datetime.fromtimestamp(timestamp)
        return dt_object.strftime(DATE_FORMAT)
    except Exception as e:
        print(f"Error getting date for {filepath}: {e}")
        return datetime.datetime.now().strftime(DATE_FORMAT) # Fallback to current date

def update_yaml_front_matter(filepath, creation_date):
    """
    Reads a markdown file, updates/inserts YAML front matter, and writes changes.
    """
    print(f"Processing file: {filepath}")

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # 1. Prepare the properties with the current creation date
        current_yaml = YAML_TEMPLATE.copy()
        current_yaml["Created"] = f'"{creation_date}"'

        # 2. Check for existing YAML front matter (--- ... ---) at the very start
        yaml_match = re.match(r'---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)

        if yaml_match:
            # --- UPDATE EXISTING YAML ---
            existing_yaml_block = yaml_match.group(1)
            new_yaml_lines = existing_yaml_block.split('\n')
            existing_keys = set()
            
            # Use regex to find and replace existing keys
            for key, default_value in current_yaml.items():
                pattern = r"^" + re.escape(key) + r":.*$"
                replacement = f"{key}: {default_value}"
                
                # Check if the key already exists
                if re.search(pattern, existing_yaml_block, re.MULTILINE):
                    # Replace the existing line
                    existing_yaml_block = re.sub(pattern, replacement, existing_yaml_block, 1, re.MULTILINE)
                    existing_keys.add(key)
                
            # Add missing keys to the end of the YAML block
            missing_keys = [
                f"{key}: {current_yaml[key]}" 
                for key in current_yaml if key not in existing_keys
            ]
            
            if missing_keys:
                 existing_yaml_block += "\n" + "\n".join(missing_keys)

            # Reconstruct the full content
            new_yaml_block = f"---\n{existing_yaml_block.strip()}\n---"
            
            # The rest of the content after the original YAML block
            new_content = re.sub(r'---\s*\n(.*?)\n---\s*\n', new_yaml_block + '\n', content, 1, re.DOTALL)
            
        else:
            # --- INSERT NEW YAML ---
            yaml_lines = [f"{key}: {current_yaml[key]}" for key in current_yaml]
            new_yaml_block = "---\n" + "\n".join(yaml_lines) + "\n---"
            
            # Prepend the new YAML block to the existing content
            new_content = new_yaml_block + "\n" + content.strip()
            
        # 3. Write the updated content back to the file (No backup, as requested)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content.strip() + "\n")
            
        print(f"Successfully updated YAML in: {os.path.basename(filepath)}")

    except Exception as e:
        print(f"FAILED to process {filepath}. Error: {e}")


def main():
    """
    Walks the directory and calls the update function for each markdown file.
    """
    print(f"Starting script to update Markdown YAML properties in: {ROOT_FOLDER}")
    
    if not os.path.exists(ROOT_FOLDER):
        print(f"Error: The configured folder does not exist: {ROOT_FOLDER}")
        return
        
    md_file_count = 0
    
    # os.walk generates the file names in a directory tree
    for root, _, files in os.walk(ROOT_FOLDER):
        for filename in files:
            if filename.lower().endswith('.md'):
                md_file_count += 1
                filepath = os.path.join(root, filename)
                
                # 1. Get the creation date in the specified format
                creation_date = get_file_creation_date(filepath)
                
                # 2. Update/Insert the YAML front matter
                update_yaml_front_matter(filepath, creation_date)

    print("\n--- Script Finished ---")
    print(f"Total markdown files processed: {md_file_count}")
    print("Please review your files in Obsidian.")


if __name__ == "__main__":
    main()

Starting script to update Markdown YAML properties in: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain
Processing file: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\Terminologies to Master First.md
Successfully updated YAML in: Terminologies to Master First.md
Processing file: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Pharma Overview.md
Successfully updated YAML in: The Pharma Overview.md
Processing file: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\Cheat Sheets\1. Commercial Pharma\1.1 Sales & Marketing.md
Successfully updated YAML in: 1.1 Sales & Marketing.md
Processing file: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\Cheat Sheets\1. Commercial Pharma\1.2 Market Access & Patient Services.md
Successfully updated YAML in: 1.2 Market Access & Patient Services.md
Processing file: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\Cheat Sheets\1. Commercial Pharma\1.3 Medical Affair

  existing_yaml_block = re.sub(pattern, replacement, existing_yaml_block, 1, re.MULTILINE)
  new_content = re.sub(r'---\s*\n(.*?)\n---\s*\n', new_yaml_block + '\n', content, 1, re.DOTALL)


In [2]:
import os
import re
import datetime

# --- CONFIGURATION ---
# The root folder where your Obsidian Vault files are located.
# NOTE: Ensure this path is correct.
ROOT_FOLDER = r"C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain"

# The desired date format (DD-MM-YY)
DATE_FORMAT = "%d-%m-%y"
# ---------------------

def format_yaml_list(items):
    """Formats a list of strings into a YAML-compatible list of Obsidian links or tags."""
    if not items:
        return '[]'
        
    # Enclose link targets (starting with [[) in double quotes for robust YAML parsing.
    # Tags (without [[) are generally safe without quotes unless they contain special characters.
    formatted_items = [f'"{item}"' if item.startswith('[[') else item for item in items]
    
    # Use the YAML inline list style for clean insertion
    return f'[{", ".join(formatted_items)}]'


def analyze_path_and_get_properties(filepath, root_folder):
    """
    Analyzes the file path relative to the root folder to determine Area, Sub Area, and Tags.
    """
    try:
        # 1. Normalize and get the path relative to the ROOT_FOLDER
        # os.path.normpath handles different slashes, and os.sep is the correct separator.
        relative_path = os.path.normpath(os.path.relpath(filepath, root_folder))
        
        # 2. Split the path into components (folders) using the OS separator
        path_components = relative_path.split(os.sep)
        
        # The last component is the filename, we only want folders
        if path_components[-1].endswith('.md'):
            folder_components = path_components[:-1]
        else:
            folder_components = []

        # Initialize properties
        area = []
        sub_area = []
        tags = ['pharma-domain']
        
        # --- Logic for determining Area, Sub Area, and Tags based on top-level folder ---
        
        if not folder_components:
            area.append('[[Overview]]')
            tags.extend(['foundations', 'overview'])
        
        elif folder_components[0] == 'Cheat Sheets':
            tags.append('cheat-sheet')
            if len(folder_components) > 1:
                # e.g., '1. Commercial Pharma' -> Area
                # Note: The split is non-greedy and handles titles that might contain periods
                main_topic = folder_components[1].split('. ', 1)[-1].strip()
                area.append(f'[[{main_topic}]]')
                tags.append(main_topic.lower().replace(' & ', ' ').replace(' ', '-'))
                
                if len(folder_components) > 2:
                    # e.g., '1.1 Sales & Marketing' -> Sub Area
                    sub_topic = folder_components[2].split('. ', 1)[-1].strip()
                    sub_area.append(f'[[{sub_topic}]]')
                    tags.append(sub_topic.lower().replace(' & ', ' ').replace(' ', '-'))

        elif folder_components[0] == 'KPI Family':
            area.append('[[KPIs]]')
            if len(folder_components) > 1:
                sub_topic = folder_components[1].replace(' KPIs', '').strip()
                sub_area.append(f'[[{sub_topic}]]')
                tags.extend(['kpi', sub_topic.lower().replace(' ', '-')])

        elif folder_components[0] == 'Pharma Data 101':
            area.append('[[Pharma Data 101]]')
            tags.append('data-101')
            if len(folder_components) > 1:
                sub_topic = folder_components[1].split('. ', 1)[-1].strip()
                sub_area.append(f'[[{sub_topic}]]')
                tags.extend(['data', sub_topic.lower().replace(' ', '-')])
                
        elif folder_components[0] == 'The Training Document':
            tags.append('training')
            if len(folder_components) > 1:
                main_topic = folder_components[1].split('. ', 1)[-1].strip()
                area.append(f'[[{main_topic}]]')
                tags.append(main_topic.lower().replace(' & ', ' ').replace(' ', '-'))
                
                if len(folder_components) > 2:
                    sub_topic = folder_components[2].split('. ', 1)[-1].strip()
                    sub_area.append(f'[[{sub_topic}]]')
                    tags.append(sub_topic.lower().replace(' & ', ' ').replace(' ', '-'))
        
        elif folder_components[0] == 'Phases':
            area.append('[[Learning Path]]')
            tags.append('learning-path')
            if len(folder_components) > 1:
                # Use the full folder name for phase links
                sub_area.append(f'[[{folder_components[1]}]]')
                tags.append(folder_components[1].lower().replace(' ', '-'))

        # Final property dictionary for the YAML update function
        return {
            "Created": "", # To be populated with date
            "Area": format_yaml_list([a for a in area if a]),
            "Sub Area": format_yaml_list([s for s in sub_area if s]),
            "Tags": format_yaml_list([t for t in tags if t]), # Filter empty tags
            "Description": '""'
        }
    
    except Exception as e:
        print(f"Error during path analysis for {filepath}: {e}")
        # Return a fallback structure
        return {
            "Created": "", 
            "Area": '[]', 
            "Sub Area": '[]', 
            "Tags": '["error-check-path"]', 
            "Description": '""'
        }


def get_file_creation_date(filepath):
    """Retrieves the file's creation timestamp and formats it (DD-MM-YY)."""
    try:
        # Edge case: os.path.getctime might fail if file access is restricted
        timestamp = os.path.getctime(filepath)
        dt_object = datetime.datetime.fromtimestamp(timestamp)
        return dt_object.strftime(DATE_FORMAT)
    except Exception as e:
        # Fallback to current date or a safe string
        print(f"Warning: Error getting date for {filepath}. Falling back to current date. Error: {e}")
        return datetime.datetime.now().strftime(DATE_FORMAT)


def update_yaml_front_matter(filepath, yaml_properties):
    """
    Reads a markdown file, updates/inserts YAML front matter, and writes changes.
    """
    print(f"Processing file: {os.path.basename(filepath)}")
    
    # 1. Prepare final properties including the date
    yaml_properties["Created"] = f'"{get_file_creation_date(filepath)}"'
    
    try:
        # Edge case: File might be open or permissions restricted
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # Regex to find existing YAML front matter (--- ... ---) at the very start
        # Now ensures the YAML is followed by a newline for clean parsing
        yaml_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
        
        required_keys = yaml_properties.keys()

        if yaml_match:
            # --- UPDATE EXISTING YAML ---
            existing_yaml_block = yaml_match.group(1)
            existing_keys = set()
            
            # 1. Update/Replace existing keys (Case-insensitive match for robustness)
            for key, value in yaml_properties.items():
                # Pattern to match the key line robustly (including spaces/tabs after colon)
                # re.escape ensures special characters in the key don't break the regex
                pattern = r"^" + re.escape(key) + r"\s*:\s*.*$"
                replacement = f"{key}: {value}"
                
                if re.search(pattern, existing_yaml_block, re.MULTILINE | re.IGNORECASE):
                    # Replace the existing line (using re.sub with count=1)
                    existing_yaml_block = re.sub(pattern, replacement, existing_yaml_block, 1, re.MULTILINE | re.IGNORECASE)
                    existing_keys.add(key)
                
            # 2. Add missing keys to the end of the YAML block
            missing_keys = [
                f"{key}: {yaml_properties[key]}" 
                for key in required_keys if key not in existing_keys
            ]
            
            if missing_keys:
                 # Add a newline only if the block is not empty already
                 if existing_yaml_block.strip():
                     existing_yaml_block += "\n"
                 existing_yaml_block += "\n".join(missing_keys)

            # Reconstruct and replace the full content
            new_yaml_block = f"---\n{existing_yaml_block.strip()}\n---"
            
            # The rest of the content after the original YAML block
            new_content = re.sub(r'^---\s*\n(.*?)\n---\s*\n', new_yaml_block + '\n', content, 1, re.DOTALL)
            
        else:
            # --- INSERT NEW YAML ---
            yaml_lines = [f"{key}: {yaml_properties[key]}" for key in required_keys]
            new_yaml_block = "---\n" + "\n".join(yaml_lines) + "\n---"
            
            # Prepend the new YAML block to the existing content
            new_content = new_yaml_block + "\n" + content.strip()
            
        # 3. Write the updated content back to the file
        with open(filepath, 'w', encoding='utf-8') as f:
            # Ensure file ends with a single newline after content strip
            f.write(new_content.strip() + "\n")
            
        print(f"  -> Success. Area: {yaml_properties['Area']}, Tags: {yaml_properties['Tags']}")

    except PermissionError:
        print(f"  -> FAILED: Permission denied for {filepath}. Check if the file is open.")
    except Exception as e:
        print(f"  -> FAILED to process {filepath}. General Error: {e}")


def main():
    """Walks the directory and calls the update function for each markdown file."""
    print(f"Starting script to dynamically update Markdown YAML properties in: {ROOT_FOLDER}")
    
    # Edge case: Check if the configured folder exists before starting the expensive walk
    if not os.path.exists(ROOT_FOLDER):
        print(f"Error: The configured folder does not exist: {ROOT_FOLDER}")
        return
        
    md_file_count = 0
    
    # os.walk handles directory traversal errors gracefully, but we catch file-specific errors in update_yaml_front_matter
    for root, _, files in os.walk(ROOT_FOLDER):
        for filename in files:
            if filename.lower().endswith('.md'):
                md_file_count += 1
                filepath = os.path.join(root, filename)
                
                # 1. Analyze the path and get the desired YAML properties
                yaml_properties = analyze_path_and_get_properties(filepath, ROOT_FOLDER)
                
                # 2. Update/Insert the YAML front matter
                update_yaml_front_matter(filepath, yaml_properties)

    print("\n--- Script Finished ---")
    print(f"Total markdown files processed: {md_file_count}")
    print("Please review your files in Obsidian.")


if __name__ == "__main__":
    main()

Starting script to dynamically update Markdown YAML properties in: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain
Processing file: Terminologies to Master First.md
  -> Success. Area: ["[[Overview]]"], Tags: [pharma-domain, foundations, overview]
Processing file: The Pharma Overview.md
  -> Success. Area: ["[[Overview]]"], Tags: [pharma-domain, foundations, overview]
Processing file: 1.1 Sales & Marketing.md
  -> Success. Area: ["[[Commercial Pharma]]"], Tags: [pharma-domain, cheat-sheet, commercial-pharma]
Processing file: 1.2 Market Access & Patient Services.md
  -> Success. Area: ["[[Commercial Pharma]]"], Tags: [pharma-domain, cheat-sheet, commercial-pharma]
Processing file: 1.3 Medical Affairs.md
  -> Success. Area: ["[[Commercial Pharma]]"], Tags: [pharma-domain, cheat-sheet, commercial-pharma]
Processing file: 1.4 Clinical Development.md
  -> Success. Area: ["[[Commercial Pharma]]"], Tags: [pharma-domain, cheat-sheet, commercial-pharma]
Processing file: 2. Clinic

  existing_yaml_block = re.sub(pattern, replacement, existing_yaml_block, 1, re.MULTILINE | re.IGNORECASE)
  new_content = re.sub(r'^---\s*\n(.*?)\n---\s*\n', new_yaml_block + '\n', content, 1, re.DOTALL)


  -> Success. Area: ["[[Pharma Data 101]]"], Tags: [pharma-domain, data-101, data, claims-data]
Processing file: Overview.md
  -> Success. Area: ["[[Pharma Data 101]]"], Tags: [pharma-domain, data-101, data, emr-data]
Processing file: Overview.md
  -> Success. Area: ["[[Pharma Data 101]]"], Tags: [pharma-domain, data-101, data, specialty-pharma]
Processing file: Phase 1 - Foundations.md
  -> Success. Area: ["[[Learning Path]]"], Tags: [pharma-domain, learning-path]
Processing file: Phase 2 - Tech & Data.md
  -> Success. Area: ["[[Learning Path]]"], Tags: [pharma-domain, learning-path]
Processing file: Phase 3 - Specialization.md
  -> Success. Area: ["[[Learning Path]]"], Tags: [pharma-domain, learning-path]
Processing file: Phase 4 - Validation & Learning.md
  -> Success. Area: ["[[Learning Path]]"], Tags: [pharma-domain, learning-path]
Processing file: 1.1 US Healthcare Ecosystem.md
  -> Success. Area: ["[[1.1 The US Healthcare Ecosystem]]"], Tags: [pharma-domain, training, 1.1-the-us

In [5]:
import os

def recursive_update_obsidian_properties():
    # 1. Define the root path
    root_path = r"C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document"
    
    # 2. Define the key and the STRICT formatted line
    # Note: We MUST use quotes around "[[...]]" to force Obsidian to treat this as 
    # a single TEXT field rather than a List/Array.
    target_key = "Area"
    target_line = 'Area: "[[Overarching View]]"\n'
    
    if not os.path.exists(root_path):
        print(f"Error: The path '{root_path}' does not exist.")
        return

    files_processed = 0

    # 3. Walk through all folders recursively
    for current_root, dirs, files in os.walk(root_path):
        for filename in files:
            if filename.endswith(".md"):
                file_path = os.path.join(current_root, filename)
                
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        lines = f.readlines()
                    
                    updated_lines = []
                    has_frontmatter = False
                    
                    # Check if file has YAML frontmatter (starts with ---)
                    if len(lines) > 0 and lines[0].strip() == "---":
                        # Find the closing ---
                        end_idx = -1
                        for i in range(1, len(lines)):
                            if lines[i].strip() == "---":
                                end_idx = i
                                break
                        
                        if end_idx != -1:
                            has_frontmatter = True
                            property_found = False
                            
                            # Check existing properties inside the block
                            for i in range(1, end_idx):
                                # Check if line starts with 'Area:' (ignoring whitespace)
                                if lines[i].strip().startswith(f"{target_key}:"):
                                    lines[i] = target_line
                                    property_found = True
                                    break
                            
                            # If 'Area' wasn't found, add it before the closing ---
                            if not property_found:
                                lines.insert(end_idx, target_line)
                            
                            updated_lines = lines

                    # If no frontmatter exists, create it
                    if not has_frontmatter:
                        new_frontmatter = [
                            "---\n",
                            target_line,
                            "---\n",
                            "\n"
                        ]
                        updated_lines = new_frontmatter + lines

                    # Write the file back
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.writelines(updated_lines)
                        
                    files_processed += 1
                    print(f"Processed: {file_path}")

                except Exception as e:
                    print(f"Error processing {filename}: {e}")

    print(f"\nSuccess! Updated {files_processed} files.")

if __name__ == "__main__":
    recursive_update_obsidian_properties()

Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\Overarching View.md
Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\1.1 The US Healthcare Ecosystem\1.1 US Healthcare Ecosystem.md
Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\1.2 Market\1.2 Market.md
Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\1.3 Product Hirearchy\1.3 Product Hirearchy.md
Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\1.4 Geographical Hirearchy\1.4 Geographical Hirearchy.md
Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\1.5 Patient Prescription Summary\1.5 Patient Prescription Summary.md
Processed: C:\Users\BalasubramanianPG\Videos\Obsidian Vault\Pharma Domain\The Training Document\1.6 Rx Analytics\1.6 Rx Analytics.md
Processed: