In [3]:
import os
import re

def is_markdown_file(filename):
    """Return True when *filename* ends with the ``.md`` extension.

    The comparison is case-sensitive, so a name ending in ``.MD`` is not
    treated as Markdown.
    """
    markdown_extension = '.md'
    has_markdown_extension = filename.endswith(markdown_extension)
    return has_markdown_extension

def remove_yaml_frontmatter(content):
    """Strip a leading YAML frontmatter block from *content*.

    Frontmatter is only recognised when the very first line of the document
    is ``---`` and a later line consisting solely of ``---`` closes it.
    ``---`` lines elsewhere in the document (for example Markdown horizontal
    rules) are left untouched, as is an opening ``---`` that is never closed.

    Args:
        content: Full text of a note.

    Returns:
        The text with the leading frontmatter block removed, or the text
        unchanged when there is no such block.
    """
    # \A pins the opening delimiter to the start of the whole string (unlike
    # ^, which under MULTILINE matches at every line start). MULTILINE is
    # still needed so that ^ can find the closing delimiter on its own line.
    frontmatter_pattern = r'\A---[ \t]*\r?\n[\s\S]*?^---[ \t]*(?:\r?\n|\Z)'
    return re.sub(frontmatter_pattern, '', content, count=1, flags=re.MULTILINE)

def remove_headers(content):
    """Blank out Markdown ATX heading lines in *content*.

    A heading line is up to three leading spaces, one to six ``#``
    characters, and then either whitespace followed by the heading text or
    the end of the line. Lines that merely start with ``#`` without that
    separator (such as a ``#tag`` at the start of a line) are not headings
    and are kept.

    Args:
        content: Text of a note.

    Returns:
        The text with each heading line replaced by an empty line; the line
        breaks themselves are preserved.
    """
    # Requiring whitespace or end-of-line after the hashes is what separates
    # "# Heading" from "#tag"; capping at six hashes follows the ATX rule.
    heading_pattern = r'^ {0,3}#{1,6}(?:[ \t].*)?$'
    return re.sub(heading_pattern, '', content, flags=re.MULTILINE)

def count_words(text):
    """Count the words in *text*.

    A word is a maximal run of regex word characters, so punctuation inside
    a token splits it: ``don't`` counts as two words.
    """
    word_total = 0
    for _match in re.finditer(r'\w+', text):
        word_total += 1
    return word_total

def process_file(file_path):
    """Return the word count of the file at *file_path*.

    The file is read as UTF-8 text, passed through
    ``remove_yaml_frontmatter`` and then ``remove_headers``, and the
    remaining text is counted with ``count_words``.

    Errors from opening or decoding the file are not handled here and
    propagate to the caller.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    # Frontmatter is stripped before headers, then the remainder is counted.
    body_text = remove_headers(remove_yaml_frontmatter(raw_text))
    return count_words(body_text)

def count_words_in_vault(vault_path):
    """Total the word counts of every Markdown file under *vault_path*.

    The directory tree is walked recursively. Each file accepted by
    ``is_markdown_file`` is counted with ``process_file`` and its individual
    count is printed. A file that fails to process is reported and skipped;
    it adds nothing to the total and leaves the counts gathered so far
    intact.

    Args:
        vault_path: Root directory to search.

    Returns:
        The sum of the word counts of all files processed successfully.
    """
    total_words = 0
    for root, _dirs, files in os.walk(vault_path):
        for file in files:
            if not is_markdown_file(file):
                continue

            # Built outside the try so the error message can always name it.
            file_path = os.path.join(root, file)
            try:
                word_count = process_file(file_path)
            except Exception as e:
                # Deliberate best-effort boundary: one unreadable file must
                # not abort the walk or wipe the running total.
                print(f"excepted {file_path} as {e}")
                continue

            total_words += word_count
            print(f"{file}: {word_count} words")

    return total_words

if __name__ == "__main__":
    # Pasted paths often carry stray whitespace or wrapping double quotes;
    # remove them so the directory lookup sees the real path.
    vault_path = input("Enter the path to your Obsidian vault: ").strip().strip('"')
    if not os.path.isdir(vault_path):
        # os.walk ignores listing errors by default and would yield nothing
        # for a bad path, which would be reported as a vault of 0 words.
        print(f"Not a directory: {vault_path}")
    else:
        total_words = count_words_in_vault(vault_path)
        print(f"\nTotal words in the vault (excluding headers and YAML frontmatter): {total_words}")

Enter the path to your Obsidian vault:  C:\Users\jacks\Documents\Jack's Zettelkasten


Obsidian Clean Tasks.md: 162 words
A Week at the Cottage pt 2.md: 79 words
A Week At the Cottage.md: 178 words
Long Term Goals.md: 381 words
Aug 1, 2024.md: 75 words
Aug 10, 2024.md: 22 words
Aug 11, 2024.md: 18 words
Aug 12, 2024.md: 131 words
Aug 13, 2024.md: 50 words
Aug 14, 2024.md: 52 words
Aug 15, 2024.md: 94 words
Aug 16, 2024.md: 102 words
Aug 17, 2024.md: 100 words
Aug 18, 2024.md: 310 words
Aug 19, 2024.md: 288 words
Aug 2, 2024.md: 13 words
Aug 20, 2024.md: 96 words
Aug 21, 2024.md: 43 words
Aug 22, 2024.md: 159 words
Aug 23, 2024.md: 29 words
Aug 24, 2024.md: 73 words
Aug 25, 2024.md: 1 words
Aug 3, 2024.md: 22 words
Aug 4, 2024.md: 30 words
Aug 5, 2024.md: 14 words
Aug 6, 2024.md: 27 words
Aug 7, 2024.md: 65 words
Aug 8, 2024.md: 28 words
Aug 9, 2024.md: 42 words
Jul 1, 2024.md: 12 words
Jul 10, 2024.md: 57 words
Jul 11, 2024.md: 44 words
Jul 12, 2024.md: 33 words
Jul 13, 2024.md: 75 words
Jul 14, 2024.md: 32 words
Jul 15, 2024.md: 42 words
Jul 16, 2024.md: 47 words
Jul 17