Step 1 - Setup: imports and vault path

In [None]:
# --- Imports ---
import os
from pathlib import Path
import frontmatter
import pandas as pd

# --- Vault Path (edit when you change your vault location) ---
VAULT_PATH = r"C:\Users\ASUS\Videos\AnyDesk\Balasubramanian PG"
vault = Path(VAULT_PATH)

# A recursive scan of a missing folder just comes back empty instead of
# failing, so flag a wrong path here rather than claiming the vault loaded.
if vault.is_dir():
    print("Vault loaded:", vault)
else:
    print("WARNING: vault folder not found:", vault)


Step 2 - Scan vault for all markdown files

In [None]:
def get_all_md_files(vault: Path) -> list[Path]:
    """Return every Markdown file under ``vault``, searched recursively.

    Args:
        vault: Root folder to scan.

    Returns:
        The paths of all ``*.md`` files below ``vault``, sorted so repeated
        runs list them in the same order.
    """
    # rglob matches on the name only, so a folder called "something.md" would
    # otherwise be reported as a note; keep regular files only.
    return sorted(p for p in vault.rglob("*.md") if p.is_file())

# Collect every Markdown note in the vault once; later steps reuse this list.
md_files = get_all_md_files(vault)
# Bare expression so the notebook displays the number of files found.
len(md_files)

Step 3 - Folder-wise and subfolder-wise structure summary

In [None]:
def folder_summary(md_files, root=None):
    """Build a table listing each Markdown file with its containing folder.

    Args:
        md_files: Iterable of file paths located under the root folder.
        root: Folder the paths are made relative to. Defaults to the
            notebook-level ``vault`` when omitted.

    Returns:
        A DataFrame with one row per file and the columns ``folder``
        (parent folder relative to the root, with POSIX separators; ``"."``
        for files directly in the root), ``file`` (file name) and
        ``relative_path`` (path relative to the root).

    Raises:
        ValueError: If a path is not located under the root folder.
    """
    # Fall back to the notebook's global so existing one-argument calls
    # keep working unchanged.
    base = vault if root is None else root
    rows = []
    for file in md_files:
        rel = file.relative_to(base)
        rows.append([rel.parent.as_posix(), file.name, str(rel)])
    return pd.DataFrame(rows, columns=["folder", "file", "relative_path"])

# One row per Markdown file: folder, file name, and path relative to the vault.
df_files = folder_summary(md_files)
# Bare expression so the notebook previews the first rows.
df_files.head()


Step 4 - Markdown file count per folder

In [None]:
# Number of Markdown files in each folder, with the busiest folders first.
folder_counts = (
    df_files.groupby("folder")
    .size()
    .rename("markdown_count")
    .reset_index()
    .sort_values("markdown_count", ascending=False)
)

folder_counts

Step 5 - Extract all YAML property names across the vault

In [None]:
def extract_yaml_properties(md_files):
    """Collect the YAML front-matter property names used across the files.

    Args:
        md_files: Iterable of paths to Markdown files.

    Returns:
        A list with one entry per property occurrence, so a name appears
        once for every file that defines it. Files whose front matter
        cannot be loaded contribute nothing and are reported on stdout.
    """
    props = []
    skipped = []
    for file in md_files:
        try:
            fm = frontmatter.load(file)
        except Exception as exc:
            # Best-effort scan: one unreadable or malformed note must not
            # abort the run. Catching Exception rather than using a bare
            # except still lets Ctrl+C / kernel interrupts through.
            skipped.append((file, exc))
            continue
        if isinstance(fm.metadata, dict):
            props.extend(fm.metadata)
    if skipped:
        print(f"Skipped {len(skipped)} file(s) whose front matter could not be loaded:")
        for file, exc in skipped:
            print(f"  {file}: {type(exc).__name__}: {exc}")
    return props

all_props = extract_yaml_properties(md_files)
# YAML keys are not always strings (a bare `2024:` key parses as an int).
# Sorting on the text form keeps sorted() from raising TypeError when key
# types are mixed, and leaves the order unchanged when all keys are strings.
unique_props = sorted(set(all_props), key=str)

print("Total YAML properties found:", len(unique_props))
unique_props[:50]   # preview first 50


Step 6 - YAML property frequency

In [None]:
import collections

# Tally how often each property name occurs, then list the properties from
# most to least frequent.
prop_frequency = collections.Counter(all_props)
prop_df = pd.DataFrame(list(prop_frequency.items()), columns=["property", "count"])
prop_df = prop_df.sort_values("count", ascending=False)
prop_df


Step 7 - Export analytics to CSV (optional)

In [None]:
# Write each analytics table to a CSV file in the current working directory.
exports = (
    ("folder_counts.csv", folder_counts),
    ("yaml_properties_frequency.csv", prop_df),
    ("vault_file_list.csv", df_files),
)
for csv_name, frame in exports:
    frame.to_csv(csv_name, index=False)

print("Exported analytics successfully")
