# Requirements

In [75]:
import math
import os
import re
from collections import Counter, defaultdict
from datetime import datetime

import yaml


In [158]:
# Vault locations. Neither name is referenced by the code visible in this
# notebook; the later cells hard-code their own paths.
# NOTE(review): "Thougts" differs from the "Thoughts" folder used by the later
# cells — confirm whether this is a typo.
vault_path = r"C:\Users\nonak\Documents\Thougts"
second_path = r"C:\Users\nonak\Documents\MyObsidianSetup"

# Property Report

## Get metadata

In [73]:
def get_file_metadata(file_path, base_dir=None):
    """Return timestamps, size and a relative path for ``file_path``.

    Args:
        file_path: Path of the file to inspect.
        base_dir: Directory that ``rel_path`` is computed against. When
            omitted, the module-level ``directory_to_scan`` is used, so
            existing one-argument calls keep working.

    Returns:
        dict with the keys ``created`` and ``modified`` (``datetime``
        objects built with ``datetime.fromtimestamp``), ``size_kb``
        (size in bytes divided by 1024) and ``rel_path``.

    Raises:
        OSError: if ``file_path`` cannot be stat'ed.
        NameError: if ``base_dir`` is omitted and ``directory_to_scan`` has
            not been defined yet.
    """
    stat = os.stat(file_path)

    # Prefer the real creation time where the platform exposes it; st_ctime
    # only means "creation" on Windows and is the metadata-change time on
    # Unix-like systems.
    try:
        created_ts = stat.st_birthtime
    except AttributeError:
        created_ts = stat.st_ctime

    if base_dir is None:
        # Backward-compatible fallback to the notebook-level setting.
        base_dir = directory_to_scan

    return {
        'created': datetime.fromtimestamp(created_ts),
        'modified': datetime.fromtimestamp(stat.st_mtime),
        'size_kb': stat.st_size / 1024,
        'rel_path': os.path.relpath(file_path, start=base_dir)
    }

def _frontmatter_block(content):
    """Return the raw text between the leading ``---`` fences, or None."""
    if not content.startswith("---"):
        return None
    # The closing fence has to start a line; a bare find("---") would also
    # stop at dashes that appear inside a property value.
    end_of_header = content.find("\n---", 3)
    if end_of_header == -1:
        return None
    return content[3:end_of_header].strip()


def analyze_frontmatter(directory):
    """Collect frontmatter statistics for every ``.md`` file under ``directory``.

    Args:
        directory: Root folder, walked recursively with ``os.walk``.

    Returns:
        dict with the keys:
            ``property_stats``: property name -> ``Counter`` of stringified
                values (list values are counted item by item).
            ``property_presence``: ``Counter`` of property name -> number of
                files that declare it.
            ``date_stats``: empty dict, or ``oldest``/``newest``/``timespan``/
                ``median``/``total_files``/``files_per_day``.
            ``size_stats``: empty dict, or ``total_mb``/``avg_kb``/
                ``largest_kb``/``smallest_kb``.
            ``total_files``: number of ``.md`` files seen.

        ``total_files`` counts every ``.md`` file, while the date and size
        statistics only cover files that have a frontmatter block.

    Raises:
        OSError, UnicodeDecodeError: if a ``.md`` file cannot be read as
            UTF-8 text (not handled here).
    """
    property_stats = defaultdict(Counter)
    property_presence = Counter()
    file_count = 0
    dates = []
    sizes = []

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(".md"):
                continue
            file_path = os.path.join(root, file)
            file_count += 1

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            header = _frontmatter_block(content)
            if header is None:
                continue

            metadata = get_file_metadata(file_path)
            dates.append(metadata['created'])
            sizes.append(metadata['size_kb'])

            try:
                data = yaml.safe_load(header)
            except yaml.YAMLError:
                # Best effort: a malformed header contributes no properties.
                continue
            if not isinstance(data, dict):
                # Empty header (None) or a scalar/list document: nothing to
                # iterate as properties.
                continue

            for prop, value in data.items():
                property_presence[prop] += 1
                items = value if isinstance(value, list) else [value]
                for item in items:
                    property_stats[prop][str(item)] += 1

    # Calculate date statistics
    date_stats = {}
    if dates:
        sorted_dates = sorted(dates)
        oldest, newest = sorted_dates[0], sorted_dates[-1]
        timespan = newest - oldest
        date_stats = {
            'oldest': oldest,
            'newest': newest,
            'timespan': timespan,
            'median': sorted_dates[len(sorted_dates) // 2],
            'total_files': file_count,
            # A span shorter than one day has .days == 0; report 0 instead
            # of dividing by zero.
            'files_per_day': file_count / timespan.days if timespan.days > 0 else 0
        }

    # Calculate size statistics
    size_stats = {}
    if sizes:
        total_kb = sum(sizes)
        size_stats = {
            'total_mb': total_kb / 1024,
            'avg_kb': total_kb / len(sizes),
            'largest_kb': max(sizes),
            'smallest_kb': min(sizes)
        }

    return {
        'property_stats': dict(property_stats),
        'property_presence': property_presence,
        'date_stats': date_stats,
        'size_stats': size_stats,
        'total_files': file_count
    }


## Generate report


In [168]:

def generate_statistical_report(data, output_file="markdown_stats_report.md"):
    """Write the frontmatter statistics in ``data`` to a Markdown file.

    Args:
        data: Mapping with the keys read here: ``total_files``,
            ``date_stats`` (empty, or a mapping with ``oldest``, ``newest``,
            ``timespan`` and ``files_per_day``), ``property_presence``
            (counter of property name -> number of files) and
            ``property_stats`` (property name -> counter of values).
            ``size_stats`` is not rendered.
        output_file: Destination path; the file is overwritten.

    The report consists of a quoted summary header, a "Property Counts"
    table limited to the 15 most frequent properties, and a "By Property"
    section. Properties with at most 15 distinct values list every value;
    the rest list the top five followed by an "Others" line.
    """
    with open(output_file, 'w', encoding='utf-8') as md:
        emit = md.write

        # Summary header
        emit("# Markdown Statistics Report\n\n")
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        emit(f"> **Analysis performed:** {timestamp}\n")
        emit(f"> **Total files analyzed:** {data['total_files']:,}\n\n")

        date_info = data['date_stats']
        if not date_info:
            emit("*No date information available*\n")
        else:
            first_day = date_info['oldest'].strftime('%Y-%m-%d')
            last_day = date_info['newest'].strftime('%Y-%m-%d')
            emit(f"> **Date range:** {first_day} to {last_day}\n")
            emit(f"> **Timespan:** {date_info['timespan'].days} days\n")
            emit(f"> **Average files per day:** {date_info['files_per_day']:.2f}\n\n")
            # Link to the `_index_notas` note (wikilink syntax).
            emit("[[_index_notas|Acessar Index todas as notas]]")

        # Table: in how many files each property appears
        emit("\n\n\n## Property Counts\n")
        emit("| Property | Files | Coverage |\n")
        emit("|----------|-------|----------|\n")
        for name, n_files in data['property_presence'].most_common(15):
            coverage = (n_files / data['total_files']) * 100
            emit(f"| `{name}` | {n_files} | {coverage:.1f}% |\n")

        # Per-property value breakdown
        emit("\n## By Property\n")
        for name, values in data['property_stats'].items():
            occurrences = sum(values.values())
            distinct = len(values)

            n_files = data['property_presence'][name]
            share = n_files / data['total_files']
            emit(f"\n##### `{name}` \n> **Present in:** {n_files} files ({share:.1%})\n")

            if distinct > 15:
                # Too many values to list: top five plus an aggregate line.
                listed = values.most_common(5)
                remainder = occurrences - sum(n for _, n in listed)
                emit("\n> **Top Values:**\n")
            else:
                emit("\n**Value Distribution:**\n")
                listed = values.most_common()
                remainder = None

            for value, n in listed:
                emit(f"- `{value}`: {n} ({n/occurrences:.1%})\n")

            if remainder is not None:
                emit(f"\n> *Others ({distinct-5} values)*: {remainder} ({remainder/occurrences:.1%})\n")

# Configuration
# `directory_to_scan` is also read as a global by get_file_metadata(), so it
# must be assigned before analyze_frontmatter() runs.
directory_to_scan = r"C:\Users\nonak\Documents\Thoughts"
# The report is a .md file written inside the scanned directory, so once it
# exists it is itself included in later scans.
output_file = r"C:\Users\nonak\Documents\Thoughts\PropertyAnalysis.md"

# Run analysis and generate report
data = analyze_frontmatter(directory_to_scan)
generate_statistical_report(data, output_file)
print(f"Statistical report generated: {output_file}")

Statistical report generated: C:\Users\nonak\Documents\Thoughts\PropertyAnalysis.md


# Create an index note with all notes by folder

In [170]:
import os
from collections import defaultdict

def contar_palavras(texto):
    """Return how many whitespace-separated tokens ``texto`` contains."""
    tokens = texto.split()
    return len(tokens)

def listar_notas_markdown_organizadas(pasta_raiz):
    """Group the ``.md`` files under ``pasta_raiz`` by folder with word counts.

    Args:
        pasta_raiz: Root folder, walked recursively with ``os.walk``.

    Returns:
        A 3-tuple ``(notes_by_folder, words_by_folder, total_words)``:
        a ``defaultdict(list)`` mapping each folder (path relative to
        ``pasta_raiz``, ``'.'`` for the root itself) to a list of
        ``(note name without extension, word count)`` tuples, a
        ``defaultdict(int)`` with the summed word count per folder, and the
        overall word count. Folders without ``.md`` files get no entry.
    """
    notes_by_folder = defaultdict(list)
    words_by_folder = defaultdict(int)
    total_words = 0

    for current_dir, _subdirs, filenames in os.walk(pasta_raiz):
        folder_key = os.path.relpath(current_dir, pasta_raiz)
        for filename in filenames:
            if not filename.endswith(".md"):
                continue

            full_path = os.path.join(current_dir, filename)
            note_name, _extension = os.path.splitext(filename)

            # Files are read as UTF-8; the whole text (frontmatter included)
            # is passed to the word counter.
            with open(full_path, 'r', encoding='utf-8') as handle:
                word_count = contar_palavras(handle.read())

            total_words += word_count
            words_by_folder[folder_key] += word_count
            notes_by_folder[folder_key].append((note_name, word_count))

    return notes_by_folder, words_by_folder, total_words

def salvar_em_markdown(notas_organizadas, contagem_por_pasta, total_geral, caminho_saida):
    """Write the folder-by-folder note index to ``caminho_saida``.

    Args:
        notas_organizadas: Mapping of folder -> list of
            ``(note name, word count)`` tuples.
        contagem_por_pasta: Mapping of folder -> total word count.
        total_geral: Overall word count, printed in the footer.
        caminho_saida: Destination path; the file is overwritten.

    Folders are emitted in sorted order (``'.'`` is shown as ``[raiz]``) and
    the notes inside each folder are sorted as well.
    """
    with open(caminho_saida, 'w', encoding='utf-8') as out:
        out.write("# Índice de Notas Markdown\n\n")

        for folder in sorted(notas_organizadas):
            notes = notas_organizadas[folder]
            if folder == '.':
                heading = '[raiz]'
            else:
                heading = folder

            out.write(f"### {heading} — {len(notes)} notas\n")
            out.writelines(
                f"- [{note_name}] — {word_count} palavras\n"
                for note_name, word_count in sorted(notes)
            )
            out.write(f"\n**Total nesta pasta**: {contagem_por_pasta[folder]} palavras\n\n")

        # Footer with the grand total
        out.write("---\n")
        out.write(f"## 📊 Total geral: {total_geral} palavras\n")

# Paths
caminho_da_pasta = r"C:\Users\nonak\Documents\Thoughts"
# The index is a .md file written into the scanned folder itself, so once it
# exists it is listed (and its words counted) on later runs.
caminho_arquivo_saida = os.path.join(caminho_da_pasta, "_index_notas.md")

# Run: collect the notes, then write the index file
notas_organizadas, contagem_por_pasta, total_palavras = listar_notas_markdown_organizadas(caminho_da_pasta)
salvar_em_markdown(notas_organizadas, contagem_por_pasta, total_palavras, caminho_arquivo_saida)

print(f"Arquivo salvo em: {caminho_arquivo_saida}")

Arquivo salvo em: C:\Users\nonak\Documents\Thoughts\_index_notas.md


# Daily quote extraction

In [6]:
def extract_quotes_to_markdown(folder_path, output_path):
    """Collect ``[!quote]`` callouts from a folder into one Markdown file.

    Every ``.md`` file directly inside ``folder_path`` (subfolders are not
    visited) is searched for a two-line callout of the form::

        > [!quote] <quote text>
        > — <author>

    Each match is written to ``output_path`` together with the name of the
    file it came from.

    Args:
        folder_path: Folder whose ``.md`` files are scanned.
        output_path: Destination file; it is overwritten.

    Raises:
        OSError, UnicodeDecodeError: if the folder cannot be listed or a
            file cannot be read as UTF-8 text.
    """
    quote_pattern = re.compile(r"> \[!quote\] (.+?)\n> — (.+)")
    all_entries = []

    # os.listdir returns names in arbitrary order; sorting keeps the
    # collection stable from one run to the next.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".md"):
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        all_entries.extend(
            f"> [!quote] {quote.strip()}\n> — {author.strip()}\n*Origem: {filename}*\n"
            for quote, author in quote_pattern.findall(content)
        )

    # Write to a markdown file, one blank line after each entry
    with open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.write("# Coletânea de Citações\n\n")
        for entry in all_entries:
            out_file.write(entry + "\n")

# Usage: scan the daily notes folder and write the collected quotes to the
# vault root.
folder = r"C:\Users\nonak\Documents\Thoughts\Calendar\DAILY"
output_md = r"C:\Users\nonak\Documents\Thoughts\quotes.md"
extract_quotes_to_markdown(folder, output_md)
print(f"Arquivo gerado com sucesso em: {output_md}")


Arquivo gerado com sucesso em: C:\Users\nonak\Documents\Thoughts\quotes.md


In [None]:
def buscar_arquivos_por_palavra(caminho_raiz, palavra_chave):
    """Return the paths of all files under ``caminho_raiz`` containing a string.

    Args:
        caminho_raiz: Root folder, walked recursively with ``os.walk``.
        palavra_chave: Substring to look for (case-sensitive ``in`` test on
            the whole file text).

    Returns:
        List of full paths, in the order the walk visits them. Files that
        cannot be opened or decoded as UTF-8 are skipped.
    """
    caminhos_encontrados = []

    for raiz, _, arquivos in os.walk(caminho_raiz):
        for nome_arquivo in arquivos:
            caminho_completo = os.path.join(raiz, nome_arquivo)
            try:
                with open(caminho_completo, 'r', encoding='utf-8') as arquivo:
                    conteudo = arquivo.read()
            except (UnicodeDecodeError, OSError):
                # Skip anything that cannot be read as text. OSError covers
                # PermissionError and FileNotFoundError as well as other
                # OS-level failures that would otherwise abort the walk.
                continue

            if palavra_chave in conteudo:
                caminhos_encontrados.append(caminho_completo)

    return caminhos_encontrados

# Example usage: prompt for a root folder and a keyword, then print every
# file under that folder whose text contains the keyword.
if __name__ == "__main__":
    caminho = input("Digite o caminho do projeto: ").strip()
    palavra = input("Digite a palavra a buscar: ").strip()
    encontrados = buscar_arquivos_por_palavra(caminho, palavra)

    print("\nArquivos contendo a palavra:")
    for arquivo in encontrados:
        print(arquivo)
        

Digite o caminho do projeto:  C:\Users\nonak\AppData\Local
