In [2]:
import os
from collections import defaultdict, Counter

def extract_values_and_notes(directory, target_property="HUB"):
    property_to_notes = defaultdict(list)

    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".md"):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                if content.startswith("---"):
                    end_of_header = content.find("---", 3)
                    if end_of_header != -1:
                        header = content[3:end_of_header].strip()
                        inside_property = False
                        for line in header.splitlines():
                            line = line.strip()
                            if line.startswith(f"{target_property}:"):
                                inside_property = True
                            elif inside_property:
                                if line.startswith("-"):
                                    property_value = line.strip("- ").strip()
                                    property_to_notes[property_value].append(file)
                                elif not line or line.startswith("#"):
                                    inside_property = False
    return property_to_notes

def count_unique_values(property_to_notes):
    return {prop: len(notes) for prop, notes in property_to_notes.items()}

def extract_header_values_from_markdown(directory):
    header_values = defaultdict(list)

    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".md"):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                if content.startswith("---"):
                    end_of_header = content.find("---", 3)
                    if end_of_header != -1:
                        header = content[3:end_of_header].strip()
                        current_key = None
                        for line in header.splitlines():
                            line = line.strip()
                            if ":" in line:
                                key, value = map(str.strip, line.split(":", 1))
                                current_key = key
                                if value:
                                    header_values[key].append(value)
                            elif line.startswith("-") and current_key:
                                value = line.strip("- ").strip()
                                header_values[current_key].append(value)
    return header_values

def count_all_unique_values(header_values):
    return {key: Counter(values) for key, values in header_values.items()}


# Caminho para os arquivos Markdown
directory_to_scan = r"C:\Users\nonak\Documents\Thoughts"

# Processamento da propriedade 'HUB'
property_to_notes = extract_values_and_notes(directory_to_scan, target_property="HUB")
unique_property_counts = count_unique_values(property_to_notes)

# Exibir valores únicos da propriedade 'HUB'
print("Contagem de valores únicos da propriedade 'HUB':")
for prop, count in sorted(unique_property_counts.items(), key=lambda item: item[1], reverse=True):
    print(f"{prop}: {count}")

total_hub = sum(unique_property_counts.values())
print(f"\nTotal geral de arquivos associados a valores 'HUB': {total_hub}")

print("\n -------------------------------------------------------------------------------- \n")

# Análise geral de propriedades
header_values = extract_header_values_from_markdown(directory_to_scan)
unique_value_counts = count_all_unique_values(header_values)

print("\nContagem de valores únicos por propriedade (ordenado por contagem):")
for key, counter in unique_value_counts.items():
    print(f"\nPropriedade: {key}")
    for value, count in sorted(counter.items(), key=lambda item: item[1], reverse=True):
        print(f"  {value}: {count}")
    total_prop = sum(counter.values())
    print(f"  >> Total: {total_prop}")


Contagem de valores únicos da propriedade 'HUB':
"[[hub-python]]": 250
"[[hub-collectors]]": 196
"[[hub-aoc]]": 107
"[[hub-statistic]]": 80
"[[hub-mkt]]": 73
"[[hub-SistemaOperacional]]": 73
"[[hub-tratamento-de-dados]]": 71
"[[hub-data-visualization]]": 50
"[[hub-ect]]": 34
"[[hub-probabilidade]]": 30
"[[hub-social-midia]]": 30
"[[hub-copy]]": 27
"[[hub-math]]": 27
"[[hub-ml-models]]": 26
"[[hub-tec]]": 23
"[[hub-logic]]": 21
"[[hub-growth]]": 21
"[[hub-hypothesis-testing]]": 19
"[[hub-descriptive-analysis]]": 19
"[[hub-sql]]": 17
"[[hub-writing]]": 17
"[[hub-GCTI]]": 16
"[[hub-pkm]]": 12
"[[hub-design]]": 7
"[[hub-thinking]]": 7
"[[hub-bi]]": 6
learning/review: 6
"[[hub-psyche]]": 4
"[[hub-music]]": 4
learning: 3
"[[]]": 3
"[[hub-filosofia]]": 2
"[[hub-git]]": 2
hip-hop: 2
"[[hub-llm]]": 1
data-types: 1
"[[hub-work]]": 1
"[[hub-so]]": 1
"[[hub-prompts]]": 1
"[[Efforts/AREAS/bbHarry/AprendizadoDobb.md|AprendizadoDobb]]": 1
dashboard: 1
wide-page: 1
thinking: 1
tocomplete: 1
Art rock: 

In [3]:
import os

In [None]:
def buscar_arquivos_por_palavra(caminho_raiz, palavra_chave):
    caminhos_encontrados = []

    for raiz, _, arquivos in os.walk(caminho_raiz):
        for nome_arquivo in arquivos:
            caminho_completo = os.path.join(raiz, nome_arquivo)
            try:
                with open(caminho_completo, 'r', encoding='utf-8') as arquivo:
                    conteudo = arquivo.read()
                    if palavra_chave in conteudo:
                        caminhos_encontrados.append(caminho_completo)
            except (UnicodeDecodeError, PermissionError, FileNotFoundError):
                # Ignora arquivos que não podem ser lidos como texto
                continue

    return caminhos_encontrados

# Exemplo de uso
if __name__ == "__main__":
    caminho = input("Digite o caminho do projeto: ").strip()
    palavra = input("Digite a palavra a buscar: ").strip()
    encontrados = buscar_arquivos_por_palavra(caminho, palavra)

    print("\nArquivos contendo a palavra:")
    for arquivo in encontrados:
        print(arquivo)
        

Digite o caminho do projeto:  C:\Users\nonak\AppData\Local
