In [None]:
!pip install transformers datasets gitpython tqdm torch

from transformers import pipeline
from git import Repo
from tqdm import tqdm
import os

# 1. Clone the repository, unless something already exists at the target path.
repo_url, repo_dir = (
    "https://github.com/Mintplex-Labs/anything-llm",
    "anything-llm",
)

if not os.path.exists(repo_dir):
    print("Clonando repositório...")
    Repo.clone_from(url=repo_url, to_path=repo_dir)

# 2. Load the bart-large-mnli model as a zero-shot classification pipeline.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# 3. Architectural patterns, each paired with a one-sentence description.
# Insertion order is preserved, so the two derived lists below stay aligned
# index by index.
architectural_patterns = dict([
    ("MVC (Model-View-Controller)", "Separates software into Model, View, and Controller layers to organize logic and interface independently."),
    ("MVVM (Model-View-ViewModel)", "Enhances separation between UI and logic through reactive data binding and ViewModel mediation."),
    ("Clean Architecture", "Organizes the system in concentric layers isolating business rules from frameworks and external details."),
    ("Hexagonal Architecture", "Builds systems around a domain core using ports and adapters to allow flexible integration layers."),
    ("Layered Architecture", "Traditional N-layer approach where presentation, business, and data layers interact hierarchically."),
    ("Microservices", "Application composed of independent services communicating via lightweight APIs."),
    ("Event-Driven Architecture", "Components communicate by producing and reacting to asynchronous events."),
    ("Monolithic Architecture", "Single deployable unit where all logic resides in one tightly integrated codebase."),
])

# Pattern names and their descriptions as parallel lists.
pattern_labels = [*architectural_patterns]
pattern_descriptions = [*architectural_patterns.values()]

# 4. Best-effort file reader
def read_file_safe(path: str) -> str:
    """Return the contents of *path* decoded as UTF-8, or "" if it cannot be read.

    Args:
        path: Location of the file to read.

    Returns:
        The full text of the file, or an empty string when the file cannot
        be opened or is not valid UTF-8.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, ValueError):
        # OSError: missing file, directory, permission problems.
        # ValueError: includes UnicodeDecodeError for non-UTF-8 content.
        # Deliberately not a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate and a long run can be stopped.
        return ""

# 5. Output log paths
input_log, output_log = "arquivos_lidos.txt", "resultados_padroes.txt"

# Start every run with empty logs: opening in write mode creates the file
# or truncates an existing one.
with open(input_log, "w"):
    pass
with open(output_log, "w"):
    pass

# 6. Analyze the .js files
#
# Walks the cloned repository, classifies the beginning of every non-empty
# .js file against the pattern descriptions, and appends the three
# best-scoring patterns for each file to the results log.
print("Analisando apenas arquivos .js ...")

# The classifier receives the pattern *descriptions* as candidate labels, so
# its output has to be translated back to the short pattern names.
description_to_label = dict(zip(pattern_descriptions, pattern_labels))

for root, _, files in os.walk(repo_dir):
    for file in tqdm(files, desc="Arquivos"):
        if not file.endswith(".js"):
            continue

        path = os.path.join(root, file)
        code = read_file_safe(path)

        # Skip files that are empty, whitespace-only, or could not be read.
        if not code.strip():
            continue

        # Record the path of every file that is actually classified.
        with open(input_log, "a", encoding="utf-8") as f:
            f.write(path + "\n")

        # Only the first 2000 characters of the file are classified.
        snippet = code[:2000]

        result = classifier(
            snippet,
            pattern_descriptions,
            multi_label=True
        )

        # result["labels"] and result["scores"] are parallel lists ordered by
        # descending score, NOT in the order the candidates were passed in.
        # Pair each score with the label the pipeline reports for it, then map
        # that description back to its pattern name.
        scored_patterns = [
            (description_to_label[description], score)
            for description, score in zip(result["labels"], result["scores"])
        ]

        # Keep the top 3 (sorted explicitly rather than relying on the
        # pipeline's output order).
        top3 = sorted(
            scored_patterns,
            key=lambda x: x[1],
            reverse=True
        )[:3]

        # Append this file's results to the output log.
        with open(output_log, "a", encoding="utf-8") as f:
            f.write(f"\nArquivo: {path}\n")
            for label, score in top3:
                f.write(f"- {label}: {score:.4f}\n")

print("\nProcesso concluído!")
print(f"Arquivos lidos: {input_log}")
print(f"Resultados: {output_log}")


Clonando repositório...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


Analisando apenas arquivos .js ...


Arquivos: 100%|██████████| 17/17 [01:21<00:00,  4.81s/it]
Arquivos: 0it [00:00, ?it/s]
Arquivos: 0it [00:00, ?it/s]
Arquivos: 100%|██████████| 3/3 [00:00<00:00, 33026.02it/s]
Arquivos: 100%|██████████| 3/3 [00:00<00:00, 56679.78it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 14768.68it/s]
Arquivos: 100%|██████████| 9/9 [00:52<00:00,  5.84s/it]
Arquivos: 100%|██████████| 2/2 [00:23<00:00, 11.72s/it]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 18236.10it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 16194.22it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 13486.51it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 23301.69it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 12018.06it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 20661.60it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 26715.31it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 13662.23it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 24385.49it/s]
Arquivos: 100%|██████████| 1/1 [00:00<00:00, 22550.02it/s]
Arqui