In [15]:
!pip install -q sentence-transformers requests


In [16]:
import requests
import json
from datetime import datetime
from sentence_transformers import SentenceTransformer, util
import numpy as np

In [17]:
# Repository under analysis; interpolated into the API URLs built by the
# coletar_* helpers and echoed in every generated report.
REPO = "Mintplex-Labs/anything-llm"
# Base URL that the coletar_* helpers extend with "/{repo}/<resource>".
GITHUB_API = "https://api.github.com/repos"


In [18]:
def coletar_releases(repo, n=10, timeout=30):
    """Fetch the most recent releases of a GitHub repository.

    Args:
        repo: Repository path appended to ``GITHUB_API`` (e.g. "owner/name").
        n: Number of releases to request; sent as the ``per_page`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    url = f"{GITHUB_API}/{repo}/releases"
    response = requests.get(url, params={"per_page": n}, timeout=timeout)
    # Fail loudly on error responses: their JSON body is an error object, and
    # passing it on would make the analysis silently run on no data at all.
    response.raise_for_status()
    return response.json()

def coletar_branches(repo, n=None, timeout=30):
    """Fetch the branches of a GitHub repository.

    Args:
        repo: Repository path appended to ``GITHUB_API`` (e.g. "owner/name").
        n: Optional page size, sent as ``per_page``. When omitted the request
            carries no paging parameter and the server's default applies.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    url = f"{GITHUB_API}/{repo}/branches"
    params = {} if n is None else {"per_page": n}
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on error responses instead of returning the error object
    # as if it were the branch list.
    response.raise_for_status()
    return response.json()

def coletar_prs(repo, n=20, timeout=30):
    """Fetch the most recent closed pull requests of a GitHub repository.

    Args:
        repo: Repository path appended to ``GITHUB_API`` (e.g. "owner/name").
        n: Number of pull requests to request; sent as ``per_page``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    url = f"{GITHUB_API}/{repo}/pulls"
    response = requests.get(
        url,
        params={"state": "closed", "per_page": n},
        timeout=timeout,
    )
    # Fail loudly on error responses instead of returning the error object
    # as if it were the pull-request list.
    response.raise_for_status()
    return response.json()

In [19]:
releases = coletar_releases(REPO)

# Keep only well-formed release records. Anything that is not a dict, or that
# lacks a tag or a publication date, is skipped so it cannot reach the date
# arithmetic below.
valid_releases = [
    r for r in releases
    if isinstance(r, dict) and r.get("tag_name") and r.get("published_at")
]

release_info = [f"{r['tag_name']} - {r['published_at']}" for r in valid_releases]
release_dates = [r["published_at"] for r in valid_releases]


def _is_semver_tag(tag):
    """Return True when ``tag`` looks like MAJOR.MINOR.PATCH.

    One leading "v"/"V" and a trailing "-prerelease" / "+build" suffix are
    tolerated.
    """
    core = tag[1:] if tag[:1] in ("v", "V") else tag
    core = core.split("-")[0].split("+")[0]
    parts = core.split(".")
    return len(parts) == 3 and all(p.isdigit() for p in parts)


# Heuristic cut-off: the cadence is called "regular" when the standard
# deviation of the gaps between releases is at most this fraction of their
# mean. Tune as needed.
REGULARITY_MAX_CV = 0.5

# Assumes timestamps of the form "YYYY-MM-DDTHH:MM:SSZ"; anything else raises
# ValueError here rather than being described incorrectly.
published = sorted(
    datetime.strptime(d, "%Y-%m-%dT%H:%M:%SZ") for d in release_dates
)
intervals_days = [
    (later - earlier).total_seconds() / 86400
    for earlier, later in zip(published, published[1:])
]

# Every sentence below is derived from the fetched data. The classifier
# compares this text with the strategy definitions, so a hard-coded claim
# such as "regular intervals" would decide the outcome in advance.
description_lines = [f"The project has {len(release_info)} recent releases."]

if valid_releases:
    semver_count = sum(_is_semver_tag(r["tag_name"]) for r in valid_releases)
    if semver_count == len(valid_releases):
        description_lines.append("Releases follow semantic versioning.")
    elif semver_count == 0:
        description_lines.append("Release tags do not follow semantic versioning.")
    else:
        description_lines.append(
            f"{semver_count} of {len(valid_releases)} release tags follow semantic versioning."
        )

# At least two gaps are needed before their spread means anything.
if len(intervals_days) >= 2:
    mean_days = float(np.mean(intervals_days))
    std_days = float(np.std(intervals_days))
    is_regular = mean_days > 0 and std_days / mean_days <= REGULARITY_MAX_CV
    description_lines.append(
        f"Releases are published every {mean_days:.1f} days on average "
        f"(standard deviation {std_days:.1f} days)."
    )
    description_lines.append(
        "Releases are published at regular intervals."
        if is_regular
        else "Releases are published at irregular intervals."
    )

description_lines.append("Recent releases include:")

# The leading newline is kept so the text has the same outer shape as before;
# the report writers strip it.
release_description = "\n" + "\n".join(description_lines + release_info)


In [20]:
branches = coletar_branches(REPO)
prs = coletar_prs(REPO)

# Non-dict items (e.g. the keys of an error payload) are ignored.
branch_names = [
    b["name"] for b in branches if isinstance(b, dict) and "name" in b
]

# The query asks for *closed* pull requests, which also includes ones that
# were closed without being merged. Only entries carrying a merge timestamp
# say anything about where work is actually integrated.
merge_targets = [
    pr["base"]["ref"]
    for pr in prs
    if isinstance(pr, dict)
    and pr.get("merged_at")
    and (pr.get("base") or {}).get("ref")
]

# Most frequent target first, ties broken alphabetically, so the generated
# text is deterministic and free of Python's set repr.
ranked_targets = sorted(
    set(merge_targets),
    key=lambda ref: (-merge_targets.count(ref), ref),
)

workflow_lines = []
if branch_names:
    workflow_lines.append(
        f"The repository contains the branches: {', '.join(branch_names)}."
    )
else:
    workflow_lines.append("No branches were returned for the repository.")

if ranked_targets:
    workflow_lines.append(
        f"Pull requests are usually merged into: {', '.join(ranked_targets)}."
    )
    workflow_lines.append("The project uses pull requests for integration.")
else:
    workflow_lines.append(
        "No merged pull requests were found among the sampled closed pull requests."
    )

# Leading and trailing newlines mirror the original triple-quoted layout.
workflow_description = "\n" + "\n".join(workflow_lines) + "\n"


In [21]:
# Candidate release strategies: label -> one-sentence definition. The release
# description is compared against each definition by classificar().
release_strategies = {
    "Rapid Releases": "Frequent and continuous releases with short intervals between versions.",
    "Release Train": "Releases follow a predictable and scheduled cadence.",
    "LTS + Current": "Long-term support versions are maintained alongside current versions.",
    "Ad hoc / Irregular": "Releases occur irregularly without a defined schedule."
}

# Candidate branching workflows: label -> one-sentence definition. The
# workflow description is compared against each definition by classificar().
workflow_strategies = {
    "Git-flow": "Uses develop and main branches with feature, release, and hotfix branches.",
    "GitHub Flow": "Uses a single main branch with short-lived feature branches merged via pull requests.",
    "Trunk-Based Development": "Development happens directly on the main branch with very short-lived branches."
}


In [22]:
# Sentence-embedding model used by classificar() to encode both the
# descriptions and the strategy definitions. Loaded once at notebook level so
# every classification call reuses the same instance.
model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L3-v2")

In [23]:
def classificar(texto, estrategias):
    """Pick the strategy whose definition is most similar to ``texto``.

    The text and all strategy definitions are embedded with the notebook-level
    ``model`` and compared by cosine similarity.

    Args:
        texto: Description to classify.
        estrategias: Mapping of strategy name -> textual definition.

    Returns:
        A ``(melhor, scores)`` tuple: the name with the highest similarity and
        a dict mapping every strategy name to its similarity score (float).

    Raises:
        ValueError: If ``estrategias`` is empty.
    """
    if not estrategias:
        raise ValueError("estrategias must contain at least one strategy")

    nomes = list(estrategias)
    definicoes = [estrategias[nome] for nome in nomes]

    texto_emb = model.encode(texto, convert_to_tensor=True)
    # Encode every definition in a single batched call instead of one model
    # invocation per strategy.
    definicoes_emb = model.encode(definicoes, convert_to_tensor=True)

    # cos_sim returns a (1, len(definicoes)) matrix; row 0 holds the score of
    # ``texto`` against each definition, in the same order as ``nomes``.
    similaridades = util.cos_sim(texto_emb, definicoes_emb)[0].tolist()
    scores = dict(zip(nomes, similaridades))

    melhor = max(scores, key=scores.get)
    return melhor, scores


In [24]:
# Classify each description against its own strategy catalogue. Every call
# yields the winning strategy name plus the full name -> score mapping.
release_result, release_scores = classificar(release_description, release_strategies)
workflow_result, workflow_scores = classificar(workflow_description, workflow_strategies)


In [25]:
# Machine-readable report: repository, analysis timestamp and, for each of
# the two analyses, the description that was classified, the winning strategy
# and the per-strategy similarity scores.
resultado_final = dict(
    repositorio=REPO,
    data_analise=datetime.now().isoformat(),
    analise_releases=dict(
        descricao=release_description.strip(),
        estrategia_identificada=release_result,
        scores=release_scores,
    ),
    analise_workflow=dict(
        descricao=workflow_description.strip(),
        estrategia_identificada=workflow_result,
        scores=workflow_scores,
    ),
)

# ensure_ascii=False keeps non-ASCII characters readable in the UTF-8 file.
with open("resultado_analise_anythingllm.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(resultado_final, indent=2, ensure_ascii=False))


In [26]:
# -------- TXT WITH THE ANALYSED DATA --------
# Plain-text dump of the two descriptions that were fed to the classifier.
# The headings are written with their proper accented characters instead of
# the mis-encoded sequences previously present in the literals.
with open("dados_analisados_anythingllm.txt", "w", encoding="utf-8") as f:
    f.write("REPOSITÓRIO ANALISADO\n")
    f.write(f"{REPO}\n\n")

    f.write("ANÁLISE DE RELEASES \n")
    f.write(release_description.strip())
    f.write("\n\n")

    f.write("ANÁLISE DE WORKFLOW / BRANCHES \n")
    f.write(workflow_description.strip())
    f.write("\n")


# -------- TXT WITH THE FINAL RESULT --------
# Human-readable summary: the strategy chosen for each analysis followed by
# the similarity score of every candidate.
with open("analise_anythingllm.txt", "w", encoding="utf-8") as f:
    f.write("RESULTADO DA ANÁLISE DO REPOSITÓRIO\n")
    f.write(f"Repositório: {REPO}\n")
    # Reuse the timestamp stored in the JSON report so both files describe
    # the same analysis run instead of two slightly different instants.
    f.write(f"Data da análise: {resultado_final['data_analise']}\n\n")

    secoes = [
        ("ESTRATÉGIA DE RELEASES:\n", release_result, release_scores),
        ("\nFLUXO DE TRABALHO (BRANCHES)\n", workflow_result, workflow_scores),
    ]
    for titulo, estrategia, pontuacoes in secoes:
        f.write(titulo)
        f.write(f"Estratégia identificada: {estrategia}\n\n")
        f.write("Scores de similaridade:\n")
        for nome, valor in pontuacoes.items():
            f.write(f"- {nome}: {valor:.4f}\n")


In [27]:
# Tell the user which report files were produced. The folder emoji is written
# as the real character instead of its mis-encoded byte sequence.
print("📁 Arquivos gerados com sucesso:")
print("- resultado_analise_anythingllm.json")
print("- dados_analisados_anythingllm.txt")
print("- analise_anythingllm.txt")


📁 Arquivos gerados com sucesso:
- resultado_analise_anythingllm.json
- dados_analisados_anythingllm.txt
- analise_anythingllm.txt


In [28]:
# Mount Google Drive at /content/drive.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, so this cell would fail elsewhere — confirm the intended environment.
# NOTE(review): the recorded run of this cell ended with "MessageError: Error:
# credential propagation was unsuccessful", i.e. the mount did not succeed.
# NOTE(review): no cell visible above reads or writes under /content/drive;
# confirm whether later cells need the mount or whether this cell can be dropped.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful