In [None]:
# Instalar dependências
!pip install -q sentence-transformers scikit-learn

# Importar bibliotecas
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import numpy as np


# Load the all-MiniLM-L6-v2 sentence-embedding model ------------------------------
model = SentenceTransformer('all-MiniLM-L6-v2')


# Read the whole input file entrada5.txt ------------------------------
with open('entrada5.txt', encoding='utf-8') as f:
    entrada = f.read()


# Trim every line and drop the ones that end up empty
linhas = list(filter(None, map(str.strip, entrada.split('\n'))))


# Split the input lines into the "branches" and "releases" sections -----------
# A line whose lowercase form starts with a section name is treated as that
# section's header; every following line is stored under the current section.
# Lines that appear before any header are ignored.
branches = []
releases = []
modo = None
secoes = {"branches": branches, "releases": releases}
for linha in linhas:
    minuscula = linha.lower()
    cabecalho = next(
        (nome for nome in secoes if minuscula.startswith(nome)), None
    )
    if cabecalho is not None:
        # Header line: switch section, do not store the header itself
        modo = cabecalho
    elif modo is not None:
        secoes[modo].append(linha)


# Generate embeddings and group them with KMeans ------------------------------
def _agrupar(textos, n_clusters):
    """Embed ``textos`` and cluster them into at most ``n_clusters`` groups.

    KMeans rejects a request for more clusters than samples, so the cluster
    count is clamped to ``len(textos)``. An empty section skips both the
    embedding model and the fit instead of crashing.

    Args:
        textos: list of strings to embed.
        n_clusters: desired (maximum) number of clusters.

    Returns:
        A tuple ``(embeddings, labels, kmeans)``. For empty input the
        embeddings and labels are empty arrays and ``kmeans`` is ``None``.
    """
    if not textos:
        return np.empty((0, 0), dtype=np.float32), np.empty(0, dtype=int), None
    embeddings = model.encode(textos)
    kmeans = KMeans(n_clusters=min(n_clusters, len(textos)), random_state=42)
    return embeddings, kmeans.fit_predict(embeddings), kmeans


# Branches: up to 3 clusters (intended split: master, features, bugs)
branch_embeddings, branch_clusters, kmeans_branch = _agrupar(branches, 3)

# Releases: up to 2 clusters (intended split: features vs patches)
release_embeddings, release_clusters, kmeans_release = _agrupar(releases, 2)


# Interpret the data to suggest a strategy ------------------------------
# Releases
# KMeans label ids are arbitrary, so "cluster 0" cannot be read as "patches"
# and "cluster 1" as "features". The release kind is taken from the version
# string at the start of each release line instead: a non-zero third
# component (x.y.Z) marks a patch release, a zero one a feature release.
def _is_patch_release(release_line):
    """Classify a release line by the patch component of its version.

    Args:
        release_line: text expected to start with a version such as
            ``v1.9.1`` (the leading ``v`` is optional).

    Returns:
        True when the third version component is greater than zero,
        False when it is zero, None when no ``x.y.z`` version is found.
    """
    import re

    match = re.match(r"\s*[vV]?(\d+)\.(\d+)\.(\d+)", release_line)
    if match is None:
        return None
    return int(match.group(3)) > 0


_release_kinds = [_is_patch_release(r) for r in releases]
# Lines without a recognizable version (None) are counted in neither group
num_patches = sum(1 for kind in _release_kinds if kind is True)
num_features = sum(1 for kind in _release_kinds if kind is False)
if num_patches > 0 and num_features > 0:
    estrategia_releases = "Rapid Release (entregas frequentes de patches e features)"
else:
    estrategia_releases = "Release Train / LTS (poucos releases grandes ou regulares)"

# Branches
# Simple identification based on substrings of the branch names
master_branch = any("master" in b.lower() for b in branches)
feature_branch = any("feat" in b.lower() for b in branches)
bug_branch = any("bug" in b.lower() for b in branches)
if master_branch and feature_branch and bug_branch:
    fluxo_trabalho = "Gitflow simplificado (master para produção, branches curtas para features e bugs)"
else:
    fluxo_trabalho = "GitHub Flow (merge direto em master)"


# Report ------------------------------
# First the per-item cluster assignments, one section per input list
for titulo, itens, rotulos in (
    ("=== Releases agrupados ===", releases, release_clusters),
    ("\n=== Branches agrupados ===", branches, branch_clusters),
):
    print(titulo)
    for item, rotulo in zip(itens, rotulos):
        print(f"[Cluster {rotulo}] {item}")

# Then the two suggested strategies, each under its own heading
for titulo, sugestao in (
    ("\n=== Estratégia de Releases sugerida ===", estrategia_releases),
    ("\n=== Modelo de Fluxo de Trabalho sugerido ===", fluxo_trabalho),
):
    print(titulo)
    print(sugestao)


=== Releases agrupados ===
[Cluster 0] v1.9.1 – Dec 9, 2025
[Cluster 0] v1.9.0 – Oct 9, 2025
[Cluster 0] v1.8.5 – Aug 14, 2025
[Cluster 0] v1.8.4 – Jul 16, 2025
[Cluster 0] v1.8.3 – Jul 9, 2025
[Cluster 0] v1.8.2 – Jun 10, 2025
[Cluster 0] v1.8.1 – May 5, 2025
[Cluster 0] v1.8.0 – Apr 18, 2025
[Cluster 0] v1.7.8 – Mar 26, 2025
[Cluster 0] v1.7.6 – Mar 19, 2025
[Cluster 0] v1.7.5 – Mar 4, 2025
[Cluster 0] v1.7.4 – Feb 17, 2025
[Cluster 1] v1.4.0 – Jan 29, 2025
[Cluster 1] v1.3.0 – Nov 26, 2024
[Cluster 1] v1.2.4 – Oct 24, 2024
[Cluster 1] v1.2.3 – Oct 3, 2024
[Cluster 1] v1.2.2 – Sep 11, 2024
[Cluster 1] v1.2.1 – Aug 19, 2024
[Cluster 1] v1.2.0 – Aug 7, 2024
[Cluster 1] v1.1.1 – Aug 2, 2024
[Cluster 1] v1.1.0 – Jul 31, 2024
[Cluster 1] v1.0.0 – Jul 26, 2024

=== Branches agrupados ===
[Cluster 1] 1.8.3-rerelease
[Cluster 1] 1.9.1-rerelease
[Cluster 2] 1029-feat-hf-serverless-inference-api
[Cluster 2] 1086-feat-implement-normalized-input-fields
[Cluster 2] 1297-feat-gemini-agent-support
