Clonar o repositório

In [1]:
# Clone the activity repository into the working directory. The next cell reads its
# input from this checkout's `entradas/` folder and writes its report to `resultados/`.
!git clone https://github.com/FernandaKaroliny/Evolucao_Software_2025-2_Anything_llm_atividade2.git

Cloning into 'Evolucao_Software_2025-2_Anything_llm_atividade2'...
remote: Enumerating objects: 45, done.[K
remote: Counting objects: 100% (45/45), done.[K
remote: Compressing objects: 100% (38/38), done.[K
remote: Total 45 (delta 12), reused 33 (delta 6), pack-reused 0 (from 0)[K
Receiving objects: 100% (45/45), 937.33 KiB | 3.06 MiB/s, done.
Resolving deltas: 100% (12/12), done.


Execução de bart_mnli+entrada_reduzida.py

In [2]:
import re
import unicodedata
import time
import os
from transformers import pipeline, set_seed

# =========================
# REPRODUCIBILITY
# =========================
# Seed the random number generators through transformers' set_seed helper.
set_seed(42)

# =========================
# CONFIGURATION
# =========================
# Both paths are relative to the working directory and point inside the cloned repository.
ARQUIVO_ENTRADA = "Evolucao_Software_2025-2_Anything_llm_atividade2/entradas/entrada6.txt"
ARQUIVO_SAIDA_TXT = "Evolucao_Software_2025-2_Anything_llm_atividade2/resultados/bart_mnli+entrada_reduzida.txt"

# Character limit passed to gerar_resumo_simples for the excerpt written to the report.
MAX_RESUMO_CHARS = 8000

# =========================
# CLASSIFICATION LABELS
# =========================
# Each label is "<pattern name> (<short description>)". The whole string, description
# included, is what gets substituted into the hypothesis template at classification time.

# Candidate labels for the release-strategy classification.
PADROES_RELEASE = [
    "Continuous Deployment (every code change that passes automated tests is automatically deployed to production without manual approval)",
    "Continuous Delivery (code is continuously integrated and tested, remaining ready for deployment at any time, but releases require a manual decision)",
    "Scheduled Releases (software is released at predefined time intervals, such as weekly, monthly, or at fixed calendar dates)",
    "Big Bang Release (a large set of features is released simultaneously in a single major deployment event)",
    "Canary Release (a new version is gradually rolled out to a small subset of users to monitor behavior before full deployment)",
    "Blue-Green Deployment (two identical production environments are maintained, allowing instant traffic switch between old and new versions)",
    "Rolling Release (updates are deployed incrementally across servers or users without downtime, replacing old versions progressively)"
]

# Candidate labels for the branching/workflow classification.
PADROES_FLUXO = [
    "Git Flow (uses multiple long-lived branches such as main, develop, feature, release, and hotfix to manage parallel development and releases)",
    "GitHub Flow (a lightweight workflow based on a single main branch with short-lived feature branches and pull requests)",
    "GitLab Flow (extends GitHub Flow by introducing environment-based or release-based branching strategies)",
    "Trunk-Based Development (developers commit small, frequent changes directly to a shared main branch, relying on feature flags and continuous integration)",
    "Feature Branch Workflow (each new feature or change is developed in an isolated branch and merged into the main branch after review)"
]

# =========================
# MODEL (MNLI ONLY)
# =========================

# Announce the load before building the pipeline: the first run may have to
# download the checkpoint, which takes noticeably longer than inference.
print("üß† Carregando modelo BART-MNLI...")
classifier = pipeline(task="zero-shot-classification", model="facebook/bart-large-mnli")

# =========================
# HELPER FUNCTIONS
# =========================

def limpar_texto(texto: str) -> str:
    """Normalize raw text and strip noise before it is classified.

    Steps, applied in this order:
      1. Unicode-normalize to NFKC: compatibility characters (ligatures,
         full-width forms, ...) are folded and accented letters are kept in
         their composed form, so "ç" stays a single code point instead of
         "c" followed by a combining cedilla.
      2. Drop fenced code blocks (triple backticks), including multi-line ones.
      3. Drop URLs (any run of non-whitespace characters starting with "http").
      4. Collapse blank or whitespace-only lines into a single newline.
      5. Collapse runs of spaces into one space.

    Args:
        texto: Raw text, e.g. the contents of the input file.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    # NFKC rather than NFKD: NFKD would leave every accented letter decomposed
    # into base letter + combining mark in the text that is classified and
    # written to the report.
    texto = unicodedata.normalize("NFKC", texto)
    texto = re.sub(r"```.*?```", "", texto, flags=re.DOTALL)
    texto = re.sub(r"http\S+", "", texto)
    texto = re.sub(r"\n\s*\n+", "\n", texto)
    texto = re.sub(r" +", " ", texto)
    return texto.strip()

def gerar_resumo_simples(texto: str, limite: int) -> str:
    """Build a deterministic excerpt of ``texto`` without any summarization model.

    Text that already fits within ``limite`` characters is returned unchanged.
    Otherwise the text is cut at ``limite`` characters and then trimmed back to
    the last period inside that cut; the result always ends with a single ".".
    If the cut contains no period at all, the whole cut is kept and "." is appended.

    Args:
        texto: Text to shorten.
        limite: Maximum number of characters taken from the start of ``texto``.

    Returns:
        The original text, or the truncated excerpt terminated by ".".
    """
    if len(texto) <= limite:
        return texto

    recorte = texto[:limite]
    ultimo_ponto = recorte.rfind(".")
    # rfind returns -1 when there is no period: keep the entire cut in that case.
    base = recorte if ultimo_ponto == -1 else recorte[:ultimo_ponto]
    return base + "."

def gerar_ranking(resultado):
    """Turn a classifier result into a list of ranked entries.

    Args:
        resultado: Mapping with parallel sequences under the keys "labels"
            and "scores". The entries are ranked in the order they arrive;
            no sorting is done here (the caller treats the first entry as the
            top result, so the input is presumably already ordered by score).

    Returns:
        A list of dicts with keys "posicao" (1-based rank), "padrao" (label)
        and "confianca" (score), one per label/score pair.
    """
    ranking = []
    pares = zip(resultado["labels"], resultado["scores"])
    for posicao, (rotulo, pontuacao) in enumerate(pares, start=1):
        ranking.append(
            {"posicao": posicao, "padrao": rotulo, "confianca": pontuacao}
        )
    return ranking

# =========================
# MAIN PIPELINE
# =========================

# The timer starts here, so reading and cleaning the input count toward the
# reported time; the model load performed earlier does not.
inicio = time.perf_counter()

with open(ARQUIVO_ENTRADA, mode="r", encoding="utf-8") as entrada:
    conteudo_bruto = entrada.read()
texto = limpar_texto(conteudo_bruto)

# Fail early: there is nothing to classify if cleaning leaves no text.
if len(texto) == 0:
    raise ValueError("‚ùå A entrada est√° vazia.")

# Excerpt that is echoed at the top of the report.
resumo = gerar_resumo_simples(texto, MAX_RESUMO_CHARS)

# =========================
# CLASSIFICATION
# =========================
# Both runs score the full cleaned `texto`; `resumo` is only echoed in the report.

# The "{}" placeholder is where each candidate label is substituted.
modelo_hipotese_release = "The software project follows this release strategy: {}."
modelo_hipotese_fluxo = "The development workflow follows: {}."

print("üè∑Ô∏è Classificando Estrat√©gia de Release...")
class_release = classifier(
    texto,
    candidate_labels=PADROES_RELEASE,
    hypothesis_template=modelo_hipotese_release,
)

print("üîÅ Classificando Fluxo de Trabalho...")
class_fluxo = classifier(
    texto,
    candidate_labels=PADROES_FLUXO,
    hypothesis_template=modelo_hipotese_fluxo,
)

# Stop the timer right after inference so ranking and report writing are not measured.
fim = time.perf_counter()

ranking_release, ranking_fluxo = gerar_ranking(class_release), gerar_ranking(class_fluxo)

# =========================
# SAVE RESULTS
# =========================

def _escrever_secao(destino, titulo, ranking):
    """Write one report section: title, top result with confidence, full ranking.

    Args:
        destino: Open, writable text file.
        titulo: Section title line, written verbatim (including its newlines).
        ranking: Non-empty list of dicts with keys "posicao", "padrao" and
            "confianca"; the first entry is reported as the main result.
    """
    destino.write(titulo)
    principal = ranking[0]
    destino.write(f"Resultado principal: {principal['padrao']}\n")
    destino.write(f"Confian√ßa: {principal['confianca']:.2%}\n\n")
    destino.write("Ranking completo:\n")
    for item in ranking:
        destino.write(f" {item['posicao']}¬∫ {item['padrao']} ‚Äî {item['confianca']:.1%}\n")


# Make sure the output folder exists so a missing `resultados/` directory does
# not discard the inference that has already been computed.
diretorio_saida = os.path.dirname(ARQUIVO_SAIDA_TXT)
if diretorio_saida:
    os.makedirs(diretorio_saida, exist_ok=True)

with open(ARQUIVO_SAIDA_TXT, "w", encoding="utf-8") as f:
    f.write("=== CLASSIFICA√á√ÉO  ===\n\n")

    f.write("üì• RESUMO DO TEXTO ANALISADO\n")
    f.write(resumo + "\n\n")

    _escrever_secao(f, "üì¶ ESTRAT√âGIA DE RELEASE\n", ranking_release)
    _escrever_secao(f, "\nüîÅ FLUXO DE TRABALHO\n", ranking_fluxo)

    f.write(f"\n‚è±Ô∏è Tempo total: {fim - inicio:.2f} segundos\n")

print(f"\nüìÑ Resultado salvo em: {os.path.abspath(ARQUIVO_SAIDA_TXT)}")
print(f"‚è±Ô∏è Tempo de execu√ß√£o: {fim - inicio:.2f}s")

üß† Carregando modelo BART-MNLI...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


üè∑Ô∏è Classificando Estrat√©gia de Release...
üîÅ Classificando Fluxo de Trabalho...

üìÑ Resultado salvo em: /content/Evolucao_Software_2025-2_Anything_llm_atividade2/resultados/bart_mnli+entrada_reduzida.txt
‚è±Ô∏è Tempo de execu√ß√£o: 3.46s


Download local dos resultados

In [3]:
from google.colab import files

# Download the report written by the classification cell. The path is derived from
# ARQUIVO_SAIDA_TXT (set in the configuration section) instead of repeating a
# hard-coded /content/... path, so the written file and the downloaded file
# cannot drift apart if the output location changes.
files.download(os.path.abspath(ARQUIVO_SAIDA_TXT))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>