<a href="https://colab.research.google.com/github/Mavitu56/SLMs/blob/main/SLMs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Workflow Completo: Aprimoramento e Comparação de SLMs

Este notebook é a versão final, completa e funcional do plano para aprimorar e comparar um Small Language Model (SLM) usando Destilação de Conhecimento e Engenharia de Prompt.

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required because a checkpoint loaded later is gated — confirm.

from huggingface_hub import notebook_login
notebook_login()

In [1]:
# Clone the project repository into the current working directory.
# NOTE(review): no visible cell reads from the SLMs/ checkout — confirm the clone is still needed.

!git clone https://github.com/Mavitu56/SLMs.git

Cloning into 'SLMs'...
remote: Enumerating objects: 56, done.[K
remote: Counting objects: 100% (56/56), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 56 (delta 10), reused 3 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (56/56), 24.71 MiB | 10.05 MiB/s, done.
Resolving deltas: 100% (10/10), done.
Updating files: 100% (29/29), done.


### 1. Instalação e Importações Globais

In [2]:
!pip install torch transformers "datasets==2.19.0" evaluate peft accelerate ipywidgets bitsandbytes sentencepiece pandas matplotlib --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m812.4 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import torch
import re
import evaluate
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import Counter
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AdamW,
    TrainingArguments, Trainer, DataCollatorForLanguageModeling
)
from peft import PeftModel, LoraConfig, get_peft_model
from datasets import load_dataset, Dataset
import torch.nn.functional as F

### 2. Funções da Fase 0: Configuração do Ambiente

In [4]:
# 📦 BLEU/ROUGE METRIC SETUP
# Both metrics are loaded once at module level; compute_text_metrics reads these two globals.
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")

# 📦 GENERAL SETTINGS
class KDConfig:
    """Hyperparameters shared by the knowledge-distillation training runs."""

    # Optimisation schedule.
    NUM_EPOCHS = 3
    LEARNING_RATE = 5e-5

    # Softening temperature applied to both teacher and student logits.
    TEMPERATURE = 2.0

    # Run on the GPU when one is visible, otherwise fall back to the CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


# Single shared instance handed to the training function.
config = KDConfig()

### 3. Funções da Fase 1: Destilação de Conhecimento (KD)

In [5]:
# 📦 FUNÇÕES UTILITÁRIAS
def compute_text_metrics(predictions, references):
    """Score generated answers against gold answers with BLEU and ROUGE-L.

    Args:
        predictions: sequence of dicts, each with a 'prediction_text' string.
        references: sequence of dicts parallel to ``predictions``; each has an
            'answers' mapping whose 'text' entry lists the gold answers. Only the
            first gold answer is scored; an empty list is treated as "".

    Returns:
        dict with keys 'bleu' and 'rougeL', both scaled to the 0-100 range.

    Raises:
        ValueError: if ``predictions`` and ``references`` differ in length.
    """
    if len(predictions) != len(references):
        raise ValueError(
            f"predictions ({len(predictions)}) and references ({len(references)}) must have the same length"
        )

    preds = [p['prediction_text'] for p in predictions]
    refs = []
    for r in references:
        gold_answers = r['answers']['text']
        refs.append(gold_answers[0] if len(gold_answers) > 0 else "")

    # Corpus BLEU is 0 by definition when either side has no tokens at all; computing it
    # anyway can end in a division by zero inside the brevity-penalty / length ratio.
    has_hypothesis = any(p.strip() for p in preds)
    has_reference = any(r.strip() for r in refs)
    if has_hypothesis and has_reference:
        bleu_score = bleu_metric.compute(predictions=preds, references=[[ref] for ref in refs])["bleu"]
    else:
        bleu_score = 0.0

    # Nothing to aggregate for an empty evaluation set.
    if preds:
        rouge_l_score = rouge_metric.compute(predictions=preds, references=refs)["rougeL"]
    else:
        rouge_l_score = 0.0

    return {
        "bleu": bleu_score * 100,
        "rougeL": rouge_l_score * 100
    }

def load_model_quantized(model_id):
    """Load a causal LM in 4-bit precision together with its tokenizer.

    Args:
        model_id: model identifier passed to ``from_pretrained``.

    Returns:
        ``(model, tokenizer)``; the tokenizer's pad token is set to its EOS token
        when it has none.
    """
    print(f"\n--- Loading Quantized Model: {model_id} ---")

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        # Reuse EOS as the pad token when the tokenizer does not define one.
        tokenizer.pad_token = tokenizer.eos_token

    # 4-bit weights, bfloat16 compute.
    four_bit_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
    load_kwargs = {
        "device_map": "auto",
        "quantization_config": four_bit_config,
        "torch_dtype": torch.bfloat16,
    }
    model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)
    return model, tokenizer

def pad_to_len(logits, target_len):
    """Right-pad ``logits`` with zeros along dimension 1 up to ``target_len``.

    Args:
        logits: 3-D tensor; dimension 1 is the one that gets extended.
        target_len: desired size of dimension 1.

    Returns:
        The padded tensor, with the same dtype and device as ``logits``. When
        ``logits`` is already at least ``target_len`` long it is returned unchanged.
    """
    pad_len = target_len - logits.shape[1]
    if pad_len <= 0:
        return logits
    # new_zeros inherits dtype and device from `logits`, so half-precision inputs are
    # not silently upcast to float32 by the concatenation.
    pad = logits.new_zeros((logits.shape[0], pad_len, logits.shape[2]))
    return torch.cat([logits, pad], dim=1)

def compute_distillation_loss(student_logits, teacher_logits, temperature):
    """Temperature-scaled KL divergence between teacher and student distributions.

    Args:
        student_logits: tensor whose last dimension is the vocabulary.
        teacher_logits: tensor with the same shape as ``student_logits``.
        temperature: softening temperature applied to both sets of logits.

    Returns:
        Scalar tensor: ``temperature**2`` times the KL divergence averaged over every
        row (every token position), so the value does not grow with sequence length.
    """
    vocab_size = student_logits.shape[-1]

    # Collapse all leading dimensions so that 'batchmean' averages over token positions
    # rather than over the first dimension only, and do the softmax in float32 for
    # numerical stability with half-precision logits.
    student_rows = student_logits.float().reshape(-1, vocab_size)
    teacher_rows = teacher_logits.to(device=student_logits.device, dtype=torch.float32).reshape(-1, vocab_size)

    soft_teacher_probs = F.softmax(teacher_rows / temperature, dim=-1)
    soft_student_log_probs = F.log_softmax(student_rows / temperature, dim=-1)
    return (temperature**2) * F.kl_div(soft_student_log_probs, soft_teacher_probs, reduction='batchmean')

### 4. Datasets

In [None]:
# 📦 DATASETS
# Load the two SQuAD splits used by the notebook.
train_dataset = load_dataset("squad", split="train")
validation_dataset = load_dataset("squad", split="validation")

# Distillation transfer set: 10,000 training rows after a seeded shuffle.
kd_transfer_set = train_dataset.shuffle(seed=42).select(range(10000))
# Few-shot pool: the first five training rows, unshuffled.
icl_cot_set = train_dataset.select(range(5))

# Single lookup table consumed by the training and evaluation cells.
prepared_data = dict(
    evaluation=validation_dataset,
    kd_transfer=kd_transfer_set,
    icl_cot_examples=icl_cot_set,
)

### 5. Funções da Fase 2: Treinamento

In [6]:
# 📦 TREINAMENTO KD
def run_base_kd_training(student_model, teacher_model, student_tokenizer, teacher_tokenizer, kd_dataset, config, output_dir, samples_per_epoch=200):
    """Distil ``teacher_model`` into a LoRA adapter on ``student_model`` and save it.

    Each example is turned into a context/question prompt, both models score the prompt,
    and the student adapter is updated to match the teacher's next-token distributions.

    Args:
        student_model: causal LM that receives the LoRA adapter.
        teacher_model: causal LM providing the target distributions (run without gradients).
        student_tokenizer: tokenizer for ``student_model``; saved next to the adapter.
        teacher_tokenizer: tokenizer for ``teacher_model``.
        kd_dataset: dataset whose rows expose 'context' and 'question'.
        config: object providing LEARNING_RATE, NUM_EPOCHS, TEMPERATURE and DEVICE.
        output_dir: directory where the adapter and tokenizer are written.
        samples_per_epoch: rows drawn per epoch (default 200); clamped to the dataset size.

    Returns:
        The trained PEFT-wrapped student model.

    Raises:
        ValueError: if student and teacher logits have different vocabulary sizes.
    """
    import warnings

    print(f"\n--- Running Base Knowledge Distillation: {output_dir} ---")

    # Equal vocabulary *sizes* do not imply equal vocabularies. When the token-to-id
    # mappings differ, the per-position KL below compares unrelated tokens.
    if student_tokenizer.get_vocab() != teacher_tokenizer.get_vocab():
        warnings.warn(
            "Student and teacher tokenizers use different vocabularies; "
            "logit-level distillation between them is not aligned token-for-token."
        )

    lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none", task_type="CAUSAL_LM")
    peft_student_model = get_peft_model(student_model, lora_config)

    # Optimise only the parameters that actually receive gradients (the adapter weights).
    # torch.optim.AdamW replaces the deprecated transformers.AdamW; weight_decay=0.0 keeps
    # the no-decay behaviour of the optimizer it replaces.
    trainable_params = [p for p in peft_student_model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(trainable_params, lr=config.LEARNING_RATE, weight_decay=0.0)
    teacher_model.eval()

    # Never ask for more rows than the dataset holds.
    rows_per_epoch = min(samples_per_epoch, len(kd_dataset))

    for epoch in range(config.NUM_EPOCHS):
        print(f"Epoch {epoch+1}/{config.NUM_EPOCHS}")
        peft_student_model.train()
        # Seeding the shuffle with the epoch index gives a different, reproducible subset per epoch.
        epoch_subset = kd_dataset.shuffle(seed=epoch).select(range(rows_per_epoch))
        for example in tqdm(epoch_subset):
            prompt = f"Contexto: {example['context']}\n\nPergunta: {example['question']}"
            teacher_inputs = teacher_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(config.DEVICE)
            student_inputs = student_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(config.DEVICE)
            with torch.no_grad():
                teacher_logits = teacher_model(**teacher_inputs).logits
            student_logits = peft_student_model(**student_inputs).logits

            if student_logits.shape[-1] != teacher_logits.shape[-1]:
                raise ValueError(f"Vocab mismatch: student {student_logits.shape[-1]}, teacher {teacher_logits.shape[-1]}")

            # Compare only the positions both models produced. Zero-padding the shorter
            # sequence would distil from (or towards) fabricated all-zero logits.
            common_len = min(student_logits.shape[1], teacher_logits.shape[1])
            student_logits = student_logits[:, :common_len]
            teacher_logits = teacher_logits[:, :common_len].to(student_logits.device)

            optimizer.zero_grad()
            loss = compute_distillation_loss(student_logits, teacher_logits, config.TEMPERATURE)
            loss.backward()
            optimizer.step()

    os.makedirs(output_dir, exist_ok=True)
    peft_student_model.save_pretrained(output_dir)
    student_tokenizer.save_pretrained(output_dir)
    print(f"Modelo salvo em {output_dir}")
    return peft_student_model

### 6. Avaliação e Prompting

In [None]:
# 📦 PROMPTING E AVALIAÇÃO
def run_generic_evaluation(model, tokenizer, eval_dataset, prompt_template, strategy_name, eval_subset_size=200, num_paths=1, sampling_temperature=0.7):
    """Generate answers for the first rows of ``eval_dataset`` with one prompting strategy.

    Args:
        model: causal LM used for generation.
        tokenizer: tokenizer matching ``model``.
        eval_dataset: dataset whose rows expose 'id', 'context', 'question' and 'answers'.
        prompt_template: ``str.format`` template with ``{context}`` and ``{question}`` fields.
        strategy_name: label printed in the progress header.
        eval_subset_size: number of leading rows to evaluate; clamped to the dataset size.
        num_paths: generations per example. 1 keeps the model's default decoding; values
            above 1 sample that many answers and keep the most frequent one
            (self-consistency majority vote).
        sampling_temperature: sampling temperature, used only when ``num_paths > 1``.

    Returns:
        ``(predictions, references)``: parallel lists of ``{'id', 'prediction_text'}`` and
        ``{'id', 'answers'}`` dicts.

    Raises:
        ValueError: if ``num_paths`` is smaller than 1.
    """
    if num_paths < 1:
        raise ValueError(f"num_paths must be >= 1, got {num_paths}")

    print(f"\n--- Avaliando: {strategy_name} ---")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    predictions, references = [], []

    generation_kwargs = {"max_new_tokens": 60, "pad_token_id": tokenizer.eos_token_id}
    if num_paths > 1:
        # Several distinct reasoning paths require sampling instead of deterministic decoding.
        generation_kwargs.update(do_sample=True, temperature=sampling_temperature, num_return_sequences=num_paths)

    subset_size = min(eval_subset_size, len(eval_dataset))
    for example in tqdm(eval_dataset.select(range(subset_size))):
        prompt = prompt_template.format(context=example['context'], question=example['question'])
        inputs = tokenizer(prompt, return_tensors="pt", max_length=1536, truncation=True).to(device)
        prompt_token_count = inputs["input_ids"].shape[1]
        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_kwargs)

        # Decode only the newly generated tokens. Slicing the decoded string by
        # len(prompt) breaks whenever the prompt was truncated or does not round-trip
        # through the tokenizer character-for-character.
        candidates = [
            tokenizer.decode(sequence[prompt_token_count:], skip_special_tokens=True).strip()
            for sequence in outputs
        ]

        # Majority vote over case/whitespace-normalised answers; ties resolve to the
        # answer that was generated first.
        normalised = [" ".join(candidate.lower().split()) for candidate in candidates]
        winning_form = Counter(normalised).most_common(1)[0][0]
        predicted_answer = candidates[normalised.index(winning_form)]

        predictions.append({'id': example['id'], 'prediction_text': predicted_answer})
        references.append({'id': example['id'], 'answers': example['answers']})

    return predictions, references

In [None]:
# 📦 MODELS AND KD
# Teacher and student are both loaded through the 4-bit loader defined above.
teacher_model, teacher_tokenizer = load_model_quantized("mistralai/Mistral-7B-Instruct-v0.2")
student_model, student_tokenizer = load_model_quantized("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# First distillation pass: teacher -> student, adapter written to ./github_results/kd_model.
run_base_kd_training(
    student_model=student_model,
    teacher_model=teacher_model,
    student_tokenizer=student_tokenizer,
    teacher_tokenizer=teacher_tokenizer,
    kd_dataset=prepared_data['kd_transfer'],
    config=config,
    output_dir="./github_results/kd_model",
)


In [None]:
# 📦 SELF-DISTILLATION TRAINING
print("\n--- Treinando Auto-Destilação ---")

STUDENT_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
KD_ADAPTER_DIR = "./github_results/kd_model"
SELF_KD_ADAPTER_DIR = "./github_results/kd_self_distilled_model"


def load_merged_student(adapter_dir):
    """Load a fresh student base model and fold the adapter stored in ``adapter_dir`` into it.

    A new base model is loaded on every call: attaching and merging an adapter works on the
    base model object it is given, so sharing one base across several adapters would stack
    them on the same weights.

    Returns:
        ``(merged_model, tokenizer)``.
    """
    fresh_base, fresh_tokenizer = load_model_quantized(STUDENT_MODEL_ID)
    merged_model = PeftModel.from_pretrained(fresh_base, adapter_dir).merge_and_unload()
    return merged_model, fresh_tokenizer


# Teacher for this round: the student produced by the first distillation pass.
kd_model_teacher, tokenizer = load_merged_student(KD_ADAPTER_DIR)

# The student from the first pass already carries a LoRA adapter, so start from a clean copy.
self_kd_student, self_kd_tokenizer = load_model_quantized(STUDENT_MODEL_ID)
run_base_kd_training(self_kd_student, kd_model_teacher, self_kd_tokenizer, tokenizer, prepared_data['kd_transfer'], config, SELF_KD_ADAPTER_DIR)

# 📦 LOAD THE MODELS TO EVALUATE
# The teacher is only run in eval mode without gradients during training, so it can be
# reused as the "KD Base" model instead of loading a second copy.
kd_model = kd_model_teacher
kd_self_model, _ = load_merged_student(SELF_KD_ADAPTER_DIR)

In [None]:
# 📦 FINAL EVALUATION
def _escape_braces(text):
    """Escape literal braces so dataset text survives the later ``str.format`` call."""
    return text.replace("{", "{{").replace("}", "}}")


# Three solved examples prepended to the ICL prompt.
few_shot_block = "".join(
    f"Contexto: {_escape_braces(ex['context'])}\nPergunta: {_escape_braces(ex['question'])}\nResposta: {_escape_braces(ex['answers']['text'][0])}\n---\n\n"
    for ex in prepared_data['icl_cot_examples'].select(range(3))
)

# Chain-of-thought template shared by the single-path and the self-consistency strategies.
cot_prompt = "Contexto: {context}\n\nPergunta: {question}\n\nPense passo a passo. A resposta final é:"

# "paths" is the number of generations requested per example.
strategies = {
    "Zero-Shot": {"prompt": "Contexto: {context}\n\nPergunta: {question}\n\nResposta:", "paths": 1},
    "Zero-Shot-CoT": {"prompt": cot_prompt, "paths": 1},
    "ICL (k=3)": {"prompt": few_shot_block + "Contexto: {context}\n\nPergunta: {question}\n\nResposta:", "paths": 1},
    "Auto-Consistência (n=5)": {"prompt": cot_prompt, "paths": 5}
}

models_to_eval = {"KD Base": kd_model, "KD Auto-Destilado": kd_self_model}

# Results are keyed by (model name, strategy name).
results = {}
for model_name, model_instance in models_to_eval.items():
    for strat_name, strat_conf in strategies.items():
        # Both evaluated models derive from the student checkpoint, so the student tokenizer applies.
        preds, refs = run_generic_evaluation(model_instance, student_tokenizer, prepared_data['evaluation'], strat_conf['prompt'], f"{model_name} - {strat_name}", num_paths=strat_conf['paths'])
        metrics = compute_text_metrics(preds, refs)
        results[(model_name, strat_name)] = metrics

In [None]:
# Print one line per (model, strategy) pair.
print("\nResultados:")
for result_key, met in results.items():
    print(f"{result_key}: BLEU {met['bleu']:.1f} | ROUGE-L {met['rougeL']:.1f}")

# Rows are (model, strategy) pairs, columns are the two metrics.
results_df = pd.DataFrame(results).T
plot_path = os.path.join("./github_results", "bleu_rouge_comparison.png")

if results_df.empty:
    # Plotting an empty frame raises, so report the situation instead.
    print("No results to plot.")
else:
    # The output directory only exists if a training cell already created it.
    os.makedirs(os.path.dirname(plot_path), exist_ok=True)
    ax = results_df.plot(kind='bar', figsize=(10,6), title='Comparação BLEU / ROUGE-L entre Estratégias')
    ax.set_ylabel("Score")
    ax.figure.tight_layout()
    ax.figure.savefig(plot_path)
    print(f"Gráfico salvo em {plot_path}")