<a href="https://colab.research.google.com/github/LuisMend12/Blazor-Pizza-Workshop/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install -q transformers sentence-transformers datasets tqdm scipy

!pip install datasets
from datasets import load_dataset

dataset = load_dataset("stsb_multi_mt", "en")
print(dataset)
print(dataset["test"][0])






Downloading readme: 0.00B [00:00, ?B/s]

ValueError: Invalid pattern: '**' can only be an entire path component

In [6]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset
from tqdm import tqdm
from scipy.stats import pearsonr, spearmanr

class SBERTEnhancedSTSGenerator:
    """Coordinate-wise search for a short token sequence ("STS") that follows a context.

    Each candidate sequence is scored by a weighted sum of
      * the mean log-probability a causal language model assigns to the
        candidate's tokens when they follow the context, and
      * the cosine similarity between sentence embeddings of the context and
        of the candidate.

    Args:
        model_name: Causal LM checkpoint passed to ``from_pretrained``.
        sts_length: Number of token slots in the generated sequence.
        top_k: Stored on the instance; no method of this class reads it.
        device: Device for both models; defaults to CUDA when available, else CPU.
        sbert_model_name: Sentence-embedding checkpoint.
        alpha: Weight of the LM score; ``1 - alpha`` weights the embedding score.
    """

    def __init__(self, model_name="gpt2", sts_length=5, top_k=50, device=None,
                 sbert_model_name="all-MiniLM-L6-v2", alpha=0.7):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.sts_length = sts_length
        self.top_k = top_k
        self.sts = [""] * sts_length
        # Honour the caller's device choice for the embedding model as well,
        # instead of letting it pick a device on its own.
        self.sbert = SentenceTransformer(sbert_model_name, device=str(self.device))
        self.alpha = alpha

    def score_sts(self, context_text, candidate_sts, context_embedding=None):
        """Score ``candidate_sts`` as a continuation of ``context_text``.

        Args:
            context_text: Prompt the candidate is appended to.
            candidate_sts: Candidate text to score.
            context_embedding: Optional precomputed sentence embedding of
                ``context_text``; computed here when omitted. Passing it avoids
                re-encoding the same context for every candidate.

        Returns:
            ``alpha * mean_token_log_prob + (1 - alpha) * cosine_similarity``
            as a float, or ``float("-inf")`` when no candidate token is left
            to score (the context alone fills the 512-token window).
        """
        full_input = context_text + " " + candidate_sts
        inputs = self.tokenizer(full_input, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Logits at position t predict token t + 1, so drop the last position
        # and line the rest up against the ids shifted by one.
        log_probs = torch.log_softmax(outputs.logits[:, :-1, :], dim=-1)
        input_ids = inputs["input_ids"]
        token_log_probs = log_probs.gather(2, input_ids[:, 1:].unsqueeze(-1)).squeeze(-1)
        context_len = len(self.tokenizer(context_text)["input_ids"])
        # Clamp at 0: an empty context would otherwise produce a start index of
        # -1 and silently keep only the final token.
        candidate_start = max(context_len - 1, 0)
        sts_log_probs = token_log_probs[:, candidate_start:]
        if sts_log_probs.numel() == 0:
            # The mean of an empty tensor is NaN, which compares False against
            # everything; report the candidate as unscorable instead.
            return float("-inf")
        gpt_score = sts_log_probs.mean().item()

        if context_embedding is None:
            context_embedding = self.sbert.encode(context_text, convert_to_tensor=True)
        candidate_embedding = self.sbert.encode(candidate_sts, convert_to_tensor=True)
        sbert_score = util.cos_sim(context_embedding, candidate_embedding).item()

        return self.alpha * gpt_score + (1 - self.alpha) * sbert_score

    def optimize(self, context_text, iterations=3, vocab_limit=3000):
        """Greedily fill each token slot with its best-scoring vocabulary entry.

        Resets ``self.sts`` to empty slots, then makes ``iterations`` passes
        over the slots. For each slot every candidate token is tried while the
        other slots are held fixed, and the highest-scoring token is kept.

        Args:
            context_text: Prompt handed to ``score_sts``.
            iterations: Number of full passes over the slots.
            vocab_limit: Only the first ``vocab_limit`` entries of
                ``tokenizer.get_vocab()`` are tried as candidates.

        Returns:
            The slots joined by ``tokenizer.convert_tokens_to_string``.
        """
        self.sts = [""] * self.sts_length
        # Both values are the same for every candidate: build them once.
        vocab = list(self.tokenizer.get_vocab().keys())[:vocab_limit]
        context_embedding = self.sbert.encode(context_text, convert_to_tensor=True)
        for _ in range(iterations):
            for i in range(self.sts_length):
                # Start from the slot's current token so the slot never becomes
                # None when no candidate yields a comparable (finite) score.
                best_tok, best_score = self.sts[i], float("-inf")
                for tok in vocab:
                    candidate = self.sts.copy()
                    candidate[i] = tok
                    cand_str = self.tokenizer.convert_tokens_to_string(candidate)
                    score = self.score_sts(context_text, cand_str,
                                           context_embedding=context_embedding)
                    if score > best_score:
                        best_score = score
                        best_tok = tok
                self.sts[i] = best_tok
        return self.tokenizer.convert_tokens_to_string(self.sts)

def evaluate_hf_stsb(limit=10):
    """Correlate generator-based similarity scores with STS-B human ratings.

    Loads the test split of ``mteb/stsbenchmark-sts``, takes its first
    ``limit`` rows (capped at the split size), generates a short text for
    each sentence pair with ``SBERTEnhancedSTSGenerator`` and prints the
    Pearson and Spearman correlations between the resulting cosine
    similarities and the human scores rescaled by 1/5.

    Args:
        limit: Maximum number of sentence pairs to evaluate.

    Returns:
        None. All results are printed.
    """
    dataset = load_dataset("mteb/stsbenchmark-sts")
    test_split = dataset["test"]
    # select() rejects out-of-range indices, so never request more rows than exist.
    n_rows = max(0, min(limit, len(test_split)))
    df = test_split.select(range(n_rows)).to_pandas()

    gen = SBERTEnhancedSTSGenerator(sts_length=5)
    human_scores = []
    model_scores = []

    print("Evaluating on Hugging Face STS-B...")
    for _, row in tqdm(df.iterrows(), total=len(df)):
        s1, s2 = row['sentence1'], row['sentence2']
        label = float(row['score']) / 5.0  # normalize to [0, 1]

        context = f"System: Compare these two sentences.\nSentence 1: {s1}\nSentence 2: {s2}\nSimilarity summary:"
        generated_sts = gen.optimize(context, iterations=2)

        # NOTE(review): the model score compares the generated text with
        # sentence 2 only; sentence 1 enters only through the prompt.
        # Confirm this is the intended metric.
        emb1 = gen.sbert.encode(generated_sts, convert_to_tensor=True)
        emb2 = gen.sbert.encode(s2, convert_to_tensor=True)
        similarity = util.cos_sim(emb1, emb2).item()

        human_scores.append(label)
        model_scores.append(similarity)

        print(f"\n[S1]: {s1}\n[S2]: {s2}\n[GEN]: {generated_sts}\n[GT Score]: {label:.2f} | [Model Score]: {similarity:.2f}")

    print("\n--- Final Evaluation ---")
    if len(model_scores) < 2:
        # pearsonr/spearmanr need at least two observations; report that
        # instead of failing after the scoring work is already done.
        print("Not enough sentence pairs to compute correlations (need at least 2).")
        return

    pearson = pearsonr(model_scores, human_scores)[0]
    spearman = spearmanr(model_scores, human_scores)[0]

    print(f"Pearson correlation: {pearson:.4f}")
    print(f"Spearman correlation: {spearman:.4f}")

# Entry point: evaluate the first 10 STS-B test pairs and print the correlations.
evaluate_hf_stsb(10)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme: 0.00B [00:00, ?B/s]

ValueError: Invalid pattern: '**' can only be an entire path component