In [1]:
import sys
# Add the parent directory to the module search path so that the
# `models.transformer_lm` import in the next cell can be resolved.
# NOTE(review): "../" is resolved against the kernel's working directory, so
# this presumably assumes the notebook is run from its own folder — confirm.
sys.path.append("../") 

In [6]:
import torch
from transformers import AutoTokenizer
from models.transformer_lm import MiniTransformerLM
import random
import os

In [8]:
# Parameters
device = "cuda" if torch.cuda.is_available() else "cpu"
vocab_model = "gpt2"
model_path = "../checkpoints/minitransformer.pt"
max_tokens = 128  # passed to the model as max_len and used as the generation length
num_samples = 5   # number of texts generated for the diversity metrics

# Tokenizer: reuse the EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(vocab_model)
tokenizer.pad_token = tokenizer.eos_token

# Model: these hyperparameters are expected to match the ones used when the
# checkpoint was saved, otherwise load_state_dict() below will reject it.
model = MiniTransformerLM(
    vocab_size=tokenizer.vocab_size,
    d_model=256,
    n_heads=4,
    n_layers=4,
    max_len=max_tokens
).to(device)

# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
# The file is consumed purely as a state dict, so nothing else is needed.
model.load_state_dict(
    torch.load(model_path, map_location=device, weights_only=True)
)

# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the module structure.
model.eval()


MiniTransformerLM(
  (token_emb): Embedding(50257, 256)
  (pos_emb): Embedding(128, 256)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=1024, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=1024, out_features=256, bias=True)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (head): Linear(in_features=256, out_features=50257, bias=True)
)

In [None]:
# Autoregressive generation function
def generar_texto(prompt="", max_len=100, do_sample=False, temperature=1.0, top_k=None):
    """Generate a continuation of ``prompt`` one token at a time.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt: Text to condition on. An empty or whitespace-only prompt is
            replaced by the tokenizer's EOS token.
        max_len: Number of new tokens to append to the prompt.
        do_sample: If False (default) the highest-scoring token is chosen at
            every step, so the output is deterministic for a given prompt.
            If True the next token is drawn from the softmax distribution.
        temperature: Divisor applied to the logits before the softmax; only
            used when ``do_sample`` is True. Must be > 0.
        top_k: If given, sampling is restricted to the ``top_k`` highest
            scoring tokens; only used when ``do_sample`` is True.

    Returns:
        The decoded text (prompt plus generated tokens) with special tokens
        removed.

    Raises:
        ValueError: If sampling is requested with a non-positive
            ``temperature`` or a ``top_k`` smaller than 1.
    """
    if do_sample:
        if temperature <= 0:
            raise ValueError("temperature must be > 0 when do_sample=True")
        if top_k is not None and top_k < 1:
            raise ValueError("top_k must be a positive integer or None")

    if not prompt.strip():
        prompt = tokenizer.eos_token

    ids = tokenizer.encode(prompt, return_tensors="pt").to(device).long()

    # The model's positional embedding table has a fixed number of rows, so
    # feeding more tokens than that would presumably index past its end (the
    # model's forward pass is not visible here). Only the most recent
    # `ctx_len` tokens are fed to the model; if the attribute is missing the
    # full sequence is used, as before.
    pos_emb = getattr(model, "pos_emb", None)
    ctx_len = getattr(pos_emb, "num_embeddings", None)

    with torch.no_grad():
        for _ in range(max_len):
            context = ids if ctx_len is None else ids[:, -ctx_len:]
            attn_mask = torch.ones_like(context)  # no padding: attend to every position
            logits = model(context, attention_mask=attn_mask)
            step_logits = logits[:, -1, :]  # scores for the token following the context

            if do_sample:
                step_logits = step_logits / temperature
                if top_k is not None:
                    k = min(top_k, step_logits.size(-1))
                    # Smallest score that is still inside the top-k set.
                    kth_best = torch.topk(step_logits, k, dim=-1).values[:, -1:]
                    step_logits = step_logits.masked_fill(
                        step_logits < kth_best, float("-inf")
                    )
                probs = torch.softmax(step_logits, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)
            else:
                # keepdim=True yields shape (batch, 1) for any batch size.
                next_token = torch.argmax(step_logits, dim=-1, keepdim=True)

            ids = torch.cat([ids, next_token], dim=1)

    return tokenizer.decode(ids[0], skip_special_tokens=True)

# Diversity metrics
def distinct_n(seqs, n):
    """Return the ratio of unique n-grams to total n-grams over ``seqs``.

    Each text is tokenized by whitespace and its n-grams are pooled across
    all texts before counting.

    Args:
        seqs: Iterable of strings.
        n: Size of the n-grams.

    Returns:
        ``unique / total`` as a float, or ``0`` when no n-gram could be
        formed (no texts, or every text has fewer than ``n`` tokens).
    """
    pooled = []
    for sentence in seqs:
        words = sentence.split()
        # Zipping n progressively shifted copies of the token list yields
        # every window of n consecutive tokens.
        shifted = [words[offset:] for offset in range(n)]
        pooled.extend(zip(*shifted))

    if not pooled:
        return 0
    return len(set(pooled)) / len(pooled)



In [14]:
# Generate the samples, printing each one under a separator line.
# NOTE(review): generar_texto is called with the same empty prompt every time
# and picks the argmax token at each step, so all samples come out identical
# and the metrics below only measure repetition across copies of one text.
resultados = []
for _ in range(num_samples):
    texto = generar_texto("", max_len=max_tokens)
    print("-" * 30, texto, sep="\n")
    resultados.append(texto)

# Compute distinct-1 / distinct-2 over the pooled samples and report them.
d1, d2 = (distinct_n(resultados, n) for n in (1, 2))
print(
    "\nMétricas de diversidad:",
    f"Distinct-1: {d1:.4f}",
    f"Distinct-2: {d2:.4f}",
    sep="\n",
)

  output = torch._nested_tensor_from_mask(


------------------------------
................................................................................................................................
------------------------------
................................................................................................................................
------------------------------
................................................................................................................................
------------------------------
................................................................................................................................
------------------------------
................................................................................................................................

Métricas de diversidad:
Distinct-1: 0.2000
Distinct-2: 0.0000
