In [None]:
!pip install lambeq[extras]

Collecting lambeq[extras]
  Downloading lambeq-0.4.3-py3-none-any.whl.metadata (5.4 kB)
Collecting pytket>=1.31.0 (from lambeq[extras])
  Downloading pytket-1.40.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.7 kB)
Collecting tensornetwork (from lambeq[extras])
  Downloading tensornetwork-0.4.6-py3-none-any.whl.metadata (6.8 kB)
Collecting discopy>=1.1.7 (from lambeq[extras])
  Downloading discopy-1.2.0-py3-none-any.whl.metadata (21 kB)
Collecting pennylane>=0.29.1 (from lambeq[extras])
  Downloading PennyLane-0.40.0-py3-none-any.whl.metadata (10 kB)
Collecting pennylane-honeywell (from lambeq[extras])
  Downloading PennyLane_Honeywell-0.34.1-py3-none-any.whl.metadata (8.0 kB)
Collecting pennylane-qiskit (from lambeq[extras])
  Downloading PennyLane_qiskit-0.40.0-py3-none-any.whl.metadata (6.4 kB)
Collecting pytket-qiskit>=0.21.0 (from lambeq[extras])
  Downloading pytket_qiskit-0.63.0-py3-none-any.whl.metadata (4.8 kB)
Collecting rustworkx>=0.14.0 (from penn

In [None]:
import os
import re
import random
import gc
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
from sklearn.model_selection import train_test_split
from IPython.display import display

from lambeq import BobcatParser, BobcatParseError
from lambeq import StronglyEntanglingAnsatz, AtomicType
from lambeq import PennyLaneModel, Dataset, PytorchTrainer

csv_path = 'bbc-news-data.csv'
# Fail fast when the dataset file is not next to the notebook.
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"No se encontró {csv_path}")

# The file is tab-separated despite its .csv extension.
df = pd.read_csv(csv_path, sep='\t', engine='python')
print("Archivo cargado exitosamente.")
display(df.head())

# Headlines are the inputs, news categories the labels.
X = df['title'].values
y = df['category'].values

# Split into train/test (80/20, stratified by category, fixed seed).
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Filter short headlines
def word_count(sentence):
    """Return the number of whitespace-separated tokens in ``sentence``."""
    tokens = sentence.split()
    return len(tokens)

# Keep only training headlines of at most 8 words; labels are not needed here.
filtered = [text for text in X_train_full if word_count(text) <= 8]
print("Num de titulares cortos:", len(filtered))

# Warn (but carry on) when fewer than 50 short headlines are available.
if not len(filtered) >= 50:
    print("Muy pocos ejemplos tras filtrar, los resultados serán muy limitados.")

# Cap the working set at the first 50 headlines.
filtered = filtered[:50]

# 2. Configure Bobcat and the quantum ansatz

parser = BobcatParser(root_cats=['S'])

# Every atomic grammatical type is mapped to a single qubit.
ob_map = dict.fromkeys(
    (
        AtomicType.NOUN,
        AtomicType.CONJUNCTION,
        AtomicType.PREPOSITIONAL_PHRASE,
        AtomicType.SENTENCE,
        AtomicType.PUNCTUATION,
        AtomicType.NOUN_PHRASE,
    ),
    1,
)
ansatz = StronglyEntanglingAnsatz(ob_map=ob_map, n_layers=2, n_single_qubit_params=3)


# Quantum discriminator

class QuantumDiscriminatorWrapper:
    """Score text with a lambeq ``PennyLaneModel`` loaded from a checkpoint.

    The wrapper relies on the module-level ``parser`` and ``ansatz`` objects
    to turn a sentence into a circuit before handing it to the model.

    Attributes:
        model: The loaded model, or ``None`` when the checkpoint file was
            not found at construction time.
    """

    def __init__(self, model_path='reduced_quantum_discriminator.lt'):
        """Load the checkpoint at ``model_path`` if it exists.

        A missing checkpoint is reported on stdout and leaves ``model`` set
        to ``None``; ``predict_proba`` then always returns 0.0.
        """
        # Always define the attribute so "no model" is an explicit state
        # rather than a missing attribute.
        self.model = None
        if not os.path.exists(model_path):
            print(f"No se encontró el modelo cuántico en {model_path}. "
                  "Debes entrenarlo y guardarlo antes.")
            return

        self.model = PennyLaneModel.from_checkpoint(checkpoint_path=model_path)
        self.model.eval()
        print("Discriminador cuántico cargado.")

    def _build_circuit(self, text):
        """Parse ``text`` into a diagram and apply the ansatz to it."""
        text_clean = text.strip()
        diag = parser.sentence2diagram(text_clean)
        circuit = ansatz(diag)
        return circuit

    def predict_proba(self, text):
        """Return the probability of ``text`` being real (class 1).

        Every failure mode yields 0.0 instead of raising, so that a training
        loop calling this method is never interrupted:

        * no model was loaded;
        * ``text`` is empty or whitespace-only (the generator can emit such
          sequences); it is scored without invoking the parser;
        * the parser raises ``BobcatParseError``;
        * the model raises ``KeyError`` because the diagram is not present
          in its circuit map (this happens frequently for unseen text).
        """
        # getattr keeps instances created without __init__ working too.
        if getattr(self, 'model', None) is None:
            return 0.0

        # Blank text has no words to parse.
        if not text or not text.strip():
            return 0.0

        try:
            circuit = self._build_circuit(text)
        except BobcatParseError:
            # Parsing failed => zero probability
            return 0.0

        with torch.no_grad():
            try:
                logits = self.model([circuit])
            except KeyError:
                # The diagram is not in the model's circuit map
                print(f"(Info) Diagrama no visto: '{text}'. Se asigna prob=0.0")
                return 0.0
            probs = F.softmax(logits, dim=-1)
            # Row 0 is the single circuit passed in; column 1 is the "real" class.
            prob_real = probs[0, 1].item()
        return prob_real


# Checkpoint file from which the quantum discriminator is loaded.
Q_DISCRIMINATOR_PATH = 'reduced_quantum_discriminator.lt'
quantum_disc = QuantumDiscriminatorWrapper(Q_DISCRIMINATOR_PATH)


# Text generator
def clean_text_simple(text):
    """Strip punctuation from ``text``, lowercase it and trim the ends.

    Any character that is neither a word character nor whitespace is removed.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    return without_punctuation.lower().strip()

# Normalised copies of the filtered headlines.
all_texts = list(map(clean_text_simple, filtered))

# Count how often each word occurs across all headlines.
word_freq = {}
for headline in all_texts:
    for token in headline.split():
        word_freq[token] = word_freq.get(token, 0) + 1

# Most frequent first, limited to the top 500 words.
sorted_words = sorted(
    word_freq.items(), key=lambda pair: pair[1], reverse=True
)[:500]

# Index 0 is padding and index 1 is end-of-sequence; real words follow.
idx2word = ["<PAD>", "<EOS>", *(word for word, _ in sorted_words)]
word2idx = {word: index for index, word in enumerate(idx2word)}
vocab_size = len(idx2word)
MAX_LEN = 8

def text_to_ids(text):
    """Encode ``text`` as a list of exactly ``MAX_LEN`` vocabulary indices.

    Words missing from ``word2idx`` are encoded as 0. Sequences shorter than
    ``MAX_LEN`` receive one ``<EOS>`` index and are then padded with
    ``<PAD>``; longer ones are truncated.
    """
    tokens = clean_text_simple(text).split()
    ids = [word2idx.get(token, 0) for token in tokens][:MAX_LEN]
    if len(ids) < MAX_LEN:
        ids.append(word2idx["<EOS>"])
    # Fill whatever room is left with padding.
    ids.extend([word2idx["<PAD>"]] * (MAX_LEN - len(ids)))
    return ids

def ids_to_text(ids):
    """Decode vocabulary indices into a space-separated string.

    Index 0 is skipped, and decoding stops at the first ``<EOS>`` entry.
    """
    decoded = []
    for token_id in ids:
        if token_id != 0:
            word = idx2word[token_id]
            if word == "<EOS>":
                break
            decoded.append(word)
    return " ".join(decoded)

class TextGenerator(nn.Module):
    """Embedding -> LSTM -> linear language model over vocabulary indices."""

    def __init__(self, vocab_size, embed_dim=64, hidden_dim=64):
        """Build the layers.

        Args:
            vocab_size: Number of distinct token indices.
            embed_dim: Size of each token embedding.
            hidden_dim: Size of the LSTM hidden state.
        """
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.lin = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """Return next-token logits for every position of ``x``.

        Args:
            x: Long tensor of token indices, batch first.

        Returns:
            Tensor of logits with a trailing dimension of ``vocab_size``.
        """
        emb = self.embed(x)
        out, _ = self.lstm(emb)
        return self.lin(out)

    def sample(self, batch_size=1, seq_len=MAX_LEN):
        """Sample token sequences autoregressively, starting from index 0.

        Args:
            batch_size: Number of sequences to draw.
            seq_len: Number of tokens per sequence.

        Returns:
            A flat list of ``seq_len`` ints when ``batch_size == 1``;
            otherwise a list of ``batch_size`` such lists. Previously any
            ``batch_size`` other than 1 silently produced an empty list.
        """
        # Create the start token on the model's own device.
        device = self.embed.weight.device
        inp = torch.zeros(batch_size, 1, dtype=torch.long, device=device)
        hidden = None
        sequences = [[] for _ in range(batch_size)]

        # Only integer tokens are returned, so no autograd graph is needed.
        with torch.no_grad():
            for _ in range(seq_len):
                out, hidden = self.lstm(self.embed(inp), hidden)
                logits = self.lin(out[:, -1, :])
                probs = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, 1)
                for row, token in zip(sequences, next_token.squeeze(1).tolist()):
                    row.append(token)
                # Feed the sampled token back in as the next input.
                inp = next_token

        return sequences[0] if batch_size == 1 else sequences

# Generator network and the Adam optimiser that updates its parameters.
gen = TextGenerator(vocab_size=vocab_size, embed_dim=64, hidden_dim=64)
gen_optim = optim.Adam(params=gen.parameters(), lr=1e-3)


# Adversarial loop
def generator_step(generator, disc_q, steps=1):
    """Run ``steps`` REINFORCE updates of ``generator`` against ``disc_q``.

    Each step samples one sequence, scores its decoded text with
    ``disc_q.predict_proba`` and minimises ``-reward * log p(sequence)``
    using the module-level ``gen_optim`` optimiser. One progress line is
    printed per step.

    Args:
        generator: Model exposing ``sample`` and a forward pass that returns
            per-position logits.
        disc_q: Object exposing ``predict_proba(text) -> float``.
        steps: Number of updates to perform.
    """
    generator.train()
    device = next(generator.parameters()).device
    for _ in range(steps):
        gen_optim.zero_grad()

        # 1) Sample a token sequence and decode it
        tokens = generator.sample(batch_size=1, seq_len=MAX_LEN)
        gen_text = ids_to_text(tokens)

        # 2) Reward from the discriminator
        reward = disc_q.predict_proba(gen_text)

        # 3) Log-probability of the sampled sequence.
        # Token i was sampled after the generator had seen the start token 0
        # followed by tokens 0..i-1, so the input is the sampled sequence
        # shifted right by one. Feeding the tokens un-shifted would score
        # each token conditioned on itself.
        sampled = torch.tensor(tokens[:MAX_LEN], dtype=torch.long, device=device)
        inp = torch.zeros(1, sampled.numel(), dtype=torch.long, device=device)
        inp[0, 1:] = sampled[:-1]
        logits = generator(inp)

        log_probs = F.log_softmax(logits[0], dim=-1)
        total_logprob = log_probs.gather(1, sampled.unsqueeze(1)).sum()

        loss = - reward * total_logprob

        loss.backward()
        gen_optim.step()

        print(f"   gen_text: '{gen_text}'  reward={reward:.4f}  loss={loss.item():.4f}")

N_EPOCHS_ADV = 10
print("\n=== Entrenamiento Adversarial (Quantum Discriminator) ===")
# One generator update per epoch.
for epoch in range(N_EPOCHS_ADV):
    print(f"\nEpoch {epoch+1}/{N_EPOCHS_ADV}")
    generator_step(generator=gen, disc_q=quantum_disc, steps=1)

print("\n=== Fin del entrenamiento adversarial DEMO ===\n")

# Show five samples from the generator together with their discriminator score.
gen.eval()
for _ in range(5):
    txt = ids_to_text(gen.sample())
    prob_real = quantum_disc.predict_proba(txt)
    print(f"Generated: '{txt}'  -> quantum_disc prob_real={prob_real:.4f}")


Archivo cargado exitosamente.


Unnamed: 0,category,filename,title,content
0,business,001.txt,Ad sales boost Time Warner profit,Quarterly profits at US media giant TimeWarne...
1,business,002.txt,Dollar gains on Greenspan speech,The dollar has hit its highest level against ...
2,business,003.txt,Yukos unit buyer faces loan claim,The owners of embattled Russian oil giant Yuk...
3,business,004.txt,High fuel prices hit BA's profits,British Airways has blamed high fuel prices f...
4,business,005.txt,Pernod takeover talk lifts Domecq,Shares in UK drinks and food firm Allied Dome...


Num de titulares cortos: 1779
Discriminador cuántico cargado.

=== Entrenamiento Adversarial (Quantum Discriminator) ===

Epoch 1/10
   gen_text: 'open for tottenham defy spiderman plots'  reward=0.0000  loss=0.0000

Epoch 2/10
(Info) Diagrama no visto: 'betting school wins philippoussis rusedski makes returns boss'. Se asigna prob=0.0
   gen_text: 'betting school wins philippoussis rusedski makes returns boss'  reward=0.0000  loss=0.0000

Epoch 3/10
(Info) Diagrama no visto: 'gloom worlds spotlight edwards benitez sundance running call'. Se asigna prob=0.0
   gen_text: 'gloom worlds spotlight edwards benitez sundance running call'  reward=0.0000  loss=0.0000

Epoch 4/10

Epoch 5/10
(Info) Diagrama no visto: 'pipeline 1m more new wants bnp tips bmw'. Se asigna prob=0.0
   gen_text: 'pipeline 1m more new wants bnp tips bmw'  reward=0.0000  loss=0.0000

Epoch 6/10
(Info) Diagrama no visto: 'microsoft stop 1638m state faces media us police'. Se asigna prob=0.0
   gen_text: 'microsoft stop

**Parece que el discriminador no es capaz de evaluar oraciones que no haya visto durante su entrenamiento (realizado en el archivo reduced_discriminador). Esto no se debe a un mal rendimiento del discriminador: el modelo lanza directamente una excepción (`KeyError`) sin llegar a hacer la predicción.**

Esto hace que sea inviable utilizar lambeq para trabajar dentro de una QGAN.