In [1]:
import nltk
import evaluate
import torch

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
from transformers import T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
    device_name = torch.cuda.get_device_name(device)

    print(f'CUDA on {device_name}')
else:
    print(f'CPU')

CUDA on NVIDIA RTX 500 Ada Generation Laptop GPU


In [3]:
SECRET_LENGTH = 16
MAX_STEGO_LENGTH=128

In [4]:
import gc

# Delete models, clean up memory

if 'tokenizer' in globals():
    del tokenizer

if 'generator' in globals():
    del generator
    
if 'decoder' in globals():
    del decoder
    
if 'semantic_anchor' in globals():
    del semantic_anchor

gc.collect()

torch._C._cuda_clearCublasWorkspaces()
torch.cuda.empty_cache()


In [5]:
# Check if everything's cleaned
allocated = torch.cuda.memory_allocated() / (1024 * 1024)
reserved = torch.cuda.memory_reserved() / (1024 * 1024)

print(f'Allocated: {allocated:.1f} MB; Reserved: {reserved:.1f} MB')

Allocated: 0.0 MB; Reserved: 0.0 MB


In [6]:
GENERATOR_MODEL = "Qwen/Qwen3-0.6B"
DECODER_MODEL = "google/flan-t5-base"
SEMANTIC_ANCHOR = "sentence-transformers/all-MiniLM-L6-v2"
#GENERATOR_MODEL = "mistralai/Mistral-7B-v0.1"
#GENERATOR_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#GENERATOR_MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct"

tokenizer = AutoTokenizer.from_pretrained(GENERATOR_MODEL)
# tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)

generator = AutoModelForCausalLM.from_pretrained(GENERATOR_MODEL, dtype="auto").to(device)
decoder = T5ForConditionalGeneration.from_pretrained(DECODER_MODEL, dtype="auto").to(device)

semantic_anchor = SentenceTransformer(SEMANTIC_ANCHOR).to(device)

# Freeze base model
for p in semantic_anchor.parameters():
    p.requires_grad = False

generator.train()
decoder.train()
semantic_anchor.eval()

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [None]:
# Reprojection layer to map generator to decoder embeddings
proj_layer = torch.nn.Linear(generator.config.hidden_size, decoder.config.hidden_size, device=device, dtype=torch.bfloat16)

In [11]:
def build_prompt(carrier, secret):
    return f"{carrier}\n[hidden:{secret}]\n"

In [12]:
def generator_forward(prompt, temperature=1.0):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    out = generator(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        return_dict=True
    )

    logits = out.logits[:, -MAX_STEGO_LENGTH:, :]   # [B, T, V]
    probs  = torch.softmax(logits / temperature, dim=-1)

    return logits, probs


In [13]:
@torch.no_grad()
def anchor_forward(anchor, prompt: str) -> torch.Tensor:
    """
    Returns logits from the frozen base generator. No gradients flow.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = anchor(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        return_dict=True
    )
    logits = outputs.logits[:, -MAX_STEGO_LENGTH:, :]
    return logits


In [14]:
def soft_embeddings(token_probs):
    """
    token_probs: [B, T, V]
    """
    embed_matrix = generator.get_input_embeddings().weight  # [V, d]
    return token_probs @ embed_matrix

In [15]:
@torch.no_grad()
def soft_to_text(token_probs):
    ids = token_probs.argmax(dim=-1)
    return tokenizer.decode(ids[0], skip_special_tokens=True)


In [16]:
def decode_secret(soft_embeds, secret_bits):
    """
    secret_bits: string like "0101101001010110"
    """
    target = " ".join(secret_bits)
    labels = tokenizer(target, return_tensors="pt").input_ids.to(device)

    out = decoder(
        inputs_embeds=soft_embeds,
        labels=labels,
        return_dict=True
    )

    return out.loss

In [17]:
def kl_semantic_loss(stego_logits, base_logits):
    p = F.log_softmax(stego_logits, dim=-1)
    q = F.softmax(base_logits, dim=-1)
    return F.kl_div(p, q, reduction="batchmean")

In [18]:
def encode(carrier, secret):
    prompt = build_prompt(carrier, secret)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    ids = generator.generate(
        **inputs,
        max_length=MAX_STEGO_LENGTH,
        do_sample=True,
        temperature=0.8,
        top_k=50
    )
    return tokenizer.decode(ids[0], skip_special_tokens=True)


In [19]:
def decode(stego_text):
    inputs = tokenizer(stego_text, return_tensors="pt").to(device)
    ids = decoder.generate(
        **inputs,
        max_length=SECRET_LENGTH * 2
    )
    bits = tokenizer.decode(ids[0], skip_special_tokens=True)
    return bits.replace(" ", "")[:SECRET_LENGTH]


In [20]:
optimizer = torch.optim.AdamW(
    list(generator.parameters()) + list(decoder.parameters()) + list(proj_layer.parameters()),
    lr=3e-5
)

for step in range(1000):
    carrier_prompt = "Explain how neural networks learn."
    secret = "".join(str(torch.randint(0, 2, ()).item()) for _ in range(SECRET_LENGTH))

    prompt = build_prompt(carrier_prompt, secret)

    stego_logits, token_probs = generator_forward(prompt)
    soft_embeds = soft_embeddings(token_probs)

    # Linear projection to decoder embedding space
    decoder_inputs = proj_layer(soft_embeds)     # [B, T, d_dec]

    # Calculate recovery loss based on decoding success
    recovery_loss = decode_secret(decoder_inputs, secret)

    # Calculate semantic loss compared to base model
    base_logits = anchor_forward(semantic_anchor, carrier_prompt)
    semantic_loss = kl_semantic_loss(stego_logits, base_logits)

    # Combine loss
    loss = 0.7 * recovery_loss + 0.3 * semantic_loss

    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

    if step % 1 == 0:
        stego_text = encode(carrier_prompt, secret)
        decoded_secret = decode(stego_text)

        print(f"Step {step} | L_sem: {semantic_loss.item():.3f} | L_rec: {recovery_loss.item():.3f}")
        print(f'Secret: {secret} | Decoded: {decoded_secret}')
        print(stego_text)


RuntimeError: mat1 and mat2 must have the same dtype, but got BFloat16 and Float

In [None]:
carrier = "Write a short paragraph explaining how neural networks learn."
secret = "1011001110001111"

stego_text = encode(carrier, secret)
recovered = decode(stego_text)

print("Stego text:")
print(stego_text)
print()
print("Original secret:", secret)
print("Recovered secret:", recovered)


Stego text:
[caption id:U7M2Afida] a snabbed photopollint eminates his nose at the top with a pep.

Original secret: 1011001110001111
Recovered secret: asnabbedphotopol
