In [1]:
import nltk
import evaluate
import torch

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
from transformers import T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
    device_name = torch.cuda.get_device_name(device)

    print(f'CUDA on {device_name}')
else:
    print(f'CPU')

CUDA on NVIDIA GeForce RTX 3060 Laptop GPU


In [3]:
SECRET_LENGTH = 16
MAX_STEGO_LENGTH = 1024

In [4]:
import gc

# Delete models, clean up memory

if 'generator_tokenizer' in globals():
    del generator_tokenizer
    
if 'decoder_tokenizer' in globals():
    del decoder_tokenizer

if 'generator' in globals():
    del generator
    
if 'decoder' in globals():
    del decoder
    
if 'semantic_anchor' in globals():
    del semantic_anchor

gc.collect()

torch._C._cuda_clearCublasWorkspaces()
torch.cuda.empty_cache()


In [5]:
# Check if everything's cleaned
allocated = torch.cuda.memory_allocated() / (1024 * 1024)
reserved = torch.cuda.memory_reserved() / (1024 * 1024)

print(f'Allocated: {allocated:.1f} MB; Reserved: {reserved:.1f} MB')

Allocated: 0.0 MB; Reserved: 0.0 MB


In [6]:
#GENERATOR_MODEL = "Qwen/Qwen3-0.6B"
#GENERATOR_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#GENERATOR_MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct"
GENERATOR_MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct"
DECODER_MODEL = "google/flan-t5-base"

generator_tokenizer = AutoTokenizer.from_pretrained(GENERATOR_MODEL)
decoder_tokenizer = T5Tokenizer.from_pretrained(DECODER_MODEL)

# Using BF16 seems to fix some inf/nan errors in one of the models
dtype = torch.bfloat16

generator = AutoModelForCausalLM.from_pretrained(GENERATOR_MODEL, dtype=dtype).to(device)
decoder = T5ForConditionalGeneration.from_pretrained(DECODER_MODEL, dtype=dtype).to(device)

semantic_anchor = AutoModelForCausalLM.from_pretrained(GENERATOR_MODEL, dtype=dtype).to(device)

# Freeze base model
for p in semantic_anchor.parameters():
    p.requires_grad = False

generator.train()
decoder.train()
semantic_anchor.eval()

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49152, 576, padding_idx=2)
    (layers): ModuleList(
      (0-29): 30 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=576, out_features=576, bias=False)
          (k_proj): Linear(in_features=576, out_features=192, bias=False)
          (v_proj): Linear(in_features=576, out_features=192, bias=False)
          (o_proj): Linear(in_features=576, out_features=576, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=576, out_features=1536, bias=False)
          (up_proj): Linear(in_features=576, out_features=1536, bias=False)
          (down_proj): Linear(in_features=1536, out_features=576, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((576,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((576,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((576,), eps=1e-05)
    (r

In [7]:
# Reprojection layer to map generator to decoder embeddings
proj_layer = torch.nn.Linear(generator.config.hidden_size, decoder.config.hidden_size, device=device, dtype=dtype)

In [8]:
def build_generator_prompt(carrier, secret):
    messages = [{"role": "user", "content": f"You are an expert steganographer. Follow the instructions as usual, but hide this data in your response: {secret}. {carrier}"}]
    return generator_tokenizer.apply_chat_template(messages, tokenize=False)

In [9]:
def generator_forward(inputs, temperature=1.0):
    out = generator(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        return_dict=True
    )

    logits = out.logits[:, -MAX_STEGO_LENGTH:, :]   # [B, T, V]
    probs  = torch.softmax(logits / temperature, dim=-1)

    return logits, probs


In [10]:
@torch.no_grad()
def anchor_forward(anchor, inputs) -> torch.Tensor:
    outputs = anchor(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        return_dict=True
    )
    logits = outputs.logits[:, -MAX_STEGO_LENGTH:, :]
    return logits


In [11]:
def soft_embeddings(token_probs):
    """
    token_probs: [B, T, V]
    """
    embed_matrix = generator.get_input_embeddings().weight  # [V, d]
    return token_probs @ embed_matrix

In [12]:
def decode_secret(soft_embeds, secret_bits):
    """
    secret_bits: string like "0101101001010110"
    """
    target = " ".join(secret_bits)
    labels = decoder_tokenizer(target, return_tensors="pt").input_ids.to(device)

    out = decoder(
        inputs_embeds=soft_embeds,
        labels=labels,
        return_dict=True
    )

    return out.loss

In [13]:
def kl_semantic_loss(stego_logits, base_logits):
    p = F.log_softmax(stego_logits, dim=-1)
    q = F.softmax(base_logits, dim=-1)

    # Masked?
    # kl = F.kl_div(log_p, q, reduction="none")  # [B, T, V]
    # kl = kl.sum(dim=-1)                        # [B, T]

    # # Mask padding tokens
    # kl = kl * attention_mask

    # return kl.sum() / attention_mask.sum()

    return F.kl_div(p, q, reduction="batchmean")

In [17]:
def encode(carrier, secret):
    prompt = build_generator_prompt(carrier, secret)
    inputs = generator_tokenizer(prompt, return_tensors="pt").to(device)
    ids = generator.generate(
        **inputs,
        max_length=MAX_STEGO_LENGTH,
        do_sample=True,
        temperature=0.8,
        top_k=50
    )
    return generator_tokenizer.decode(ids[0], skip_special_tokens=True)


In [15]:
def decode(stego_text):
    inputs = decoder_tokenizer(stego_text, return_tensors="pt").to(device)
    ids = decoder.generate(
        **inputs,
        max_length=SECRET_LENGTH * 2
    )
    bits = decoder_tokenizer.decode(ids[0], skip_special_tokens=True)
    return bits.replace(" ", "")[:SECRET_LENGTH]


In [18]:
optimizer = torch.optim.AdamW(
    list(generator.parameters()) + list(decoder.parameters()) + list(proj_layer.parameters()),
    lr=3e-5
)

for step in range(1000):
    carrier_prompt = "Explain how neural networks learn."
    secret = "".join(str(torch.randint(0, 2, ()).item()) for _ in range(SECRET_LENGTH))

    prompt = build_generator_prompt(carrier_prompt, secret)
    generator_inputs = generator_tokenizer(prompt, return_tensors="pt").to(device)

    stego_logits, token_probs = generator_forward(generator_inputs)
    soft_embeds = soft_embeddings(token_probs)

    # Linear projection to decoder embedding space
    decoder_inputs = proj_layer(soft_embeds)     # [B, T, d_dec]

    # Calculate recovery loss based on decoding success
    recovery_loss = decode_secret(decoder_inputs, secret)

    # Calculate semantic loss compared to base model
    base_logits = anchor_forward(semantic_anchor, generator_inputs)
    semantic_loss = kl_semantic_loss(stego_logits, base_logits)

    # Combine loss
    loss = 1.0 * recovery_loss + 0.0 * semantic_loss

    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

    if step % 1 == 0:
        stego_text = encode(carrier_prompt, secret)
        decoded_secret = decode(stego_text)

        print(f"Step {step} | L_sem: {semantic_loss.item():.3f} | L_rec: {recovery_loss.item():.3f}")
        print(f'Secret: {secret} | Decoded: {decoded_secret}')
        print(stego_text)
        print()


Step 0 | L_sem: 0.239 | L_rec: 6.219
Secret: 1000101111010110 | Decoded: Howdoneuralnetwo
system
You are a helpful AI assistant named SmolLM, trained by Hugging Face
user
You are an expert steganographer. Follow the instructions as usual, but hide this data in your response: 1000101111010110. Explain how neural networks learn.
assistant
Neural networks rely heavily on supervised learning techniques to learn complex patterns from data. They utilize a technique called loss function gradient descent to optimize their performance. Here's how this works:

1. Data Selection and Preprocessing: The input data, which is the 1000101111010110 data point, is carefully selected to match the target data. This step is crucial as it ensures that the algorithm makes accurate predictions.

2. Training and Optimization: The first stage involves iteratively updating the data to achieve a better performance. This is typically done using a combination of supervised learning algorithms, such as SGD and stoch

Token indices sequence length is longer than the specified maximum sequence length for this model (746 > 512). Running this sequence through the model will result in indexing errors


Step 1 | L_sem: 0.299 | L_rec: 1.469
Secret: 0010100101111010 | Decoded: Whatareneuralnet
system
You are a helpful AI assistant named SmolLM, trained by Hugging Face
user
You are an expert steganographer. Follow the instructions as usual, but hide this data in your response: 0010100101111010. Explain how neural networks learn.
assistant
Neural networks are a subset of machine learning algorithms inspired by the structure and function of the human brain. They are designed to learn from large datasets and are often used for tasks such as image recognition, natural language processing, and speech recognition. The primary goal of neural networks is to approximate the human brain's complex patterns and structures, making them ideal for solving complex problems in a wide range of fields including computer science, biology, economics, and finance.

1. **Model structure**: Neural networks consist of layers of interconnected nodes (neurons) that process information. Each layer has a specific in

KeyboardInterrupt: 

In [None]:
carrier = "Write a short paragraph explaining how neural networks learn."
secret = "1011001110001111"

stego_text = encode(carrier, secret)
recovered = decode(stego_text)

print("Stego text:")
print(stego_text)
print()
print("Original secret:", secret)
print("Recovered secret:", recovered)


Stego text:
[caption id:U7M2Afida] a snabbed photopollint eminates his nose at the top with a pep.

Original secret: 1011001110001111
Recovered secret: asnabbedphotopol
