# Automatic Prompt Tuning

In [1]:
%pip install -q huggingface-hub==0.23.2
%pip install -q transformers==4.47.0
%pip install -q datasets==2.19.1
%pip install -q sentence-transformers==2.7.0
%pip install -q optuna==3.6.1

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
transformers 4.47.0 requires huggingface-hub<1.0,>=0.24.0, but you have huggingface-hub 0.23.2 which is incompatible.[0m[31m
[0m

## 0. Setup

In [2]:
import os
import yaml
from google.colab import drive
from getpass import getpass

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Read YAML file
f_path = "/content/drive/MyDrive/GitHub/python-codebase/machine_learning/private_keys.yml"
with open(f_path, 'r') as stream:
    data_loaded = yaml.safe_load(stream)
os.environ['HF_API_TOKEN'] = data_loaded['HF_API_KEY']
os.environ['GITHUB_TOKEN'] = data_loaded['GITHUB_TOKEN']

In [4]:
# Set up token
from huggingface_hub import login
login(token=os.environ['HF_API_TOKEN'])

## 1. Initial example

In [5]:
import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset
from joblib import Parallel, delayed
from tqdm import tqdm

In [16]:
# Helper functions
def get_embedding(text):
    """Generate sentence embeddings for a given text."""
    return embedding_model.encode(text, convert_to_tensor=True)

def compute_similarity(output, target):
    """Compute cosine similarity between generated output and target text."""
    output_embedding = get_embedding(output)
    target_embedding = get_embedding(target)
    return util.cos_sim(output_embedding, target_embedding).item()

def generate_text(prompt, article):
    """Generate text using the language model given a prompt and article."""
    input_text = prompt + article
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    outputs = lm.generate(**inputs, max_new_tokens=128, num_return_sequences=1, do_sample=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def optimize_prompt(current_prompt, target, output, similarity):
    """Optimize the prompt using another LLM."""
    input_text = (
        f"Current Prompt: {current_prompt}\n"
        f"Target: {target}\n"
        f"Generated Output: {output}\n"
        f"Cosine Similarity: {similarity:.4f}\n"
        f"Provide an improved prompt:"
    )
    inputs = optimizer_tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    outputs = optimizer_lm.generate(**inputs, max_new_tokens=128, num_return_sequences=1, do_sample=True)
    return optimizer_tokenizer.decode(outputs[0], skip_special_tokens=True)

In [11]:
# Load the language model and tokenizer
#lm_model_name = "EleutherAI/gpt-neo-125M"
#lm_model_name = "microsoft/Phi-3-mini-4k-instruct"
#lm_model_name = "akjindal53244/Llama-3.1-Storm-8B"
#lm_model_name = "gpt2-medium"
lm_model_name = "distilgpt2"
lm = AutoModelForCausalLM.from_pretrained(lm_model_name)
tokenizer = AutoTokenizer.from_pretrained(lm_model_name)

# Load a pre-trained embedding model for similarity calculation
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(embedding_model_name)

# Load another LLM for prompt optimization
#optimizer_model_name = "EleutherAI/gpt-neo-125M"  # Replace with a suitable model for prompt optimization
#optimizer_model_name = "microsoft/Phi-3-mini-4k-instruct"
#optimizer_model_name = "gpt2-medium"
optimizer_model_name = "distilgpt2"
optimizer_lm = AutoModelForCausalLM.from_pretrained(optimizer_model_name)
optimizer_tokenizer = AutoTokenizer.from_pretrained(optimizer_model_name)

In [12]:
# Load an example dataset for text generation
dataset = load_dataset("xsum", split="test[:15]")  # A dataset with input and target texts

# Base prompt to tune
base_prompt = "Summarize the following article: "

# Hyperparameters for prompt tuning
num_iterations = 5

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [13]:
# Prepare dataset articles and targets
articles = dataset["document"]  # Input texts for generation
targets = dataset["summary"]  # Target texts to match

dataset[0]

{'document': 'Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.\nWorkers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.\nThe Welsh Government said more people than ever were getting help to address housing problems.\nChanges to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.\nPrison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.\nHowever, the same could not be said for men, the charity said, because issues which often affect them, such as post traumatic stress disorder or drug dependency, were often viewed as less of a priority.\nAndrew Stevens, who works in Welsh prisons trying to secure housing for prison leavers, said the

### Parallelize with Joblib

In [None]:
# Function to process each article and target
def process_article(article, target, optimized_prompt):
    generated_output = generate_text(optimized_prompt, article)
    similarity = compute_similarity(generated_output, target)
    return similarity, generated_output

In [None]:
if False:
  optimized_prompt = base_prompt
  best_generated_output = None
  NB_JOBS = 3

  for iteration in tqdm(range(num_iterations), desc="Iterations"):
      # Parallel processing of articles and targets
      results = Parallel(n_jobs=NB_JOBS)(delayed(process_article)(article, target, optimized_prompt) for article, target in zip(articles, targets))

      # Extract similarities and generated outputs
      similarities, generated_outputs = zip(*results)

      # Compute the average similarity for this iteration
      avg_similarity = np.mean(similarities)
      print(f"Iteration {iteration + 1}/{num_iterations}, Avg Similarity: {avg_similarity:.4f}")

      # Update prompt using the optimizer LLM
      if iteration < num_iterations - 1:
          best_generated_output = generated_outputs[np.argmax(similarities)]
          optimized_prompt = optimize_prompt(optimized_prompt, articles[0], best_generated_output, avg_similarity)

  print("Final optimized prompt:", optimized_prompt)

### Without parallelization

In [15]:
# Prompt tuning loop
optimized_prompt = base_prompt
for iteration in tqdm(range(num_iterations)):
    similarities = []

    for article, target in tqdm(zip(articles, targets)):
        # Generate output using the current prompt
        generated_output = generate_text(optimized_prompt, article)

        # Compute similarity between generated output and target
        similarity = compute_similarity(generated_output, target)
        similarities.append(similarity)

    # Compute the average similarity for this iteration
    avg_similarity = np.mean(similarities)
    print(f"Iteration {iteration + 1}/{num_iterations}, Avg Similarity: {avg_similarity:.4f}")

    # Update prompt using the optimizer LLM
    if iteration < num_iterations - 1:
        optimized_prompt = optimize_prompt(optimized_prompt, articles[0], generated_output, avg_similarity)

print("Final optimized prompt:", optimized_prompt)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Iteration 1/5, Avg Similarity: 0.5528


ValueError: Input length of input_ids is 512, but `max_length` is set to 128. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [18]:
optimized_prompt

'Current Prompt: Summarize the following article: \nTarget: Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.\nWorkers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.\nThe Welsh Government said more people than ever were getting help to address housing problems.\nChanges to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.\nPrison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.\nHowever, the same could not be said for men, the charity said, because issues which often affect them, such as post traumatic stress disorder or drug dependency, were often viewed as less of a priority.\nAndrew Stevens, who works in Welsh prisons trying 