# Prompt optimisation

In [28]:
from hone import get_instill_engine, GeneralStringFunction
import textgrad as tg
from textgrad.autograd.function import BackwardContext
from instill.clients import init_pipeline_client
from google.protobuf.json_format import MessageToDict
import os
# Instill pipeline client. The API token is read from the INSTILL_API_TOKEN
# environment variable (a KeyError is raised here if it is not set).
# This object is also the default `pipeline` argument of retrieve_fn below.
pipeline = init_pipeline_client(api_token=os.environ["INSTILL_API_TOKEN"])

In [29]:
# LLM engine shared by the rest of the notebook: it is passed to tg.TextLoss,
# tg.TextualGradientDescent and loss_value.backward() further down.
# It is obtained from hone's get_instill_engine for the "textgrad-engine"
# pipeline in the "george_strong" namespace.
engine = get_instill_engine(
    "gpt-3.5-turbo",
    namespace_id="george_strong",
    pipeline_id="textgrad-engine"
)

In [30]:
def retrieve_fn(revisor_prompt, user_query,
                namespace_id="george_strong",
                catalog_name="brown_bears",
                top_k=7,
                pipeline=pipeline):
    """Trigger the ``semantic-search`` pipeline and return the retrieved chunks as text.

    Args:
        revisor_prompt: Object exposing ``.value``; sent as the ``revisor-prompt`` field.
        user_query: Object exposing ``.value``; sent as the ``user-query`` field.
        namespace_id: Namespace used both to address the pipeline and as the
            ``namespace`` field of the request.
        catalog_name: Sent as the ``catalog-name`` field.
        top_k: Sent as the ``top-k`` field.
        pipeline: Client whose ``trigger`` method is called. The default is the
            module-level ``pipeline`` object as it existed when this function
            was defined.

    Returns:
        ``str()`` of the ``retrieved-chunks`` entry of the first element of the
        response's ``outputs``.

    Raises:
        KeyError / IndexError: If the converted response has no ``outputs``
            entry, an empty one, or no ``retrieved-chunks`` key.
    """
    # Single request item; keys are the field names sent to the pipeline.
    request_item = {
        "catalog-name": catalog_name,
        "namespace": namespace_id,
        "revisor-prompt": revisor_prompt.value,
        "top-k": top_k,
        "user-query": user_query.value
    }

    raw_response = pipeline.trigger(
        namespace_id=namespace_id,
        pipeline_id="semantic-search",
        data=[request_item]
    )

    # Convert the protobuf response into plain dicts/lists before indexing.
    response_dict = MessageToDict(raw_response)
    first_output = response_dict['outputs'][0]

    return str(first_output['retrieved-chunks'])

In [31]:
# Wrap retrieve_fn in hone's GeneralStringFunction so that it can be called
# directly on tg.Variable inputs (see retrieve(revisor_prompt, user_query) below).
# function_purpose is a natural-language description of what the wrapped call does.
# NOTE(review): presumably this description is given to the LLM when feedback is
# propagated back through the function — confirm against hone.
retrieve = GeneralStringFunction(
    fn=retrieve_fn,
    function_purpose="Revises the user query using the revisor prompt and then retrieves relevant chunks using the revised query"
)

In [32]:
# The prompt being optimised. It starts from the placeholder text "do something";
# role_description tells the LLM what this variable is for and how it may be changed.
revisor_prompt = tg.Variable("do something", role_description="The query revisor prompt. This instructs a LLM how to revise the user query for search. This should be completely general and not contain any specific details about the user query, only how to modify the query for search.")
# The user query is created with requires_grad=False and is not handed to the optimizer.
user_query = tg.Variable("where do bears live?", role_description="The user query", requires_grad=False)

# Forward pass through the wrapped retrieve_fn: this triggers the semantic-search
# pipeline and stores the result in `test` (read later via test.value).
test = retrieve(revisor_prompt, user_query)

In [16]:

#test = retrieve(revisor_prompt, user_query)

In [33]:
# Instruction for the evaluator LLM behind the loss. It is a fixed prompt, not a
# parameter being optimised (only revisor_prompt is given to the optimizer), so it
# is marked requires_grad=False. Without this, backward() also asks the LLM for
# feedback on the system prompt, which nothing consumes.
system_prompt = tg.Variable(
    "Evaluate how relevant the retrieved chunks are to the user query.",
    requires_grad=False,
    role_description="The system prompt",
)
# The loss is an LLM call: it evaluates whatever variable it is applied to
# according to system_prompt, using the shared engine.
loss = tg.TextLoss(system_prompt, engine=engine)

In [34]:
# Only revisor_prompt is registered as a parameter, so it is the only variable
# optimizer.step() will rewrite. The same engine is used to produce the update.
optimizer = tg.TextualGradientDescent(parameters=[revisor_prompt], engine=engine)

In [35]:
# Label the two pieces of text so the evaluator LLM can tell the query apart from
# the retrieved chunks once they are concatenated for the loss.
#
# Both variables are modified in place (later cells read user_query and test), so
# each assignment is guarded: re-running this cell must not stack the labels.
QUERY_LABEL = "User Query: "
RETRIEVED_LABEL = "Retrieved Information:\n"

# user_query is already a tg.Variable; read its text explicitly via .value rather
# than relying on the implicit str() conversion inside an f-string.
if not user_query.value.startswith(QUERY_LABEL):
    user_query.value = f"{QUERY_LABEL}{user_query.value}\n\n"

# Prefix the retrieved chunks (the string returned by retrieve_fn).
if not test.value.startswith(RETRIEVED_LABEL):
    test.value = f"{RETRIEVED_LABEL}{test.value}"

In [37]:
print(user_query.value + test.value)

User Query: where do bears live?

Retrieved Information:
[{'chunk-uid': '24ba950a-7fbe-40dc-a30a-74866631ab2a', 'source-file-name': 'Brown_bear_factsheet.pdf', 'similarity-score': 0.63671845, 'text-content': 'Brown bears are truly omnivorous with a diet varying from habitat to habitat and from season to season. Although they generally have a reputation as fearsome predators, this is largely untrue; in most cases, the majority of their diet is of vegetative origin and can include grasses, sedges, forbs (flowering plants), nuts, berries, fruits, roots, tubers, and bulbs. In addition, and depending upon location, they also consume small mammals, reptiles, honey, insects, and fish (notably salmon).\n\n![Brown bear feeding on salmon in the Kodiak National Wildlife Refuge (US Fish & Wildlife Service)](image1.jpg)\n\n9 Brown Bear Fact Sheet [www.bearconservation.org.uk](http://www.bearconservation.org.uk) Working for Bears Worldwide\n\n### SOCIAL BEHAVIOUR'}, {'chunk-uid': '43559da2-acc7-428b

In [38]:
# Concatenate the labelled query and the labelled retrieved chunks into one
# variable and have the loss LLM evaluate it; the textual evaluation is
# available as loss_value.value.
loss_value = loss(user_query + test)

2024-12-10 09:18:19,147.147 INFO     LLMCall function forward


In [39]:
loss_value.value

'The retrieved information is relevant to the user query about where bears live. The chunks provide details about the habitat of brown bears, mentioning their range in countries like Russia, the United States, Canada, Europe, and Asia. The information also includes details about the diverse habitats where brown bears can be found, such as forests, mountains, tundra, meadows, and coastlines. Additionally, the chunks discuss the specific habitats required by brown bears for shelter and hibernation, as well as the threats to their habitats due to human activities like development, forestry, and climate change. Overall, the retrieved chunks provide relevant information about the habitats and ranges of bears in different regions.'

In [40]:
# Propagate textual feedback from the loss back to the variables it was computed
# from, using the shared engine. Afterwards revisor_prompt.gradients holds the
# feedback for the revisor prompt.
loss_value.backward(engine)

2024-12-10 09:18:42,103.103 INFO     _backward_through_llm prompt
2024-12-10 09:18:45,233.233 INFO     _backward_through_llm gradient
2024-12-10 09:18:45,235.235 INFO     _backward_through_llm prompt
2024-12-10 09:18:50,134.134 INFO     _backward_through_llm gradient
2024-12-10 09:18:50,135.135 INFO     Idempotent backward
2024-12-10 09:18:50,136.136 INFO     Idempotent backward
2024-12-10 09:18:50,136.136 INFO     _backward_through_string_fn
2024-12-10 09:18:55,237.237 INFO     _backward_through_string_fn gradient


In [41]:
revisor_prompt.gradients

{Variable(value=The current query revisor prompt "do something" is very vague and lacks specificity, which can limit the effectiveness of the search query revision process. To improve the system's performance and address the feedback provided, consider the following enhancements:
 
 1. **Be Actionable**: Instead of a generic phrase like "do something," provide specific actions or directives that guide the language model on how to revise the user query. For example, you could instruct the model to "expand on bear habitats," "include conservation information," or "add geographic details."
 
 2. **Focus on User Intent**: Tailor the revisor prompt to focus on understanding and fulfilling the user's intent behind the query. This could involve prompting the model to consider the user's likely information needs, such as "enhance understanding of bear habitats" or "provide comprehensive bear habitat details."
 
 3. **Encourage Depth and Variety**: Encourage the language model to delve deeper i

In [42]:
# Apply the collected feedback: the optimizer asks the LLM for an improved prompt
# and writes it into revisor_prompt.value.
optimizer.step()

2024-12-10 09:18:55,254.254 INFO     TextualGradientDescent prompt for update
2024-12-10 09:18:58,040.040 INFO     TextualGradientDescent optimizer response
2024-12-10 09:18:58,041.041 INFO     TextualGradientDescent updated text


In [43]:
# NOTE: `test` is not recomputed after optimizer.step(), so this still shows the
# chunks retrieved with the original revisor prompt. Call
# retrieve(revisor_prompt, user_query) again to see the effect of the new prompt.
test.value

"Retrieved Information:\n[{'chunk-uid': '24ba950a-7fbe-40dc-a30a-74866631ab2a', 'source-file-name': 'Brown_bear_factsheet.pdf', 'similarity-score': 0.63671845, 'text-content': 'Brown bears are truly omnivorous with a diet varying from habitat to habitat and from season to season. Although they generally have a reputation as fearsome predators, this is largely untrue; in most cases, the majority of their diet is of vegetative origin and can include grasses, sedges, forbs (flowering plants), nuts, berries, fruits, roots, tubers, and bulbs. In addition, and depending upon location, they also consume small mammals, reptiles, honey, insects, and fish (notably salmon).\\n\\n![Brown bear feeding on salmon in the Kodiak National Wildlife Refuge (US Fish & Wildlife Service)](image1.jpg)\\n\\n9 Brown Bear Fact Sheet [www.bearconservation.org.uk](http://www.bearconservation.org.uk) Working for Bears Worldwide\\n\\n### SOCIAL BEHAVIOUR'}, {'chunk-uid': '43559da2-acc7-428b-9ca2-333b07315011', 'text

In [44]:
revisor_prompt.value

'Provide specific actions or directives that guide the language model on how to revise the user query, focusing on user intent, encouraging depth and variety, including search context, and incorporating user engagement.'

In [18]:
# Clear the feedback accumulated on the optimizer's parameters before running
# another forward/backward pass.
# NOTE(review): this cell's execution count (In [18]) is lower than the cells
# above it, i.e. it was run out of order — re-run the notebook top to bottom.
optimizer.zero_grad()