# Prompt optimisation

In [1]:
from hone import get_instill_engine
import textgrad as tg
from textgrad.autograd.function import BackwardContext
from instill.clients import init_pipeline_client
from google.protobuf.json_format import MessageToDict
import os
# Pipeline client used by the retrieval function below. The API token is read
# from the INSTILL_API_TOKEN environment variable (KeyError here if it is unset).
pipeline = init_pipeline_client(api_token=os.environ["INSTILL_API_TOKEN"])

In [2]:
# Engine passed to the loss, the backward pass and the optimizer further down.
# NOTE(review): "gpt-3.5-turbo" is presumably the model served through the
# "textgrad-engine" pipeline - confirm against `hone.get_instill_engine`.
engine = get_instill_engine(
    "gpt-3.5-turbo",
    namespace_id="george_strong",
    pipeline_id="textgrad-engine"
)

In [3]:
def retrieve_fn(revisor_prompt, user_query,
                namespace_id="george_strong",
                catalog_name="brown_bears",
                top_k=7,
                pipeline=pipeline):
    """Trigger the ``semantic-search`` pipeline and return its retrieved chunks as text.

    Args:
        revisor_prompt: Object whose ``.value`` is sent as the ``revisor-prompt`` field.
        user_query: Object whose ``.value`` is sent as the ``user-query`` field.
        namespace_id: Namespace the pipeline is triggered in; also forwarded in
            the payload as ``namespace``.
        catalog_name: Value of the ``catalog-name`` payload field.
        top_k: Value of the ``top-k`` payload field (presumably the number of
            chunks requested - confirm against the pipeline definition).
        pipeline: Client exposing ``trigger``. The default is the module-level
            client as it existed when this function was defined.

    Returns:
        ``str()`` of the ``retrieved-chunks`` entry of the first pipeline output.
    """
    # Single request item; keys are the field names the pipeline is triggered with.
    payload = {
        "catalog-name": catalog_name,
        "namespace": namespace_id,
        "revisor-prompt": revisor_prompt.value,
        "top-k": top_k,
        "user-query": user_query.value,
    }

    raw_response = pipeline.trigger(
        namespace_id=namespace_id,
        pipeline_id="semantic-search",
        data=[payload],
    )

    # Convert the protobuf reply to plain dicts/lists before indexing into it.
    first_output = MessageToDict(raw_response)['outputs'][0]
    return str(first_output['retrieved-chunks'])

In [4]:
class RetrieveFunction(tg.autograd.StringBasedFunction):
    """String-based textgrad function whose forward pass takes the revisor prompt
    and the user query as two explicit ``tg.Variable`` arguments.

    ``forward`` reads ``self.fn``, ``self.function_purpose`` and ``self.backward``;
    these are expected to be provided by the parent class.
    """

    def __init__(self, fn, function_purpose):
        """Hand ``fn`` and ``function_purpose`` to the parent initialiser unchanged."""
        super().__init__(fn=fn, function_purpose=function_purpose)

    def forward(self, revisor_prompt_var: tg.Variable, user_query_var: tg.Variable) -> tg.Variable:
        """Call ``self.fn`` on the two variables and wrap the result as a graph node.

        Args:
            revisor_prompt_var: Passed to ``self.fn`` as ``revisor_prompt``.
            user_query_var: Passed to ``self.fn`` as ``user_query``.

        Returns:
            A ``tg.Variable`` with role "retrieved chunks" that has both inputs as
            predecessors and a backward context attached.
        """
        # The variables are passed through as-is (no conversion); the keyword
        # names must match the parameters of ``self.fn``.
        named_inputs = dict(revisor_prompt=revisor_prompt_var, user_query=user_query_var)

        retrieved = tg.Variable(
            value=self.fn(**named_inputs),
            predecessors=[revisor_prompt_var, user_query_var],
            role_description="retrieved chunks",
            requires_grad=True,
        )

        # Attach what ``self.backward`` is given when gradients are propagated
        # through this node.
        context = BackwardContext(
            backward_fn=self.backward,
            response=retrieved,
            function_purpose=self.function_purpose,
            inputs=named_inputs,
        )
        retrieved.set_grad_fn(context)

        return retrieved

In [5]:
# Wrap `retrieve_fn` so that a retrieval call becomes a node in the textgrad graph.
# `function_purpose` is stored in the backward context built by `RetrieveFunction.forward`.
retrieve = RetrieveFunction(
    fn=retrieve_fn,
    function_purpose="Revise the user query using revisor prompt and retrieve relevant chunks using revised query"
)

In [6]:
# `revisor_prompt` is the text being optimised (it is the only parameter handed to
# the optimizer later on); `user_query` is excluded via requires_grad=False.
revisor_prompt = tg.Variable("make a search query", role_description="The query revisor prompt. This should not be specific, but rather instructs a LLM to make the provided user query better for search")
user_query = tg.Variable("where do bears live?", role_description="The user query", requires_grad=False)

# Initial retrieval using the starting revisor prompt.
test = retrieve(revisor_prompt, user_query)

In [7]:

#test = retrieve(revisor_prompt, user_query)

In [8]:
# Evaluation instruction for the textual loss; the loss is built on `engine`.
system_prompt = tg.Variable("Evaluate how relevant the retrieved chunks are to the user query", role_description="The system prompt")
loss = tg.TextLoss(system_prompt, engine=engine)

In [9]:
# Only `revisor_prompt` is registered as a parameter, so it is the only text the
# optimizer is given to update.
optimizer = tg.TextualGradientDescent(parameters=[revisor_prompt], engine=engine)

In [10]:
# Label the query and the retrieved chunks so the evaluator can tell them apart
# once they are concatenated for the loss.
#
# Both values are edited in place, so each edit is guarded: re-running this cell
# must not stack a second "User Query: " / "Retrieved Information:" label.
# NOTE: `user_query` stays labelled after this cell, so a later
# `retrieve(revisor_prompt, user_query)` would send the labelled text to the
# search pipeline.
if not user_query.value.startswith("User Query: "):
    user_query.value = f"User Query: {user_query.value}\n\n"

if not test.value.startswith("Retrieved Information:\n"):
    test.value = f"Retrieved Information:\n{test.value}"

In [11]:
# Evaluate the labelled query joined with the labelled retrieval output.
loss_value = loss(user_query + test)

2024-12-06 11:52:41,487.487 INFO     LLMCall function forward


In [12]:
# Show the evaluator's textual assessment of the retrieved chunks.
loss_value.value

'The retrieved chunks provide relevant information regarding the habitats of brown bears, which partially addresses the user query about where bears live. Here is an evaluation of the relevance of each chunk:\n\n1. **Chunk 1**: Discusses habitat protection and the need for large, undisturbed areas for brown bears, highlighting the impact of human activities on their habitats. This is relevant as it provides context on the types of environments brown bears require.\n\n2. **Chunk 2**: Focuses on the diet of brown bears rather than their habitat, making it less relevant to the query about where bears live.\n\n3. **Chunk 3**: Lists specific regions where brown bears are found, such as Russia, the United States, and Canada. This directly answers the query by providing geographical locations of brown bear habitats.\n\n4. **Chunk 4**: Describes the physical characteristics of brown bears rather than their habitat, making it less relevant to the query.\n\n5. **Chunk 5**: Provides a broad overv

In [13]:
# Propagate textual feedback from the loss back through the graph, using `engine`.
loss_value.backward(engine)

2024-12-06 11:52:41,500.500 INFO     _backward_through_llm prompt
2024-12-06 11:52:48,139.139 INFO     _backward_through_llm gradient
2024-12-06 11:52:48,139.139 INFO     _backward_through_llm prompt
2024-12-06 11:52:54,309.309 INFO     _backward_through_llm gradient
2024-12-06 11:52:54,311.311 INFO     Idempotent backward
2024-12-06 11:52:54,311.311 INFO     Idempotent backward
2024-12-06 11:52:54,312.312 INFO     _backward_through_string_fn
2024-12-06 11:53:02,236.236 INFO     _backward_through_string_fn gradient


In [14]:
# Inspect the feedback collected on the revisor prompt by the backward pass.
revisor_prompt.gradients

{Variable(value=To improve the query revisor prompt and better align with the objective function, consider the following feedback:
 
 1. **Encourage Specificity**: The current prompt "make a search query" is too vague and does not guide the LLM to refine the user query effectively. Encourage the LLM to ask clarifying questions or suggest more specific terms. For example, it could prompt the user to specify the type of bear or the geographical area of interest, which would help in retrieving more relevant information.
 
 2. **Incorporate Contextual Awareness**: The prompt should instruct the LLM to consider the context of the query. For instance, if the query is about animal habitats, the LLM should prioritize refining the query to focus on geographical and environmental aspects rather than unrelated topics like diet or behavior.
 
 3. **Guide Towards Relevance**: The prompt should encourage the LLM to focus on the core intent of the query. In this case, the intent is to find out where 

In [15]:
# Ask the optimizer to update its parameter (`revisor_prompt`).
optimizer.step()

2024-12-06 11:53:02,248.248 INFO     TextualGradientDescent prompt for update
2024-12-06 11:53:05,146.146 INFO     TextualGradientDescent optimizer response
2024-12-06 11:53:05,148.148 INFO     TextualGradientDescent updated text


In [16]:
# Inspect the labelled retrieval output. Retrieval has not been re-run since
# `test` was created, so this still reflects the starting revisor prompt.
test.value

"Retrieved Information:\n[{'chunk-uid': '1780f503-d8c2-41d3-b772-983b70f2c8bb', 'source-file-name': 'Brown_bear_factsheet.pdf', 'similarity-score': 0.60865873, 'text-content': 'Below we detail some of the initiatives already taking place, together with those additionally required, to ameliorate these issues.\\n\\n#### Habitat protection and restoration\\n\\nAll bears are wide-ranging, and excepting the polar bear, brown bears have the largest ranges of all; needing large areas of undisturbed habitat if they are to thrive. With a rapidly growing human population, the pressures on brown bear habitat are significant in many of the places where they are found. Direct pressure from human habitation is not often significant as brown bears are usually found in very remote areas. However, indirect pressure caused by logging, clearance of land for agriculture, mining, and oil drilling is a serious problem in many places. Given the requirement for extensive ranges, habitat fragmentation can be j

In [17]:
# Show the revisor prompt after the optimizer step.
revisor_prompt.value

'Refine the user query to enhance search relevance by considering specificity, context, and core intent. Encourage clarifying questions or suggest specific terms related to the topic, such as geographical area or type. Focus on the main intent, like habitat or location, and promote diverse sources. Engage with the user for feedback and further refinement.'

In [18]:
# Reset gradients before another iteration.
# NOTE(review): presumably this clears `revisor_prompt.gradients` - confirm against textgrad.
optimizer.zero_grad()