In [None]:
!pip install -r requirements.txt 

In [61]:
import os
import json
import shutil
import requests
from dotenv import load_dotenv
load_dotenv("credentials.env")
from azure.core.credentials import AzureKeyCredential


In [62]:

endpoint = os.getenv("AZURE_SEARCH_ENDPOINT")
credential = os.getenv("AZURE_SEARCH_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_EMBEDDINGS_ENDPOINT")
openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")
azure_openai_apikey = os.getenv("AZURE_OPENAI_API_KEY")
credential = AzureKeyCredential(credential)
openai_api_key=os.getenv("OPENAI_API_KEY")



In [None]:
%pip install -qU langchain-openai
%pip install -qU langchain
%pip install -qU BeautifulSoup4


In [64]:
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI
from langchain.tools import Tool


In [65]:
from langchain.callbacks.base import BaseCallbackHandler
##Callback for seeing the realtime thinking process
class RealTimeCallbackHandler(BaseCallbackHandler):
    def on_tool_start(self, serialized, input_str, **kwargs):
        print(f"Agent is thinking: {input_str}")
    
    def on_tool_end(self, output, **kwargs):
        print(f"Agent received response: {output}")
    
    def on_tool_error(self, error, **kwargs):
        print(f"An error occurred: {error}")
    
    def on_agent_action(self, action, **kwargs):
        print(f"Agent action: {action.tool} with input: {action.tool_input}")
    
    def on_agent_finish(self, finish, **kwargs):
        print("Agent has finished thinking.")


In [66]:
# Choose the LLM to use
llm = ChatOpenAI()
# Construct the ReAct agent
prompt = hub.pull("hwchase17/react")


In [68]:
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool
from langchain.utilities import BingSearchAPIWrapper
from typing import Type, Optional
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun
import asyncio
from concurrent.futures import ThreadPoolExecutor

class SearchInput(BaseModel):
    query: str = Field(description="Should be a search query")

class MyBingSearch(BaseTool):
    """Tool for a Bing Search Wrapper"""

    # Add type annotations for all overridden attributes
    name: str = "Searcher"
    description: str = "Useful for searching the internet."
    args_schema: Type[BaseModel] = SearchInput

    k: int = 5  # Number of search results to return

    def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
        """Synchronous Bing search."""
        bing = BingSearchAPIWrapper(k=self.k)
        return bing.results(query, num_results=self.k)

    async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None) -> str:
        """Asynchronous Bing search."""
        bing = BingSearchAPIWrapper(k=self.k)
        loop = asyncio.get_event_loop()
        results = await loop.run_in_executor(ThreadPoolExecutor(), bing.results, query, self.k)
        return results


In [69]:
def parse_html(content) -> str:
    soup = BeautifulSoup(content, 'html.parser')
    text_content_with_links = soup.get_text()
    words = text_content_with_links.split()
    first_100_words = ' '.join(words[:20])
    text_content_with_links = first_100_words
    return text_content_with_links

def fetch_web_page(url: str) -> str:
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:90.0) Gecko/20100101 Firefox/90.0'}
    response = requests.get(url, headers=HEADERS)
    return parse_html(response.content)


web_fetch_tool = Tool.from_function(
    func=fetch_web_page,
    name="WebFetcher",
    description="useful to fetch the content of a url"
    )

In [70]:
from langchain.agents import create_react_agent, AgentExecutor
from langchain.prompts import PromptTemplate
from langchain.callbacks.manager import CallbackManager

# Initialize the tools (replace MyBingSearch and web_fetch_tool with your actual tools)
tools = [MyBingSearch(k=5), web_fetch_tool]

# Instantiate the callback handler
callback_handler = RealTimeCallbackHandler()
callback_manager = CallbackManager([callback_handler])

# Construct the OpenAI Tools agent
agent = create_react_agent(llm, tools, prompt)

# Create the agent executor with the callback manager
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, 
                               return_intermediate_steps=True, callback_manager=callback_manager, handle_parsing_errors=True)

# Invoke the agent and capture the result



In [None]:
# Initialize storage for iteration log
iteration_log = {}
iteration_number = 1
question_itr = 0

###Some questions for populating the cosmosdb database. The script will the prompt the user with y or n, if the user is satisfied with the answer type "y" if not type "n". 
questions = ["Most recent album published by Tyler the Creator", 
"When is the Microsoft Ignite conference?", 
"Who was appointed as the new Secretary of Defense in the latest U.S. administration?", 
"Who won the Nobel Peace Prize in 2024, and for what contributions?", 
"When did the United Nations announce its latest climate change report, and what were its main findings?", 
"Who has been elected the new President of the United States, and what are the implications of this election?",
"How has the recent U.S. presidential election, resulting in Donald Trump's victory, impacted international diplomatic relations and global market dynamics?"
    
]

# Loop through questions
while question_itr < len(questions):
    question = questions[question_itr]
    output = None

    # Attempt to invoke the agent and retrieve the answer
    while output is None:
        try:
            print(f"Question {question_itr + 1}: {question}")
            output = agent_executor.invoke({"input": question})
            print("Agent's answer:", output["output"])
        except Exception as e:
            print(f"Error occurred: {e}. Retrying...")

    # Prompt the user for satisfaction with the answer
    user_input = input("Are you satisfied with the answer? (y/n): ").strip().lower()

    if user_input == 'y':
        print("Thank you for your feedback. Moving to the next question.")
        iteration_log[iteration_number] = {
            "question": question,
            "agent_actions": [(step.tool, step.tool_input, step.log) for step, _ in output.get("intermediate_steps", [])],
            "answer": output.get("output", "No output available"),
            "user_satisfaction": user_input
        }
        iteration_number += 1
        question_itr += 1
    elif user_input == 'n':
        print("Thank you. Your feedback has been noted.")
        iteration_log[iteration_number] = {
            "question": question,
            "agent_actions": [(step.tool, step.tool_input, step.log) for step, _ in output.get("intermediate_steps", [])],
            "answer": output.get("output", "No output available"),
            "user_satisfaction": user_input
        }
        iteration_number += 1
        question_itr += 1  # You can choose whether to retry the question or move to the next
    else:
        print("Invalid input. Please enter 'y' for yes or 'n' for no.")
        continue

# Print the stored log for review
print("\nFinal Log:", iteration_log)


In [75]:
'''
In this, we will store the user feedback for further offline self reflection in cosmosDB

'''
import os
import random
from langchain_community.chat_message_histories import ChatMessageHistory, CosmosDBChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables import ConfigurableFieldSpec
from langchain_core.runnables.history import RunnableWithMessageHistory
from azure.cosmos import CosmosClient, PartitionKey
# Initialize the CosmosDB client

def store_final_log_in_cosmos(session_id: str, user_id: str, final_log: dict) -> None:
    # Configure Cosmos DB connection
    cosmos_endpoint = os.environ['AZURE_COSMOSDB_ENDPOINT']
    cosmos_database = os.environ['AZURE_COSMOSDB_NAME']
    cosmos_container = os.environ['AZURE_COSMOSDB_CONTAINER_NAME']
    cosmos_connection_string = os.environ['AZURE_COSMOSDB_CONNECTION_STRING']

    # Initialize Cosmos DB client
    client = CosmosClient.from_connection_string(cosmos_connection_string)

    # Get database and container
    database = client.get_database_client(cosmos_database)
    container = database.get_container_client(cosmos_container)

    # Prepare the data to store
    document = {
        "id": session_id,  # Unique identifier for the record
        "user_id": user_id,
        "final_log": final_log
    }

    # Store the data in the container
    container.upsert_item(document)
    print("Final log successfully stored in Cosmos DB.")


In [76]:
store_final_log_in_cosmos(session_id="session_12345", user_id="user_678900", final_log=iteration_log)


Final log successfully stored in Cosmos DB.


In [None]:
###Here we will implement the offline learning method using the react agent. Firstly, we will retrieve or stored json data from CosmosDB. 

##Fetching the data from cosmoDB and passing it on to the model for offile reinforcement learning. We will be using ReACT agent for that too. 

import os
from azure.cosmos import CosmosClient

def get_final_log_from_cosmos(session_id: str) -> dict:
    # Configure Cosmos DB connection
    cosmos_endpoint = os.environ['AZURE_COSMOSDB_ENDPOINT']
    cosmos_database = os.environ['AZURE_COSMOSDB_NAME']
    cosmos_container = os.environ['AZURE_COSMOSDB_CONTAINER_NAME']
    cosmos_connection_string = os.environ['AZURE_COSMOSDB_CONNECTION_STRING']

    # Initialize Cosmos DB client
    client = CosmosClient.from_connection_string(cosmos_connection_string)

    # Get database and container
    database = client.get_database_client(cosmos_database)
    container = database.get_container_client(cosmos_container)

    # Query to get the document by session_id
    query = f"SELECT * FROM c WHERE c.id = '{session_id}'"
    items = list(container.query_items(query=query, enable_cross_partition_query=True))

    # Check if any document was retrieved
    if not items:
        print(f"No document found for session_id: {session_id}")
        return None

    # Assuming session_id is unique, return the first matching document
    document = items[0]
    return document

# Example usage
retrieved_log = get_final_log_from_cosmos(session_id="session_12345")

if retrieved_log:
    print("Retrieved Final Log:")
    print(json.dumps(retrieved_log, indent=4))
else:
    print("No log found.")


retrieved_log.get("final_log")


In [None]:
final_log_offline = retrieved_log.get("final_log")
##The log that we got has been stored in the final_log_offline variable. We will now use this log to train the model offline.. 
final_log_offline

In [None]:
questions_offline = []

# Extract questions from final_log_offline
for key, value in final_log_offline.items():
    question = value.get('question')  # Get the 'question' field
    if question:  # Check if question exists
        questions_offline.append(question)

# Print or use the questions_offline array
print("Questions extracted:", questions_offline)

In [None]:
##It came to my attention that whenever we passed error proned chain of thought to the ReACT Agent for offline learning, it would get stuck on the error and not move forward with self reflection. replace_exceptions handles the instance of _Exception by replacing it with Error encountered to get rid of model's ambiguity. 
def replace_exceptions(data):
    """
    Recursively replace '_Exception' with 'Error encountered' in any data structure.

    Parameters:
    - data: The data structure (dict, list, str, etc.)

    Returns:
    - The data structure with '_Exception' replaced.
    """
    if isinstance(data, dict):
        return {key: replace_exceptions(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [replace_exceptions(element) for element in data]
    elif isinstance(data, str):
        return data.replace('_Exception', 'Error encountered')
    else:
        return data


In [None]:
##replaces all the _Exception with Error encountered
final_log_offline = replace_exceptions(final_log_offline)


In [88]:
OFFLINE_PROMPT = """
YOUR FINAL OUTPUT SHOULD HAVE ATLEAST 200 WORDS AND MAKE IT GENERALIZED SO THAT IT CAN BE USED FOR FUTURE TASKS.
You have completed the task: '{question}' using the following steps: {agent_actions}. However, the user indicated dissatisfaction ('user_satisfaction' marked as 'n'). Reflect on your approach to identify areas for improvement and develop actionable strategies for future tasks.

Focus on these key aspects:

### Evaluate Your Approach:
- **Logical Flow:** Did your actions logically address the task? Were they aligned with user expectations?
- **Action Effectiveness:** Assess the relevance and impact of each action in addressing the task requirements.

### Identify Issues and Causes:
- **Errors/Blockers:** Recognize any challenges or failed attempts (e.g., resource inaccessibility, formatting errors).
- **Root Causes:** Analyze why these issues occurred and whether they stemmed from tool selection, search strategy, or external factors.

### Refine Navigation and Resource Use:
- **Search Optimization:** How could you improve your search queries or strategies for more relevant results?
- **Resource Exploration:** Identify alternative, reliable sources to mitigate errors or access issues.
- **Verification:** Emphasize checking resource reliability and accessibility before use.

### Enhance Reasoning and Adaptability:
- **Adapt to Challenges:** Reflect on how you handled unexpected issues and identify opportunities for greater flexibility.
- **Plan Ahead:** Consider whether better planning or foresight could prevent similar issues.

### Generalized Principles:
- **High-Level Strategies:** Develop adaptable strategies for handling similar tasks effectively in the future.
- **Best Practices:** Highlight principles for reasoning, planning, and navigating resources, focusing on reliability and adaptability.
- **Preventative Measures:** Suggest ways to avoid recurring issues, ensuring robust and user-aligned task execution.

**Structure your response:**
1. **Reflection on Performance:** Summarize your reasoning and task alignment.
2. **Improvement Insights:** Provide actionable suggestions for refining execution and resource use.
3. **Generalized Principles:** Offer strategies to approach similar tasks more effectively, emphasizing adaptability, planning, and navigation.
"""


In [None]:
# Assuming final_log_offline is a dictionary with structure like:
# final_log_offline = {
#     '1': {'question': 'First question text', 'agent_actions': [...]},
#     '2': {'question': 'Second question text', 'agent_actions': [...]},
#     ...
# }

if final_log_offline:
    for key, value in final_log_offline.items():
        while True:  # Retry loop
            try:
                # Extract the question and agent actions for the current key
                question = value.get("question", "No question provided.")
                agent_actions = value.get("agent_actions", [])

                # Format the OFFLINE_PROMPT with the extracted question and agent actions
                formatted_prompt = OFFLINE_PROMPT.format(
                    question=question,
                    agent_actions=agent_actions
                )

                # Invoke the agent executor with the formatted prompt
                output = agent_executor.invoke({"input": formatted_prompt})

                # Display the agent's answer
                print(f"Agent's answer for Question {key}:", output["output"])

                # Exit the retry loop if successful
                break

            except Exception as e:
                # Handle and display the error, then retry
                print(f"Error encountered for Question {key}: {e}. Retrying...")


In [90]:
###Let us convert the question into embedding so it can be used for semantic search
from azure.search.documents.indexes.models import SearchIndex
from langchain_openai import AzureOpenAIEmbeddings

def get_question_embeddings(question_title):
    openai_embeddings = AzureOpenAIEmbeddings(
        model="text-embedding-ada-002",
        chunk_size=1,  # Adjust based on your needs
        client=None,
        azure_endpoint="https://azure-openai-accelerator.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15",
        openai_api_version="2023-05-15",
    )
    try:
        embedding = openai_embeddings.embed_documents(question_title)  # Get the embedding from the list
        return embedding  # Explicitly return the embedding
    except Exception as e:
        print(f"Error generating embedding for question: {e}")
        return None  # Return None if an error occurs

# Example usage




In [None]:
'''
Once, offline reinforcement learning is done we will store the self reflection answer in AzureAI Search Index with question being the indexer.

'''
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import SearchIndex
from typing import List
from azure.search.documents.indexes.models import (
    SearchIndex, SimpleField, SearchField, SearchFieldDataType,
    VectorSearch, HnswAlgorithmConfiguration, VectorSearchProfile,
    SemanticConfiguration, SemanticPrioritizedFields, SemanticField,
    VectorSearchCompression, ScalarQuantizationCompression,  SemanticSearch
)

# Define function to create a scalar-quantized index without truncation
def create_index(index_name, dimensions):
    vector_type = "Collection(Edm.Single)"

    # Define fields for the index
    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True),
        SearchField(name="Question_Title", type=SearchFieldDataType.String),
        SearchField(name="generalized_chunk", type=SearchFieldDataType.String),
        SearchField(name="question_embedding", type=vector_type, searchable=True, stored=True, vector_search_dimensions=dimensions, vector_search_profile_name="myHnswProfile")
    ]

    # Define scalar quantization compression configuration without truncation
    compression_name = "myCompression"
    compression_configurations = [
        ScalarQuantizationCompression(compression_name=compression_name)
    ]

    # Define vector search with compression
    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(name="myHnsw")
        ],
        profiles=[
            VectorSearchProfile(name="myHnswProfile", algorithm_configuration_name="myHnsw", compression_name=compression_name)
        ],
        compressions=compression_configurations
    )

    # Define semantic configuration
    semantic_config = SemanticConfiguration(
        name="my-semantic-config",
        prioritized_fields=SemanticPrioritizedFields(
            title_field=SemanticField(field_name="Question_Title"),
            content_fields=[SemanticField(field_name="generalized_chunk")]
        )
    )
    semantic_search = SemanticSearch(configurations=[semantic_config])

    return SearchIndex(name=index_name, fields=fields, vector_search=vector_search, semantic_search=semantic_search) 

# Define index name and dimensions
embedding_dimensions = 1536
index_name = "semantic_memory_index"

# Create the SearchIndexClient
search_index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

# Create the scalar-quantized index without truncation
index = create_index(index_name, dimensions=embedding_dimensions)
search_index_client.create_or_update_index(index)

print("Created scalar-quantized index for the semantic_memory_application")


In [92]:
from azure.search.documents import SearchClient
from typing import List

# Create the SearchClient to interact with the index
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

# Function to insert a single document with embedding
def insert_single_embedding(embedding: List, question_title: str, generalized_chunk: str, id: str):
    document = {
        "id": id,  # Unique ID for this document
        "Question_Title": question_title,
        "generalized_chunk": generalized_chunk,
        "question_embedding": embedding[0]
    }

    # Upload the document to the index
    result = search_client.upload_documents(documents=[document])
    return result

# Iterate through the final_log_offline dictionary
for key, value in final_log_offline.items():
    try:
        # Extract question and embedding
        question = value.get("question", "No question provided.")
        embedding = get_question_embeddings([question])  # Replace with actual embedding function
        question_title = question
        generalized_chunk = value.get("answer", "No answer provided.")  # Replace with appropriate value

        # Insert the document into the index
        result = insert_single_embedding(embedding, question_title, generalized_chunk, key)
        print(f"Document with ID {key} inserted:", result)

    except Exception as e:
        print(f"Error inserting document with ID {key}: {e}")


Document with ID 1 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC8051A940>]
Document with ID 2 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC14229460>]
Document with ID 3 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC14278D00>]
Document with ID 4 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC1423D5E0>]
Document with ID 5 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC1425CA30>]
Document with ID 6 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC14272E80>]
Document with ID 7 inserted: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001AC14282AF0>]


In [None]:
from azure.search.documents import SearchClient

# Create the SearchClient to interact with the index
question_new = "Who is the appointed Secretary of Health and Human Services in 2024?"
# Function to perform vector search
def search_similar_embeddings(question, k: int = 5):

    
    # Perform the search
    results = search_client.search(
        search_text=question,  # Input query for semantic search
        top=k,  # Number of top results to return
        query_type="semantic",  # Use semantic search
        semantic_configuration_name="my-semantic-config",  # Replace with your semantic configuration name
        query_caption="extractive",  # Extractive captions for concise explanations
        query_caption_highlight_enabled=True,  # Enable highlighting in captions
        select=["Question_Title", "generalized_chunk"] # Empty search text because we're using vector search
    )
    
    # Return the results
    return results

# Example query embedding

# Perform the search
generalized_chunk_semantic_memory =""
results = search_similar_embeddings(question_new, k=2)
for result in results:
    print(question_new)
    generalized_chunk_semantic_memory = result["generalized_chunk"]
    
print(generalized_chunk_semantic_memory)



In [95]:
SEMANTIC_MEMORY_PROMPT ="""You are a helpful agent. You will get a question and a generalized text on how you should approach the problem. Make sure to take the generalized text into consideration BUT DO NOT COMPLETELY RELY ON IT. Using the generalized approach, answer the given query and present the user with a comprehensive answer. Question: {new_question} and the generalized approach to follow: {results}"""

In [None]:
##Adding the semantic memory to the ReACT Agent
semantic_memory_prompt = SEMANTIC_MEMORY_PROMPT.format(new_question = question_new, results = generalized_chunk_semantic_memory)

In [None]:
# Initial attempt to invoke the agent with ReACT thought process
output = None
while output is None:
    try:
        output = agent_executor.invoke({"input": semantic_memory_prompt})
    except Exception as e:
        print(f"Error occurred: {e}. Retrying...")

# Display the initial answer
print("Agent's answer:", output["output"])