In [2]:
import os
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import SearchIndex, SimpleField, SearchableField, SearchField,VectorSearch, VectorSearchProfile, HnswAlgorithmConfiguration
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.models import VectorizedQuery
from azure.identity import DefaultAzureCredential
import openai
from openai import AzureOpenAI
from dotenv import load_dotenv
import PyPDF2

# Load environment variables
load_dotenv()

# Azure OpenAI and Azure Search credentials
SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
SEARCH_ADMIN_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_version = os.getenv('API_VERSION')
azure_openai_embedding_deployment = "text-embedding-ada-002"
azure_openai_chat_model = "gpt-35-turbo"

# Constants
SEARCH_INDEX_NAME = "clinical_trial_test"

In [3]:
# def index_documents_to_azure_search(text_chunks):
#     """Generates embeddings and indexes chunks of text into Azure Cognitive Search."""
#     openai_credential = DefaultAzureCredential()
    
#     client = AzureOpenAI(
#         azure_deployment=azure_openai_embedding_deployment,
#         azure_endpoint=azure_openai_endpoint,
#         api_key=azure_openai_key,
#         azure_ad_token_provider=openai_credential if not azure_openai_key else None,
#         api_version=azure_openai_version
#     )

#     client_search = SearchClient(endpoint=SEARCH_ENDPOINT, index_name=SEARCH_INDEX_NAME, credential=AzureKeyCredential(SEARCH_ADMIN_KEY))

#     for i, chunk in enumerate(text_chunks):
#         document = {
#             "id": str(i),
#             "fileName": "DNDi-Clinical-Trial-Protocol-BENDITA-V5.pdf",  # Replace this with the actual file name
#             "content": chunk,
#             "contentEmbeddings": []  # Placeholder for embeddings
#         }

#         # Create embeddings for the content chunk
#         try:
#             response = client.embeddings.create(input=chunk, model="text-embedding-ada-002")
#             embeddings = response.data[0].embedding
#             document["contentEmbeddings"] = embeddings  # Store the embeddings
#         except Exception as e:
#             print(f"Error generating embeddings for chunk {i}: {e}")

#         # Upload the document with embeddings to Azure Search
#         try:
#             client_search.upload_documents(documents=[document])
#             print(f"Document chunk '{document['id']}' indexed successfully.")
#         except Exception as e:
#             print(f"Failed to index document chunk '{document['id']}': {e}")
####################################################################################################
def search_embeddings(user_question):
    client_search = SearchClient(endpoint=SEARCH_ENDPOINT, index_name=SEARCH_INDEX_NAME, credential=AzureKeyCredential(SEARCH_ADMIN_KEY))
    # Generate embeddings for the user's question using Azure OpenAI
    try:
        openai_credential = AzureKeyCredential(azure_openai_key)
        client = AzureOpenAI(
            azure_deployment=azure_openai_embedding_deployment,
            azure_endpoint=azure_openai_endpoint,
            api_key=azure_openai_key,
            api_version=azure_openai_version,
        )
        response = client.embeddings.create(input=user_question, model="text-embedding-ada-002").data[0].embedding

        vector_query = VectorizedQuery(vector=response, k_nearest_neighbors=3, fields="contentEmbeddings", exhaustive=True)
        # print("This is extracted vector",vector_query)
        results = client_search.search(
            search_text=None,
            vector_queries=[vector_query],
            select=["id", "content"],
            top=3
        )
        return results
    except Exception as e:
        print(f"Error generating embeddings or searching: {e}")
        return None

def get_chat_response(user_question):
    try:
        # First, search the embeddings using the user's question
        search_results = search_embeddings(user_question)
        
        # Prepare a system prompt based on search results (can be customized)
        prompt = """
                Answer the question as detailed as possible from the provided context, make sure to provide all the details. 
                If the answer is not in the provided context, just say, "answer is not available in the context", don't provide the wrong answer.\n\n
                Answer:
                """

        for result in search_results:
            prompt += f"{result['content']}\n\n"
        
        # Add the user's question to the prompt
        prompt += f"User question: {user_question}\n\n"
        
        # Use GPT-35-turbo for chat completion
        try:
            client = AzureOpenAI(
                azure_deployment=azure_openai_chat_model,
                azure_endpoint=azure_openai_endpoint,
                api_key=azure_openai_key,
                api_version=azure_openai_version,
            )
            response = client.chat.completions.create(
                model="gpt-35-turbo",
                messages=[{"role": "system", "content": prompt}]
            )
            output = response.choices[0].message.content

            # Return the response generated by GPT-35-turbo
            return {
                "response": output,
            }
        except Exception as e:
            raise Exception(f"Error generating response: {e}")
    
    except Exception as e:
        raise Exception(str(e))


In [5]:
get_chat_response("Screening criteria")

{'response': 'Screening criteria for the trial include: \n- Signed, written informed consent form  \n- Age between 18 and 50 years \n- Weight between 50kg and 80kg \n- Diagnosis of T. cruzi infection \n- Ability to comply with all protocol specified tests and visits and have a permanent address \n- Residents of areas free of vectorial transmission \n- No signs and/or symptoms of the chronic cardiac and/or digestive form of CD\n- No acute or chronic health conditions that, in the opinion of the principal investigator, may prevent participation in the trial.'}

---

In [12]:
def search_embeddings(user_question):
    client_search = SearchClient(endpoint=SEARCH_ENDPOINT, index_name=SEARCH_INDEX_NAME, credential=AzureKeyCredential(SEARCH_ADMIN_KEY))
    # Generate embeddings for the user's question using Azure OpenAI
    try:
        openai_credential = AzureKeyCredential(azure_openai_key)
        client = AzureOpenAI(
            azure_deployment=azure_openai_embedding_deployment,
            azure_endpoint=azure_openai_endpoint,
            api_key=azure_openai_key,
            api_version=azure_openai_version,
        )
        response = client.embeddings.create(input=user_question, model="text-embedding-ada-002").data[0].embedding

        vector_query = VectorizedQuery(vector=response, k_nearest_neighbors=1, fields="contentEmbeddings", exhaustive=True)

        
        # Perform the search using vector search
        search_results = client_search.search(
            search_text=None,
            vector_queries=[vector_query],
            select=["id", "content"],
            top=1
        )

        # Collect the results into a list (since SearchItemPaged is an iterable)
        # results_list = [result for result in search_results]
        return search_results #results_list
    except Exception as e:
        print(f"Error generating embeddings or searching: {e}")
        return None


In [17]:
def get_chat_response(user_question):
    try:
        # First, search the embeddings using the user's question
        search_results = search_embeddings(user_question)

        

        # Prepare a system prompt based on search result
        prompt = """
                Answer the question as detailed as possible from the provided context,
                make sure to provide all the details. If the answer is not in the provided context, 
                just say, "answer is not available in the context", don't provide the wrong answer.\n\n
                Context:\n
                """

        # Since there is only one result, directly access the first result
        prompt += f"{search_results['content']}\n\n"

        # Add the user's question to the prompt
        prompt += f"User question: {user_question}\n\n"

        # Use GPT-35-turbo for chat completion
        try:
            client = AzureOpenAI(
                azure_deployment=azure_openai_chat_model,
                azure_endpoint=azure_openai_endpoint,
                api_key=azure_openai_key,
                api_version=azure_openai_version,
            )
            response = client.chat.completions.create(
                model="gpt-35-turbo",
                messages=[{"role": "system", "content": prompt}]
            )
            output = response.choices[0].message.content

            # Return the response generated by GPT-35-turbo
            return {
                "response": output,
            }
        except Exception as e:
            raise Exception(f"Error generating response: {e}")

    except Exception as e:
        raise Exception(str(e))


In [18]:
get_chat_response("Screening criteria")

Exception: 'SearchItemPaged' object is not subscriptable