# Lanchain with Azure Open AI (PDF files) and Azure Cognitive Search

In [1]:
import json
import os

from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers import AzureCognitiveSearchRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import AzureSearch

In [2]:
load_dotenv('azure.env')

True

In [3]:
openai_ada_embedding_deployment_name = os.getenv('OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME')
openai_ada_embedding_model_name = os.getenv('OPENAI_ADA_EMBEDDING_MODEL_NAME')
openai_api_base = os.getenv('OPENAI_API_BASE')
azure_cognitive_search_endpoint = os.getenv('AZURE_COGNITIVE_SEARCH_ENDPOINT')
azure_cognitive_search_api_key = os.getenv('AZURE_COGNITIVE_SEARCH_API_KEY')
#azure_cognitive_search_service_name = os.getenv('AZURE_COGNITIVE_SEARCH_SERVICE_NAME')
#azure_cognitive_search_index_name = os.getenv('AZURE_COGNITIVE_SEARCH_INDEX_NAME')

## PDF files

In [4]:
docs_dir = "docs"

In [5]:
os.listdir(docs_dir)

['.ipynb_checkpoints', 'ford.pdf', 'mercedes.pdf']

In [6]:
pdfdoc = "docs/ford.pdf"

In [7]:
#use langchain PDF loader
loader = PyPDFLoader(pdfdoc)

#split the document into chunks
pages = loader.load_and_split()

In [8]:
# Initialize our embedding model
embeddings=OpenAIEmbeddings(deployment=openai_ada_embedding_deployment_name,
                                model=openai_ada_embedding_model_name,
                                openai_api_base=openai_api_base,
                                openai_api_type="azure",
                                chunk_size=1)

embeddings

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='2023-05-15', openai_api_base='https://azure-openai-serge.openai.azure.com', openai_api_type='azure', openai_proxy='', embedding_ctx_length=8191, openai_api_key='8d0786663aa1480f9dee3c9edd842b1a', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})

In [9]:
index_name = 'aoai-pdfcarsdocs'
index_name

'aoai-pdfcarsdocs'

In [10]:
# Loading to the Azure Cognitive Search index
acs = AzureSearch(azure_search_endpoint=azure_cognitive_search_endpoint,
                 azure_search_key=azure_cognitive_search_api_key,
                 index_name=index_name,
                 embedding_function=embeddings.embed_query)

# Add documents to Azure Search
acs.add_documents(documents=pages)

['OWU2YjcwYmEtYzYxMS00YWE3LThmNDgtZmZhMTZhOGViMGMz',
 'MDY1MGQwOGItNGE1MS00YmY0LTljZWMtZDk3Zjk1NTNiZDIy',
 'Y2U3NjljNmMtZmI1NC00ZGRiLWFhNjgtNmI0MGIwOTQzYTFh',
 'ZDM4MzA2YTUtNTcxNy00OTc5LThiYTItYTZiZWIzYzYyOGM1',
 'ZjI5YjFkY2MtYzk5Yy00MGM3LWJjYjQtZWUwOTdlZjNjZTVh',
 'ZTk1NzZhZTAtMTEwMC00MDIzLTkyNzItM2Y0YTFjYTg0ODEw',
 'OTc2MzMxY2UtYmQ0ZC00YjMzLWI5OTItZWQ5YTkwMDAxZjIw',
 'NGRjNTg5OGQtMWE5NC00NzFhLWIwM2QtZmU1ODZiNWU0ZDAz',
 'OWNjZDY1YWUtNGVjMi00NTQ1LTgyYTUtMzZkN2FlZTQ1YjNl',
 'MWYxMWY0MjktMjI0YS00Y2EzLTgwNjYtMTlhZjI0NzEwNDMz',
 'NDhlNDJjZGEtYzZjZC00NjUwLWJlZjQtYzRiNTJhNjBiYzQ3',
 'YmYwODliNzItZTIwYy00YzI5LWJhNzYtOWY0MWI0ZjU2M2Yy',
 'MDMyMmIzZGEtM2E0Yi00NzUxLThhMjUtYTBmYmU1Y2I2OWYz',
 'ODQxYWNjZmItOGMwZi00ZWE1LTg1ZGYtZWJlMDE3NWEwOWRj',
 'ZmYwNzRjZGMtMTdmNS00NDRjLWJlODktZDQ2YTFhYjA2MzRm',
 'N2ExNTk1MzAtNGQ2My00MTFiLThhNTAtOGFiMTA2OGM1ZGNj',
 'OGQ5NWQ1MDItYmFmYi00YzZjLWExZmEtNjMzYjY5OWQyMGY3',
 'ODJjOWFmODctZmM0OC00YjY4LWExODEtYzEyODY0OGQ5NDVm',
 'MWU0ZTI0MDktMzVjNy00ZmQ3LWEwZTYtMWM0NmVhZWM2

In [11]:
pdfdoc = "docs/mercedes.pdf"

# langchain PDF loader
loader = PyPDFLoader(pdfdoc)

# Split the document into chunks
pages = loader.load_and_split()

acs.add_documents(documents=pages)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


['YjJiOWUwZmYtMmRlYy00YmM0LWFmY2UtNzE3MjEwZDllODI5',
 'YmY1YWZlNDYtZmU3MC00YTlhLThlMzQtNGQ1YjdiNGI5OTM5',
 'OThlOTY3YmQtNzIzZC00NThjLTkzNWQtZDc2ZjNhM2U2YzM4',
 'YzNhMGIwOTMtNTRiYi00ODNiLTk4NDEtMjgyZDcwN2E4NTkx',
 'YjE3NDZjMjUtYTFjNy00ZGU4LWJjZjMtYmFmNjlhYTVlYmMz',
 'Y2U3ZDY2NWItMjA0My00YzQ1LWE1OWMtMTYzNjkwNGIxZTc4',
 'NmJkYjBiMjgtODU0ZC00OGIwLThhMTgtN2ZiZmFiMzgwZjRj',
 'ZDg2YjAwMzUtMTEwZS00NTIzLTk1YzEtNzE2ZDk1OTcxZGNk',
 'MTIxMGY3ZmItODU2Zi00YjVhLWE0MzItYWE0NWNmNjVlOGFm',
 'NjZiNWZhMWYtNDAwZC00NDAzLThlZGEtZGMxNDY5ZWVjOTMw',
 'NjVmNzg4NmMtMjIwOS00MzI1LWIzZDItNmU1MDg2MDM0OGU2',
 'NGVlNWVkYjQtYTdhMC00YTJiLWJiNGYtNTAxODc0NThiY2Zl',
 'NWY4N2Y1MjUtNWMwZS00MDZmLTg5OWItYzVjZWVmN2IxZjU3',
 'Y2EwMjBjYTItYmEwNy00MDQ2LWE1ZDItZDA4NWFmYmIzNTU3',
 'NjcxYWEwODctNWYzYi00YmU4LTg5ZDYtYjI2YTVkNDhjNmM5',
 'ODFhNzI1MDgtMWQyZC00NTUyLTk1M2QtZWY5OWM4YTIzMDM2',
 'MGQ2NTU3ZGItOGViZS00ZmZkLTkxMTMtYTk2MWQ1NWU1NzEz',
 'MmExNWVhOWUtYmMxYy00OTMzLWJjODMtZGU0YmNkYWRjMjUy',
 'MWNjZDA0NTUtNTY3Yy00NDRmLWFjNDktZWUwZGRmZDhk

In [12]:
# Define Azure Cognitive Search as our retriever
retriever = AzureCognitiveSearchRetriever(content_key="content",
                                          top_k=10,
                                          index_name=index_name)

In [13]:
retriever

AzureCognitiveSearchRetriever(tags=None, metadata=None, service_name='azurecogsearcheastussr', index_name='aoai-pdfcarsdocs', api_key='ViHEHiP4CdH3zH0BYLDgHG0DKr6yHoTwbWXR4F90ujAzSeDP6Y0a', api_version='2020-06-30', aiosession=None, content_key='content', top_k=10)

In [14]:
# Set chatGPT 3.5 as our LLM (the deployment_name should be deployed under the Azure Open AI Studio)
llm = AzureChatOpenAI(deployment_name="gpt-35-turbo-16k", temperature=0.8)

In [15]:
llm

AzureChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.8, model_kwargs={}, openai_api_key='8d0786663aa1480f9dee3c9edd842b1a', openai_api_base='https://azure-openai-serge.openai.azure.com', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None, deployment_name='gpt-35-turbo-16k', model_version='', openai_api_type='azure', openai_api_version='2023-05-15')

## Testing

In [16]:
# Define a template message
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use five sentences maximum and keep the answer as concise as possible. 
Always say "Have a good day!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Set the Retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

In [17]:
questions = ['How to connect setup the radio on my Mercedes car?', 'how to change the radio of my Ford car?']

chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    #chat_history.append((question, result))
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")
    print(f"Source: {json.loads(result['source_documents'][0].metadata['metadata'])['source']} \n")

Question: How to connect setup the radio on my Mercedes car? 

Answer: To set up the radio on your Mercedes car, follow these steps:

1. Press the audio option on the feature bar.
2. Select Sources.
3. Choose FM for radio stations.
4. Use the seek buttons to automatically change radio stations or rotate the control to manually change stations.
5. Select a radio station from the list if available.
6. Adjust the sound settings by pressing the sound button.
7. You can also switch the screen on and off and set the sensitivity of the speed compensated volume.

Please note that these instructions may vary depending on the specific model and multimedia system of your Mercedes car. For more detailed instructions, refer to your vehicle's owner's manual or contact a Mercedes-Benz service centre.

Have a good day! 

Source: docs/mercedes.pdf 

Question: how to change the radio of my Ford car? 

Answer: To change the radio station in your Ford car, you can use the seek buttons or rotate the contro

In [18]:
questions = ['How to connect setup the radio on my Ford car?']

chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    #chat_history.append((question, result))
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")
    print(f"Source: {json.loads(result['source_documents'][0].metadata['metadata'])['source']} \n")

Question: How to connect setup the radio on my Ford car? 

Answer: To set up the radio on your Ford car, follow these steps:
1. Turn on your car's ignition.
2. Press the audio button on the feature bar to access the audio system.
3. Use the touchscreen or control buttons to select the radio as the audio source.
4. Use the touchscreen or control buttons to manually tune to a specific radio station or use the seek buttons to automatically search for stations.
5. Adjust the volume using the volume control buttons or touchscreen.
6. To save a favorite radio station, tune to the desired station and press and hold the corresponding preset button until you hear a beep.
7. To switch between FM and AM radio, use the sources option on the touchscreen and select FM or AM.
8. To change radio stations from a list, select the sources option on the touchscreen, then select FM or AM, and finally select Stations. Choose a radio station from the list.
9. To adjust sound settings, press the sound setting

In [19]:
questions = ['What is MBUX?']

chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    #chat_history.append((question, result))
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")
    print(f"Source: {json.loads(result['source_documents'][0].metadata['metadata'])['source']} \n")

Question: What is MBUX? 

Answer: MBUX stands for Mercedes-Benz User Experience. It is a multimedia system in Mercedes-Benz vehicles that allows for voice control and operation of various functions such as navigation, phone calls, and media. It can be activated by saying "Hey Mercedes" or pressing a button on the steering wheel. MBUX also includes features like In-Car Office, web browsing, and integration with mobile devices. Have a good day! 

Source: docs/mercedes.pdf 



In [20]:
questions = ['How to connect my smartphone using the MBUX?']

chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    #chat_history.append((question, result))
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")
    print(f"Source: {json.loads(result['source_documents'][0].metadata['metadata'])['source']} \n")

Question: How to connect my smartphone using the MBUX? 

Answer: To connect your smartphone using the MBUX, activate the MBUX voice assistant by saying "Hey Mercedes" or pressing the £ button on the multifunction steering wheel. Then, say a voice command to operate the smartphone functions such as making a call or sending a text message. You can also use the MBUX voice assistant to operate other functions like navigation, radio, and vehicle settings. Make sure to familiarize yourself with the voice control system functions before starting your journey. Have a good day! 

Source: docs/mercedes.pdf 

