# Code analysis with Langchain + Azure OpenAI + Azure Cognitive Search (vector store)

The following demo shows how to analyze your existing code by using both Azure OpenAI and Azure Cognitive Search with the help of LangChain.

**LangChain** is an open-source framework that simplifies the creation of applications using large language models (LLMs). It provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications. You can use it to connect a language model to other sources of data, and allow it to interact with its environment.


In [None]:
import os
import json
import sys

#from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import RetrievalQA
from langchain.retrievers import AzureCognitiveSearchRetriever
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch

In [None]:
# Display the version string of the Python interpreter running this notebook
sys.version

## Documents

In [None]:
# Shell escape: list the entries in the notebooks folder whose names contain a dot
!ls notebooks/*.*

Analyze the 3 example notebooks for customized code analysis

In [None]:
# Folder that is walked recursively for files to analyze
root_dir = "notebooks"

# Load every file found under root_dir as UTF-8 text and collect the
# resulting documents in `docs`.
docs = []
for dirpath, _dirnames, filenames in os.walk(root_dir):
    for file in filenames:
        print(file)
        path = os.path.join(dirpath, file)
        try:
            loader = TextLoader(path, encoding="utf-8")
            docs.extend(loader.load_and_split())
        except Exception as e:
            # Loading stays best effort (one unreadable file must not stop
            # the walk), but the failure is reported instead of being
            # silently dropped so missing documents can be diagnosed.
            print(f"Skipping {path}: {e}")

In [None]:
# Cut the loaded documents into chunks (target size 1000 characters, no overlap)
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(docs)

We are going to load the settings from GitHub Codespaces secrets instead of from a local `azure.env` file.

In [None]:
#load_dotenv("azure.env")

**Make sure these settings exist in your GitHub repository's Codespaces secrets!**


In my case both the model and deployment are named "text-embedding-ada-002"

>NOTE: it takes a few minutes to add the document embeddings to search (7 minutes for me)


In [None]:

# Name of the Azure Search index used by the vector store and the retriever
index_name = "index-pythonnotebooks"

# Embedding model on Azure OpenAI; deployment name, model name, endpoint and
# key are all read from environment variables
embeddings = OpenAIEmbeddings(
    openai_api_type="azure",
    openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT"),
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    deployment=os.getenv("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME"),
    model=os.getenv("OPENAI_ADA_EMBEDDING_MODEL_NAME"),
    chunk_size=1,
)

# Azure Search vector store; text is embedded with embeddings.embed_query
acs = AzureSearch(
    index_name=index_name,
    azure_search_endpoint=os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT"),
    azure_search_key=os.getenv("AZURE_SEARCH_ADMIN_KEY"),
    embedding_function=embeddings.embed_query,
)


In [None]:

# Add the split text chunks (`texts`) to the Azure Search vector store
acs.add_documents(documents=texts)

In [None]:
# Use Azure Cognitive Search as the retriever: document text is read from the
# "content" field and top_k is set to 10
api_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
retriever = AzureCognitiveSearchRetriever(
    index_name=index_name,
    api_key=api_key,
    content_key="content",
    top_k=10,
)

In [None]:

# Chat model used as the LLM: the Azure OpenAI deployment named by
# AZURE_OPENAI_MODEL_CHAT, with answers capped at 100 tokens
llm = AzureChatOpenAI(
    openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT"),
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    openai_api_version=os.getenv("AZURE_OPENAI_MODEL_CHAT_VERSION"),
    deployment_name=os.getenv("AZURE_OPENAI_MODEL_CHAT"),
    temperature=0.7,
    max_tokens=100,
)

The next two cells are commented out so that the settings are not exposed in the notebook output.

In [None]:
#retriever

In [None]:
#llm

## Testing

In [None]:
# Prompt text for the QA chain. It is assembled from adjacent literals so the
# trailing space before several line breaks is visible; {context} and
# {question} are the template's placeholders.
template = (
    "Use the following pieces of context to answer the question at the end. \n"
    "You are a python expert and you should demonstrate some python knowledge.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
    "Use three sentences maximum and keep the answer as concise as possible. \n"
    'Always say "thanks for asking!" at the end of the answer. \n'
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Build the Retrieval QA chain from the LLM, the retriever and the custom
# prompt; source documents are returned alongside the answer
chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT}
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [None]:
# Ask the QA chain each question and print the answer with its first source.
questions = ["Could you explain the notebook 01 Image Analysis.ipynb"]

# Passed to the chain on every call; this cell never appends to it.
chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")

    # Guard against an empty retrieval result: indexing [0] on an empty list
    # would raise IndexError after the answer has already been printed.
    source_documents = result["source_documents"]
    if source_documents:
        # Assumes metadata['metadata'] is a JSON string with a 'source' key
        # (TODO confirm against the index schema).
        source = json.loads(source_documents[0].metadata["metadata"])["source"]
    else:
        source = "(no source documents returned)"
    print(f"Source: {source} \n")

In [None]:
# Ask the QA chain each question and print the answer with its first source.
questions = ["How to get image captions? Show me a python code"]

# Passed to the chain on every call; this cell never appends to it.
chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")

    # Guard against an empty retrieval result: indexing [0] on an empty list
    # would raise IndexError after the answer has already been printed.
    source_documents = result["source_documents"]
    if source_documents:
        # Assumes metadata['metadata'] is a JSON string with a 'source' key
        # (TODO confirm against the index schema).
        source = json.loads(source_documents[0].metadata["metadata"])["source"]
    else:
        source = "(no source documents returned)"
    print(f"Source: {source} \n")

In [None]:
# Ask the QA chain each question and print the answer with its first source.
questions = ["Explain the notebook 03 Background removal.ipynb"]

# Passed to the chain on every call; this cell never appends to it.
chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")

    # Guard against an empty retrieval result: indexing [0] on an empty list
    # would raise IndexError after the answer has already been printed.
    source_documents = result["source_documents"]
    if source_documents:
        # Assumes metadata['metadata'] is a JSON string with a 'source' key
        # (TODO confirm against the index schema).
        source = json.loads(source_documents[0].metadata["metadata"])["source"]
    else:
        source = "(no source documents returned)"
    print(f"Source: {source} \n")

In [None]:
# Ask the QA chain each question and print the answer with its first source.
questions = ["How to remove background from an image using Azure AI?"]

# Passed to the chain on every call; this cell never appends to it.
chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")

    # Guard against an empty retrieval result: indexing [0] on an empty list
    # would raise IndexError after the answer has already been printed.
    source_documents = result["source_documents"]
    if source_documents:
        # Assumes metadata['metadata'] is a JSON string with a 'source' key
        # (TODO confirm against the index schema).
        source = json.loads(source_documents[0].metadata["metadata"])["source"]
    else:
        source = "(no source documents returned)"
    print(f"Source: {source} \n")

In [None]:
# Ask the QA chain each question and print the answer with its first source.
questions = ["How to get image captions?"]

# Passed to the chain on every call; this cell never appends to it.
chat_history = []

for question in questions:
    result = qa_chain({"query": question, "chat_history": chat_history})
    print(f"Question: {question} \n")
    print(f"Answer: {result['result']} \n")

    # Guard against an empty retrieval result: indexing [0] on an empty list
    # would raise IndexError after the answer has already been printed.
    source_documents = result["source_documents"]
    if source_documents:
        # Assumes metadata['metadata'] is a JSON string with a 'source' key
        # (TODO confirm against the index schema).
        source = json.loads(source_documents[0].metadata["metadata"])["source"]
    else:
        source = "(no source documents returned)"
    print(f"Source: {source} \n")