# Python notebook analysis with Azure OpenAI

In [1]:
import datetime
import openai
import os
import sys
import requests

from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers import AzureCognitiveSearchRetriever
from langchain.vectorstores import AzureSearch

In [2]:
# Python version of the kernel running this notebook
sys.version

'3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]'

In [3]:
# Timestamp of this run, rendered as day-month-year hour:minute:second
print("Today is", datetime.datetime.today().strftime("%d-%b-%Y %H:%M:%S"))

Today is 23-Oct-2023 09:37:12


In [4]:
# Report the installed version of the openai package
print(f"Open AI version: {openai.__version__}")

Open AI version: 0.28.1


## Web page to analyse

In [5]:
# Page to analyse.
# NOTE(review): this is the GitHub "blob" (HTML) URL rather than the raw notebook
# file, so the text scraped from it may contain GitHub page data in addition to
# the notebook itself — confirm this is intended.
url = "https://github.com/Azure/azure-openai-samples/blob/main/use_cases/call_center/notebooks/call_center.ipynb"

In [6]:
# Local folder where the scraped page text is stored
DATA_DIR = "notebooks"

# exist_ok=True keeps this cell re-runnable when the folder already exists
os.makedirs(DATA_DIR, exist_ok=True)

In [7]:
# Download the page; the timeout keeps the cell from hanging forever on an
# unresponsive server (requests waits indefinitely when no timeout is given).
response = requests.get(url, timeout=30)

if response.status_code == 200:
    # Reduce the HTML to its text content and strip newlines, carriage returns and tabs
    soup = BeautifulSoup(response.text, "html.parser")
    text = soup.get_text()
    text = text.replace("\n", "").replace("\r", "").replace("\t", "")
    # Save the extracted text as "<last URL segment>.txt" inside DATA_DIR.
    # The file is written explicitly as UTF-8 because it is read back later with
    # encoding="utf-8"; relying on the platform default could corrupt or reject
    # non-ASCII characters.
    text_file = os.path.join(DATA_DIR, url.split("/")[-1] + ".txt")
    with open(text_file, "w", encoding="utf-8") as file:
        file.write(text)
else:
    print(f"Failed to retrieve the web page. Status code: {response.status_code}")

In [8]:
!ls $DATA_DIR -lh

total 16K
-rwxrwxrwx 1 root root 16K Oct 23 09:37 call_center.ipynb.txt


In [9]:
# Load every .txt file found in DATA_DIR and split it into document chunks
docs = []

for file in [f for f in os.listdir(DATA_DIR) if f.endswith(".txt")]:
    # f-string so the actual file name is shown instead of the literal "{file}"
    print(f"Processing file {file}")
    try:
        loader = TextLoader(os.path.join(DATA_DIR, file), encoding="utf-8")
        docs.extend(loader.load_and_split())
    except Exception as e:
        # Keep going with the remaining files, but report the failure so that a
        # missing document does not go unnoticed.
        print(f"Failed to load {file}: {e}")

Processing file {file}


In [10]:
# Number of chunks collected from load_and_split() across all loaded files
len(docs)

5

## Azure Cognitive Search index

In [11]:
# Name of the Azure Cognitive Search index, shared by the vector store and the retriever
index_name = "webcrawler-url"

In [12]:
# Load the settings stored in azure.env; the following cells read them with os.getenv
load_dotenv("azure.env")

True

In [13]:
# Embedding model served through Azure (openai_api_type="azure");
# the endpoint, deployment and model names are taken from environment variables.
embeddings = OpenAIEmbeddings(
    openai_api_type="azure",
    openai_api_base=os.environ.get("OPENAI_API_BASE"),
    deployment=os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME"),
    model=os.environ.get("OPENAI_ADA_EMBEDDING_MODEL_NAME"),
    chunk_size=1,
)

In [14]:
# Azure Search vector store bound to `index_name`; queries are embedded with
# `embeddings.embed_query`, and the endpoint and key come from environment variables.
acs = AzureSearch(
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=os.environ.get("AZURE_COGNITIVE_SEARCH_ENDPOINT"),
    azure_search_key=os.environ.get("AZURE_COGNITIVE_SEARCH_API_KEY"),
)

In [15]:
# Add the loaded document chunks to Azure Search.
# NOTE(review): presumably every run uploads the chunks again, so re-running this
# cell may create duplicate entries in the index — confirm.
acs.add_documents(documents=docs)

['YzA5OTA1MDMtYTE2MS00ZjdhLWE1MDEtNzNiOGQ5ZjVjZmU1',
 'MDFmZDU0MjgtMjdiMS00MmY5LWEzYTQtYThlNTUyMWVkYWI0',
 'NDhlNTFlZTQtMGY1Zi00MGY0LTkzODEtOGY0ZmJhZjFhMmNm',
 'MDYzOWNjMjItNGY1Zi00YjRhLWJiNDQtZDMwM2EzYzQ2YzZj',
 'MGQwYmRmZDgtNTkxMy00NjJmLWJjOTMtYjIxNWZiOGY2ZTI2']

In [16]:
# Retriever over the same Azure Cognitive Search index (top_k=5, content_key="content")
retriever = AzureCognitiveSearchRetriever(
    index_name=index_name,
    content_key="content",
    top_k=5,
)

In [17]:
# Chat model that answers the questions: the "gpt-35-turbo-16k" deployment, temperature 0.2
llm = AzureChatOpenAI(
    temperature=0.2,
    deployment_name="gpt-35-turbo-16k",
)

## Testing

In [18]:
# Prompt sent to the LLM: {context} and {question} are the two placeholders
# filled in by the chain for each query.
template = """
You are an AI Python expert powered by Azure Open AI.
You are going to analyse some python code.
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Retrieval QA chain combining the LLM, the retriever and the custom prompt;
# return_source_documents=True makes the retrieved documents part of the result.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT),
)

In [19]:
def qa(query=None):
    """
    Ask the retrieval QA chain a question and print the answer with its first source.

    Args:
        query: Question to ask. When omitted (None), the module-level `question`
            variable is used, so existing `qa()` calls keep working unchanged.

    Returns:
        None. The question, the answer and the source are printed.

    Side effects:
        Appends a `(question, result)` tuple to the module-level `chat_history` list.
    """
    asked = question if query is None else query

    result = qa_chain({"query": asked, "chat_history": chat_history})
    chat_history.append((asked, result))

    print(f"Question: {asked} \n")
    print(f"Answer: {result['result']} \n")

    # The chain may come back without any source document; guard the [0] lookup
    # so the answer is still shown instead of the cell failing with an IndexError.
    source_documents = result.get("source_documents") or []
    if source_documents:
        doc_metadata = source_documents[0].metadata
        # Fall back to the whole metadata mapping when it has no "metadata" entry
        print(f"{doc_metadata.get('metadata', doc_metadata)}")
    else:
        print("No source document was returned for this question.")

In [20]:
# Module-level history; each qa() call appends a (question, result) tuple to it
chat_history = []

In [21]:
question = "Who are you?"

# qa() prints its output and has no return value, so there is nothing to assign
qa()

Question: Who are you? 

Answer: I am an AI Python expert powered by Azure Open AI. I am here to help you with any Python code analysis or questions you may have. Thanks for asking! 

{"source": "notebooks/call_center.ipynb.txt"}


In [22]:
question = "Can you summary this code?"

qa()

Question: Can you summary this code? 

Answer: This Python code appears to be a Jupyter notebook that focuses on post-call transcription and analysis in a call center scenario using Azure OpenAI services. The code performs sentiment analysis and summarization on call center transcriptions.

The notebook is divided into different sections, each addressing a specific task. Here is a summary of each section:

1. Introduction: Provides an overview of the call center case and the tasks to be performed.

2. Environment Setup: Loads the necessary libraries and sets up the environment by loading the .env file.

3. Speech Recognition: Defines a function `recognize_speech_from_file` that uses Azure Cognitive Services Speech SDK to transcribe customer call recordings into text.

4. Sentiment Analysis: Transcribes a customer call from a WAV file (`good_review.wav`) and prints the transcribed text. Then, it creates a prompt for sentiment analysis using OpenAI's GPT-3 model and sends the prompt to O

In [23]:
question = "How many steps in this code?"

qa()

Question: How many steps in this code? 

Answer: There are 14 steps in this code. 

{"source": "notebooks/call_center.ipynb.txt"}


In [24]:
question = "Can you describe these steps?"

qa()

Question: Can you describe these steps? 

Answer: Based on the provided information, it seems that the code is part of a Jupyter Notebook file (call_center.ipynb) hosted on GitHub. The code snippet provided is not visible, so it is not possible to describe the specific steps or analyze the Python code.

If you have the actual code snippet or any specific questions about Python code analysis, please provide it, and I'll be happy to help you. Thanks for asking! 

{"source": "notebooks/call_center.ipynb.txt"}


In [25]:
question = "Can you optimize the step 1?"

qa()

Question: Can you optimize the step 1? 

Answer: In step 1, the code is loading the environment variables from a .env file using the `dotenv` library. It then sets the OpenAI API key and base URL using the loaded environment variables.

The code can be optimized by removing the unnecessary imports and reorganizing the code for better readability. Here's an optimized version of step 1:

```python
from dotenv import load_dotenv
from pathlib import Path

env_path = Path('../../../.env')  # Change with your .env file
load_dotenv(dotenv_path=env_path, override=True)

import os
import openai

openai.api_type = "azure"
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_version = "2022-06-01-preview"

SPEECH_KEY = os.environ["SPEECH_API_KEY"]
COMPLETIONS_MODEL = os.environ["COMPLETIONS_MODEL"]
```

This optimized code removes the unnecessary imports and organizes the code in a more readable way. It also removes the duplicate import of the `os

In [26]:
question = "Can you optimize the extract metadata step?"

qa()

Question: Can you optimize the extract metadata step? 

Answer: To optimize the extract metadata step, you can consider the following:

1. Use a more efficient method to extract metadata from the code. Instead of parsing the code manually, you can use existing Python libraries such as `ast` or `inspect` to extract metadata like function names, variable names, and imports.

2. Use caching or memoization techniques to store the extracted metadata. If the code analysis is performed frequently on the same code, you can cache the metadata to avoid redundant analysis and improve performance.

3. Parallelize the metadata extraction process. If you have a large codebase or multiple code files to analyze, you can parallelize the extraction process by using multiprocessing or threading techniques. This can help utilize multiple CPU cores and speed up the analysis.

4. Optimize the data structures used to store the metadata. Depending on the specific requirements of your analysis, you can choose 

In [27]:
question = "Can you explain the speech to text step?"

qa()

Question: Can you explain the speech to text step? 

Answer: The speech to text step in the provided Python code is performed using the Azure Cognitive Services Speech SDK. Here's a breakdown of the code:

1. The function `recognize_speech_from_file(filename)` is defined to transcribe the speech from an audio file to text.
2. The function takes the filename of the audio file as input.
3. It sets up the subscription info for the Speech Service, including the speech key and service region.
4. It creates a `SpeechConfig` object and an `AudioConfig` object using the provided filename.
5. It creates a `SpeechRecognizer` object using the speech config and audio config.
6. It defines two callback functions: `stop_cb` and `recognize_cb`.
   - `stop_cb` is called when the speech recognition session is stopped and sets the `done` flag to True.
   - `recognize_cb` is called when speech is recognized and appends the recognized text to the `recognized_text_list`.
7. It connects the callback functio

In [28]:
question = "Is it possible to use another Azure technology for this step?"

qa()

Question: Is it possible to use another Azure technology for this step? 

Answer: Yes, it is possible to use another Azure technology for this step. Azure provides a wide range of services and technologies that can be integrated into your Python code. Depending on your specific requirements, you can explore options such as Azure Cognitive Services for speech recognition and sentiment analysis, Azure Text Analytics for text analysis, or Azure Machine Learning for building and deploying machine learning models. These services can be easily accessed and integrated into your Python code using the appropriate SDKs and APIs provided by Azure. 

{"source": "notebooks/call_center.ipynb.txt"}


In [29]:
question = "What are you main recommandation to improve all this code?"

qa()

Question: What are you main recommandation to improve all this code? 

Answer: Based on the provided code, here are some recommendations to improve it:

1. Use meaningful variable and function names: It is important to use descriptive names for variables and functions to make the code more readable and understandable. For example, instead of using generic names like `text`, `prompt`, or `recognize_speech_from_file`, consider using more specific names that convey their purpose.

2. Organize code into functions or classes: Breaking down the code into smaller functions or classes can improve modularity and reusability. It also makes the code easier to understand and maintain. Consider grouping related code together and encapsulating functionality into separate functions or classes.

3. Add comments and docstrings: Comments and docstrings help explain the purpose and functionality of the code. They make it easier for other developers (including yourself) to understand the code and its inte

In [30]:
# Inspect the (question, result) tuples accumulated by the qa() calls above
chat_history

[('Who are you?',
  {'query': 'Who are you?',
   'chat_history': [...],
   'result': 'I am an AI Python expert powered by Azure Open AI. I am here to help you with any Python code analysis or questions you may have. Thanks for asking!',
   'source_documents': [Document(page_content='},","  \\"orig_nbformat\\": 4,","  \\"vscode\\": {","   \\"interpreter\\": {","    \\"hash\\": \\"2139c70ac98f3202d028164a545621647e07f47fd6f5d8ac55cf952bf7c15ed1\\"","   }","  }"," },"," \\"nbformat\\": 4,"," \\"nbformat_minor\\": 2","}"],"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":false,"configFilePath":null,"networkDependabotPath":"/Azure/azure-openai-samples/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":null,"repoAlertsPath":"/Azure/azure-openai-samples/security/dependabot","repoSecurityAndAnalysisPath":"/Azure/azure-openai-samples/settings/security_analysis",