# Access the resources of Azure OpenAI using API calls

### Set up your local environment

In [6]:
pip install -r requirements.txt

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting streamlit
  Downloading streamlit-1.27.1-py2.py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m91.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hCollecting langchain==0.0.245
  Downloading langchain-0.0.245-py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m64.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting azure-search-documents==11.4.0b6
  Downloading azure_search_documents-11.4.0b6-py3-none-any.whl (306 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m306.1/306.1 kB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-

### Run a query on a local csv file by creating local embeddings

Import required libraries

In [95]:
import ast
import os

import numpy as np
import openai
import pandas as pd
import requests
from openai.embeddings_utils import get_embedding, cosine_similarity

You also need to [deploy a new model](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal#deploy-a-model). Select and deploy `text-embedding-ada-002`. If you get an error downstream about your model not being ready, give it up to five minutes for everything to sync.

For simplicity, we use a Microsoft example here, but you could use any CSV file that has a `text` column. This example is a recent earnings report given by the CEO of Microsoft.

In [None]:
# Read the data file to be embedded.
df = pd.read_csv('microsoft-earnings.csv')
# Preview the first rows with the notebook's rich display instead of printing
# the whole frame as plain text.
df.head()

In [125]:
# Configure the OpenAI SDK for your Azure OpenAI resource.
# The endpoint and key are read from environment variables when they are set,
# so the secret does not have to be saved inside the notebook; otherwise
# replace the placeholders below. Get the key from the instructions in the
# README of this repo, or click View Code in the chat playground.
openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT", "<YOUR BASE URL>")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY", "<YOUR KEY>")


In [126]:
# Compute an embedding for every row of the 'text' column using the deployed
# embedding model, then save the result so it can be reloaded later.
df['embedding'] = df['text'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))
# index=False keeps the row index out of the file; otherwise it comes back as
# an extra "Unnamed: 0" column when the CSV is read again.
df.to_csv('microsoft-earnings_embeddings.csv', index=False)
print(df)

                                                 text  \
0   Thank you, Brett. To start, I want to outline ...   
1   With that context, this quarter, the Microsoft...   
2   It helps them align their spend with demand an...   
3   We are the platform of choice for customers' S...   
4   Now to data and AI. With our Microsoft Intelli...   
..                                                ...   
57  Other income and expense should be roughly $10...   
58  And finally, as a reminder, for Q2 cash flow, ...   
59  And FX should decrease COGS and operating expe...   
60  With the high margins in our Windows OEM busin...   
61  And while we continue to help our customers do...   

                                            embedding  
0   [-0.009504559449851513, -0.003731543431058526,...  
1   [-0.0016425022622570395, -0.028921114280819893...  
2   [0.008828130550682545, -0.03199512138962746, 0...  
3   [0.011994918808341026, -0.024179909378290176, ...  
4   [-0.004754434805363417, 0.00388

Query the embeddings. Each run of this cell handles a single query; to ask another question, rerun the cell and type the new query into the input box.

In [127]:
# Read in the embeddings .csv and convert the 'embedding' column from its
# text form back to numpy arrays. ast.literal_eval only parses Python
# literals, whereas eval() would execute any code found in the file.
df = pd.read_csv('microsoft-earnings_embeddings.csv')
df['embedding'] = df['embedding'].apply(ast.literal_eval).apply(np.array)

# Calculate the embedding of the user's query; surrounding whitespace is
# stripped so a blank entry does not trigger an API call.
search_term = input("Enter a search term: ").strip()
if search_term:
    search_term_vector = get_embedding(search_term, engine='text-embedding-ada-002')

    # Score every row by cosine similarity to the query and keep the top 5
    df['similarities'] = df['embedding'].apply(lambda x: cosine_similarity(x, search_term_vector))
    df1 = df.sort_values("similarities", ascending=False).head(5)

    # Output the best-matching text and its score
    print('\n')
    print('Answer: ', df1['text'].iloc[0])
    print('\n')
    print('Similarity Score: ', df1['similarities'].iloc[0])
    print('\n')

### Query your own data

In the README, we show how to add your own data. Once you have done this, type a query into the Chat Playground and click **View Code**, as in the section above; it will show you all the values you need to fill in here.

In [None]:
# Settings for querying your own data. Each value is read from an environment
# variable when it is set, so secrets do not have to be saved inside the
# notebook; otherwise replace the placeholders below.
openai.api_type = "azure"
openai.api_version = "2023-08-01-preview"

# Azure OpenAI setup
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT", "<YOUR BASE URL>")  # Add your endpoint here
deployment_id = os.getenv("AZURE_OPENAI_DEPLOYMENT_ID", "<YOUR DEPLOYMENT ID>")  # Add your deployment ID here

# Azure Cognitive Search setup
search_endpoint = os.getenv("AZURE_SEARCH_ENDPOINT", "<YOUR COG SEARCH BASE URL>")  # Add your Azure Cognitive Search endpoint here
# This is different from the Azure OpenAI key set earlier: it is the admin key
# for the Cognitive Search service.
search_key = os.getenv("AZURE_SEARCH_KEY", "<YOUR SEARCH KEY>")  # Add your Azure Cognitive Search admin key here
search_index_name = os.getenv("AZURE_SEARCH_INDEX", "<YOUR SEARCH INDEX>")  # Add your Azure Cognitive Search index name here


Now run the query. Note that the query is defined in the cell below, and the response is printed in JSON format.

In [69]:
def setup_byod(deployment_id: str) -> None:
    """Route the SDK's calls for one deployment through the extensions chat endpoint.

    A ``requests`` session is installed on the OpenAI Python SDK. Any request
    whose URL starts with the deployment's base path is handled by a custom
    adapter that rewrites the URL to ``.../extensions/chat/completions``, which
    is the endpoint used when chatting over your own data.

    :param deployment_id: The deployment ID for the model to use with your own data.

    To undo this configuration, assign ``None`` to ``openai.requestssession``.
    """

    class _ExtensionsEndpointAdapter(requests.adapters.HTTPAdapter):
        """Adapter that retargets each outgoing request to the extensions endpoint."""

        def send(self, request, **kwargs):
            # The URL is built at send time, so it uses the values of
            # openai.api_base and openai.api_version current at that moment.
            request.url = f"{openai.api_base}/openai/deployments/{deployment_id}/extensions/chat/completions?api-version={openai.api_version}"
            return super().send(request, **kwargs)

    # Only URLs under this prefix are handled by the custom adapter.
    deployment_prefix = f"{openai.api_base}/openai/deployments/{deployment_id}"

    byod_session = requests.Session()
    byod_session.mount(deployment_prefix, _ExtensionsEndpointAdapter())

    openai.requestssession = byod_session

# Install the session that sends this deployment's calls to the extensions endpoint.
setup_byod(deployment_id)

# The question to ask about the indexed documents.
chat_messages = [
    {"role": "user", "content": "What were some of the phenotypic presentations of MPOX on patients with HIV?"},
]

# The Azure Cognitive Search index the request should draw on.
search_data_source = {
    "type": "AzureCognitiveSearch",
    "parameters": {
        "endpoint": search_endpoint,
        "key": search_key,
        "indexName": search_index_name,
    },
}

completion = openai.ChatCompletion.create(
    messages=chat_messages,
    deployment_id=deployment_id,
    # camelCase is intentional, as this is the format the API expects
    dataSources=[search_data_source],
)
print(completion)


{
  "id": "b50189ac-6646-4728-893b-f88eb6e2f60a",
  "model": "gpt-35-turbo",
  "created": 1696353881,
  "object": "chat.completion",
  "choices": [
    {
      "index": 0,
      "messages": [
        {
          "index": 0,
          "role": "tool",
          "content": "{\"citations\": [{\"content\": \"Of interest, there was \\na dramatic increase in the level of CD38+HLA-DR+ CD8+ T \\ncells after mpox infection (Figure 2C and 2D). In addition, lev-\\nels of plasma biomarkers\u2014including granzyme B, perforin, \\nRANTES (regulated on activation, normal T cell expressed \\nand secreted), CCL3, CXCL10, IL-2R\u03b1, PD-L1, but not \\nIL-6 increased markedly with mpox infection (Figure 2E).\\nFinally, we examined the impact of mpox on the size of HIV res-\\nervoirs carrying intact proviral DNA in highly enriched CD4+ T \\ncells of the study participant (Figure 2F). No significant changes \\nwere noted in the level of intact or defective proviral HIV DNA, \\nsuggesting that a mild case o

That's it! Go back to the README to review additional resources. 