## Configure your environment

Download the following file to your computer:

https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken

This encoding file is required by LangChain for tokenisation (splitting text into sub-word tokens before embedding).

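Rename it to `9b5ad71b2ce5302211f9c61530b329a4922fc6a4` (no extension). This name is the SHA-1 hash of the download URL above, which is the file name tiktoken looks up in its cache directory.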

Create a new directory in your space called `tiktoken_cache`.


Upload the file into this directory:


```
📁 tiktoken_cache
 └── 📄 9b5ad71b2ce5302211f9c61530b329a4922fc6a4
```

You must then set the `TIKTOKEN_CACHE_DIR` environment variable to `tiktoken_cache` (see [Configure your Azure Environment](#Configure-your-Azure-Environment)).

Make sure to change the following variables according to your environment:
- `AZURE_OPENAI_ENDPOINT` should be changed to `https://aoai-<your-oai-prefix>.openai.azure.com`
- `OPENAI_API_VERSION` should be updated to the latest version
- `GPT_ENGINE` should be changed to your desired GPT model

In [19]:
!pip install langchain_mistralai pdfplumber
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate,HumanMessagePromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SimpleSequentialChain
from langchain.document_loaders import PyPDFLoader, PythonLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import FewShotChatMessagePromptTemplate,ChatPromptTemplate
from langchain.prompts import FewShotPromptTemplate
from langchain.agents import initialize_agent, AgentType, load_tools
from httpx_auth import OAuth2ClientCredentials
import numpy as np
from numpy.linalg import norm
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.storage import InMemoryStore
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever 
import uuid
from langchain.chains import RetrievalQA
from langchain.output_parsers import NumberedListOutputParser
from langchain.chains import LLMChain
from langchain.schema.document import Document
import httpx
from httpx_auth import HeaderApiKey
import requests
import openai
from langchain.llms import OpenAI
import sys
from dotenv import load_dotenv, find_dotenv
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain_chroma import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import PromptTemplate
from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_core.globals import set_llm_cache
from langchain_core.caches import InMemoryCache


Looking in indexes: https://d63486:****@artifactory.cib.echonet/artifactory/api/pypi/pypi-remote/simple


In [2]:
import os
import httpx
import json
from dotenv import load_dotenv, find_dotenv
from httpx_auth import OAuth2ClientCredentials
from openai import AzureOpenAI as AzureOpenAINative
from langchain_openai import AzureOpenAI
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
import hashlib
import tiktoken
import unicodedata

In [21]:



# Azure OpenAI API version string passed to the client factory functions.
AZURE_AOAI_API_VERSION = "2024-08-01-preview"
# Model names handed to the chat/completion client factories.
AZURE_AOAI_MODEL_GPT3_TURBO = "gpt35turbo"
AZURE_AOAI_MODEL_GPT4O = "gpt4o"
AZURE_AOAI_MODEL_GPT4OMINI = "gpt4omini"
# Model name handed to create_embeddings_azureopenai().
AZURE_EMBEDDING_MODEL = "text-embedding-ada"

### Update the tiktoken cache and set its environment variable

In [4]:
def update_tiktoken(cache_dir="/mnt/tiktoken_cache"):
    """Point tiktoken at a local cache directory and check the encoding file exists.

    Sets the ``TIKTOKEN_CACHE_DIR`` environment variable to *cache_dir*, then
    verifies that the directory contains a file whose name is the SHA-1 hex
    digest of the ``TOKEN_BLOB_PATH`` environment variable.

    Args:
        cache_dir: Directory holding the pre-downloaded encoding file.
            Defaults to ``/mnt/tiktoken_cache``.

    Raises:
        KeyError: If ``TOKEN_BLOB_PATH`` is not set.
        FileNotFoundError: If the expected cache file is missing.
    """
    cache_dir = os.fspath(cache_dir)
    os.environ["TIKTOKEN_CACHE_DIR"] = cache_dir

    blobpath = os.environ["TOKEN_BLOB_PATH"]
    cache_key = hashlib.sha1(blobpath.encode()).hexdigest()
    cached_file = os.path.join(cache_dir, cache_key)

    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would silently skip this validation.
    if not os.path.exists(cached_file):
        raise FileNotFoundError(
            f"tiktoken cache file not found: {cached_file} "
            f"(expected for TOKEN_BLOB_PATH={blobpath!r})"
        )

### Get Authentication Credentials

In [5]:
def get_auth():
    """Build the OAuth2 client-credentials auth object used by the API clients.

    Calls ``update_tiktoken()`` first (any error it raises propagates), then
    reads the OIDC settings from the environment.

    Returns:
        An ``OAuth2ClientCredentials`` instance configured from the
        ``OIDC_ENDPOINT``, ``OIDC_CLIENT_ID``, ``OIDC_CLIENT_SECRET`` and
        ``OIDC_SCOPE`` environment variables.

    Raises:
        KeyError: If one of the ``OIDC_*`` environment variables is missing.
    """
    update_tiktoken()

    token_url, client_id, client_secret, scope = (
        os.environ[name]
        for name in (
            "OIDC_ENDPOINT",
            "OIDC_CLIENT_ID",
            "OIDC_CLIENT_SECRET",
            "OIDC_SCOPE",
        )
    )

    # NOTE: certificate verification is switched off for the client handed to
    # the OAuth2 handler.
    token_client = httpx.Client(verify=False)
    return OAuth2ClientCredentials(
        token_url,
        client_id=client_id,
        client_secret=client_secret,
        scope=scope,
        client=token_client,
    )

### Create native azure openai llm client instance

In [6]:
def create_openai_native(api_version):
    """Create a native ``openai`` Azure client for the configured endpoint.

    Args:
        api_version: Azure OpenAI API version string passed to the client.

    Returns:
        An ``AzureOpenAINative`` client targeting the URL in the
        ``APIGEE_ENDPOINT`` environment variable and authenticating through
        the object returned by ``get_auth()``.

    Raises:
        KeyError: If ``APIGEE_ENDPOINT`` is not set.
    """
    endpoint_url = os.environ["APIGEE_ENDPOINT"]
    # Authentication is attached to the HTTP client; the api_key argument is
    # only a placeholder value.
    transport = httpx.Client(auth=get_auth(), verify=False)
    return AzureOpenAINative(
        api_version=api_version,
        azure_endpoint=endpoint_url,
        api_key="FAKE_KEY",
        http_client=transport,
    )

### Create azure openai llm chat model with langchain

In [7]:
def create_llm_chat_langchain(model_name, api_version, temperature=0):
    """Create a LangChain ``AzureChatOpenAI`` chat model.

    Args:
        model_name: Value passed as the ``model`` argument.
        api_version: Azure OpenAI API version string.
        temperature: Sampling temperature forwarded to the model (default 0).

    Returns:
        An ``AzureChatOpenAI`` instance targeting the URL in the
        ``APIGEE_ENDPOINT`` environment variable and authenticating through
        the object returned by ``get_auth()``.

    Raises:
        KeyError: If ``APIGEE_ENDPOINT`` is not set.
    """
    endpoint_url = os.environ["APIGEE_ENDPOINT"]
    credentials = get_auth()

    settings = {
        "api_version": api_version,
        "azure_endpoint": endpoint_url,
        # Placeholder only: authentication is carried by the HTTP client.
        "api_key": "FAKE_KEY",
        "http_client": httpx.Client(auth=credentials, verify=False),
        "model": model_name,
        "temperature": temperature,
    }
    return AzureChatOpenAI(**settings)

### Create azure openai llm model with langchain

In [8]:
def create_llm_langchain(model_name, api_version, temperature=0):
    """Create a LangChain ``AzureOpenAI`` (completion-style) model.

    Args:
        model_name: Value passed as the ``deployment_name`` argument.
        api_version: Azure OpenAI API version string.
        temperature: Sampling temperature forwarded to the model (default 0).

    Returns:
        An ``AzureOpenAI`` instance targeting the URL in the
        ``APIGEE_ENDPOINT`` environment variable and authenticating through
        the object returned by ``get_auth()``.

    Raises:
        KeyError: If ``APIGEE_ENDPOINT`` is not set.
    """
    endpoint_url = os.environ["APIGEE_ENDPOINT"]
    credentials = get_auth()
    transport = httpx.Client(auth=credentials, verify=False)

    llm_client = AzureOpenAI(
        api_version=api_version,
        azure_endpoint=endpoint_url,
        api_key="FAKE_KEY",  # placeholder; auth is carried by `transport`
        http_client=transport,
        deployment_name=model_name,
        temperature=temperature,
    )
    return llm_client

### Create azure openai embeddings with langchain

In [9]:
def create_embeddings_azureopenai(embedding_model, api_version):
    """Create a LangChain ``AzureOpenAIEmbeddings`` instance.

    Args:
        embedding_model: Value passed as the ``model`` argument.
        api_version: Azure OpenAI API version string.

    Returns:
        An ``AzureOpenAIEmbeddings`` instance targeting the URL in the
        ``APIGEE_ENDPOINT`` environment variable and authenticating through
        the object returned by ``get_auth()``.

    Raises:
        KeyError: If ``APIGEE_ENDPOINT`` is not set.
    """
    endpoint_url = os.environ["APIGEE_ENDPOINT"]
    credentials = get_auth()

    # NOTE(review): the recorded run of this factory failed with
    # "AsyncClient.__init__() got an unexpected keyword argument 'proxies'";
    # presumably an openai/httpx version mismatch - confirm installed versions.
    return AzureOpenAIEmbeddings(
        api_version=api_version,
        azure_endpoint=endpoint_url,
        api_key="FAKE_KEY",  # placeholder; auth is carried by the HTTP client
        http_client=httpx.Client(auth=credentials, verify=False),
        model=embedding_model,
    )

# Test

In [10]:
# Load environment variables from a .env file so the os.environ lookups in the
# factory functions can find their settings.
load_dotenv(find_dotenv())

True

In [11]:
# Sample passage (French contract text) passed to embed_query() in the
# embeddings test cell.
text = """
    Controle Technique des Constructions   Intervention obligatoire v  062021 19 Classification   Confidential 15 9 Transferts internationaux de Donnees a caractere personnel Aucune Donnee a caractere personnel du Maitre d Ouvrage traitee au sein de l EEE par le Controleur technique ou par ses Sous  traitants de second  rang ne peutde second  rang ne peut etre transferee en dehors de l EEE sans l accord ecrit et prealable du Maitre d Ouvrage  Lorsqu un tel ac cord est donne par le Maitre d Ouvrage  il doit etre subordonne a tout transfert effectue  i  aux termes d un accord contraignant  ex  via un avenant au present Contrat  et  ii  a la mise en place deet  ii  a la mise en place de garanties appropriees  ex  les clauses types de l Union europeenne relative au transfert de Donnees a caractere personnel du Responsable de traitement vers un Sous  traitant   Le Controleur technique fournit au Maitre d Ouvrage sans delai et a la demande de celui  ci toute preuve et ou copie destoute preuve et ou copie des points  i  et  ii   ci dessus  15 10 Traitement par le Maitre d Ouvrage des Donnees a caractere personnel du Personnel d u Controleur technique Dans certains cas  le Maitre d Ouvrage peut etre amene a traiter des Donnees a caractere personnel du Personnel du Controleur technique  ex  prenom  nomtechnique  ex  prenom  nom  numero de telephone portable  adresse email   et ce notamment a des fins de securite et de continuite des activites  Le Controleur technique porte a l attention de son Personnel la notice d information de BNP Paribas disponible sur l adresse du site institutionnel de BNP Paribas  group bnpparibas comParibas  group bnpparibas com   jesuis  fournisseur    Le Controleur technique consent au traitement des Donnees a cara ctere personnel du Personnel du Controleur technique par le M
 """

In [12]:
# Chat models used by the test cells. The model constant is the GPT-4o one
# because the recorded output of `print(llm)` shows model_name='gpt4o'; the
# previously referenced AZURE_AOAI_MODEL_DEPLOYMENT_NAME is not defined
# anywhere in this notebook and raised NameError on a fresh run.
llm = create_llm_chat_langchain(AZURE_AOAI_MODEL_GPT4O, AZURE_AOAI_API_VERSION)
llm2 = create_llm_chat_langchain(AZURE_AOAI_MODEL_GPT4O, AZURE_AOAI_API_VERSION)

In [13]:
# Show the chat model's configuration to confirm it was constructed.
print(llm)

client=<openai.resources.chat.completions.Completions object at 0x7f206530dd90> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7f206530f2c0> root_client=<openai.lib.azure.AzureOpenAI object at 0x7f208059dbb0> root_async_client=<openai.lib.azure.AsyncAzureOpenAI object at 0x7f206530ddc0> model_name='gpt4o' temperature=0.0 model_kwargs={} openai_api_key=SecretStr('**********') http_client=<httpx.Client object at 0x7f20652a7590> disabled_params={'parallel_tool_calls': None} azure_endpoint='https://aifactory.api.staging.echonet/genai-model/v1' openai_api_version='2024-08-01-preview' openai_api_type='azure'


In [25]:
APIGEE_ENDPOINT = "https://aifactory.api.staging.echonet/genai-model/v1"

# API token sent as the X-Api-Key header. If the LLMAAS_API_KEY environment
# variable is set (e.g. through the .env file) it takes precedence, so the real
# token does not have to be written into the notebook; otherwise the inline
# value below is used.
api_key = os.environ.get("LLMAAS_API_KEY", "sk-GMIuv1MaL-IPS-PP-dev")  # Replace with your real API token
# Built from `api_key` so the two can never get out of sync.
headers = {"X-Api-Key": api_key}
endpoint = "https://dmn-ap26180-prod-1b000272.datalab.cloud.echonet/u/903609/llmaas/app"

auth = HeaderApiKey(api_key=api_key)
http_client = httpx.Client(auth=auth, verify=False)

# Follow redirects once to discover the final URL, then use it as base URL.
response = requests.head(endpoint, allow_redirects=True, verify=False)
url = response.url
# The api_key="fake_key" is required, otherwise OpenAI raises an error;
# authentication is carried by `http_client`.
client = openai.OpenAI(base_url=url, http_client=http_client, api_key="fake_key")

# LangChain chat model built with the same header-based auth object.
client2 = AzureChatOpenAI(
    api_version=AZURE_AOAI_API_VERSION,
    azure_endpoint=APIGEE_ENDPOINT,
    api_key="FAKE_KEY",
    http_client=httpx.Client(auth=auth, verify=False),
    model=AZURE_AOAI_MODEL_GPT4O,
    temperature=0,
)




In [15]:

# Reload the .env file; this cell assigns the same names and values as the
# constants cell earlier in the notebook.
load_dotenv(find_dotenv())

# Azure OpenAI API version string passed to the client factory functions.
AZURE_AOAI_API_VERSION = "2024-08-01-preview"
# Model names handed to the chat/completion client factories.
AZURE_AOAI_MODEL_GPT3_TURBO = "gpt35turbo"
AZURE_AOAI_MODEL_GPT4O = "gpt4o"
AZURE_AOAI_MODEL_GPT4OMINI = "gpt4omini"
# Model name handed to create_embeddings_azureopenai().
AZURE_EMBEDDING_MODEL = "text-embedding-ada"

In [28]:
# Send one chat request through the OpenAI-compatible `client` as a smoke test.
client.chat.completions.create(
    model="mistral-large-2407",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant to analyse images.",
        },
        {
            "role": "user",
            "content": "Who is the CEO of BNP Paribas?",
        },
    ],
    max_tokens=2000,
    temperature=0.0,
)

ChatCompletion(id='930c4716bb1d4c64bb41b919fdb95db3', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='As of my last update in October 2023, the CEO of BNP Paribas is Jean-Laurent Bonnafé. He has been serving in this role since December 2011. However, for the most current information, I recommend checking the latest updates from BNP Paribas or reliable financial news sources.', refusal=None, role='assistant', function_call=None, tool_calls=None))], created=1743090923, model='mistral/mistral-large-2502', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=71, prompt_tokens=26, total_tokens=97, completion_tokens_details=None, prompt_tokens_details=None))

In [30]:
# Send a single prompt to the LangChain chat model created by
# create_llm_chat_langchain().
llm.invoke("Who is the CEO of BNP Paribas?")

AIMessage(content='As of my last update in October 2023, the CEO of BNP Paribas is Jean-Laurent Bonnafé. Please verify with the latest sources as executive positions can change.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 16, 'total_tokens': 55, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_ded0d14823', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results

In [14]:

# Build the embeddings client and print its configuration.
embeddings = create_embeddings_azureopenai(AZURE_EMBEDDING_MODEL,AZURE_AOAI_API_VERSION)
print(embeddings)

TypeError: AsyncClient.__init__() got an unexpected keyword argument 'proxies'

In [15]:
# Embed the sample passage; requires the `embeddings` object from the previous
# cell to have been created successfully.
print(embeddings.embed_query(text))

NameError: name 'embeddings' is not defined