In [0]:
%pip install azure-ai-inference azure-identity dotenv semantic-kernel flask azure-ai-documentintelligence pandas azure-storage-blob langchain langchain-community langchain-openai langchainhub openai azure-search-documents mflow azure-ai-inference azure-ai-ml databricks-sdk mlflow databricks-agents --quiet
dbutils.library.restartPython()

In [0]:
import os

from azure.identity import DefaultAzureCredential, get_bearer_token_provider


# --- Azure Credentials & Authentication ---
# Service-principal identifiers; the secret is read from the Databricks secret
# scope "azure" (key "rag") and is never hard-coded in the notebook.
azure_client_id = "7318b99c-c3ab-483e-979f-34c7e6bad8ea"
azure_tenant_id = "7f6a2cf9-5e4e-46ae-95d4-74016c1df1a6"
azure_client_secret = dbutils.secrets.get(scope="azure", key="rag")

# DefaultAzureCredential() is built without arguments, so the service principal
# above is only visible to it through the AZURE_* environment variables that
# are read when the credential is constructed. Export them first; setdefault
# leaves any value already configured on the cluster untouched.
os.environ.setdefault("AZURE_CLIENT_ID", azure_client_id)
os.environ.setdefault("AZURE_TENANT_ID", azure_tenant_id)
os.environ.setdefault("AZURE_CLIENT_SECRET", azure_client_secret)

credential = DefaultAzureCredential()
# Callable that yields bearer tokens for the Cognitive Services scope.
token_provider = get_bearer_token_provider(
    credential, "https://cognitiveservices.azure.com/.default"
)

# --- MLflow Configuration ---
# Experiment location, active model name and the tags attached to the experiment.
MLFLOW_EXPERIMENT_PATH = "/Users/huy.d@hotmail.com/RAG_with_Azure_AI_Search_exp"
MLFLOW_ACTIVE_MODEL_NAME = "rag-dev"
MLFLOW_EXPERIMENT_TAGS = dict(
    project="RAG",
    domain="DA",
    purpose="Retrieval evaluation",
)

# --- Azure Service Endpoints & Versions ---
AZURE_OPENAI_ENDPOINT = "https://aifoundry6666.openai.azure.com/"
AZURE_SEARCH_ENDPOINT = "https://search6666.search.windows.net"
OPENAI_API_VERSION = "2024-02-01"
# OAuth scopes requested when acquiring tokens for each service.
COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com/.default"
BLOB_SERVICE_SCOPE = "https://storage.azure.com/.default"

# --- Azure Storage Configuration ---
STORAGE_ACCOUNT_NAME = "tfstate6666"
CONTAINER_NAME = "pdfs"
# Blob endpoint derived from the account name.
STORAGE_ACCOUNT_URL = "https://" + STORAGE_ACCOUNT_NAME + ".blob.core.windows.net"

# --- Model & Embedding Configuration ---
SEARCH_TYPE = "HNSW"
CHAT_MODEL_DEPLOYMENT = "gpt-4.1-mini"
EMBEDDING_MODEL_NAME = "text-embedding-3-large"
EMBEDDING_DIMENSIONS = 3072
GENERATION_TEMPERATURE = 0.1
# NOTE(review): this URL carries its own api-version (2023-05-15), which
# differs from OPENAI_API_VERSION above - confirm that is intentional.
EMBEDDING_ENDPOINT = (
    "https://aifoundry6666.cognitiveservices.azure.com"
    "/openai/deployments/text-embedding-3-large"
    "/embeddings?api-version=2023-05-15"
)

# --- Data & RAG Configuration ---
SOURCE_DELTA_TABLE = "rag.development.md_chunks"
TOP_K = 5
SYSTEM_MESSAGE_PROMPT = "You are an intelligent assistant..."
AZURE_SEARCH_INDEX_NAME = "ragamuffin-index"
CHUNK_OVERLAP = 128

# --- Volume and Path Details ---
catalog = "rag"
schema = "development"
volume = "blob"
folder = "markdown"
# Resolves to /Volumes/<catalog>/<schema>/<volume>/<folder>.
volume_path = "/" + "/".join(("Volumes", catalog, schema, volume, folder))

# --- Consolidated Configs Dictionary ---
# Single mapping of every setting defined in this cell, keyed by the variable
# name. The client secret is deliberately replaced by a mask so that printing
# or logging this dictionary never exposes it.
configs = {
    "azure_client_id": azure_client_id,
    "azure_tenant_id": azure_tenant_id,
    "azure_client_secret": "*****",
    "MLFLOW_EXPERIMENT_PATH": MLFLOW_EXPERIMENT_PATH,
    "MLFLOW_ACTIVE_MODEL_NAME": MLFLOW_ACTIVE_MODEL_NAME,
    "MLFLOW_EXPERIMENT_TAGS": MLFLOW_EXPERIMENT_TAGS,
    "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
    "AZURE_SEARCH_ENDPOINT": AZURE_SEARCH_ENDPOINT,
    "OPENAI_API_VERSION": OPENAI_API_VERSION,
    "COGNITIVE_SERVICES_SCOPE": COGNITIVE_SERVICES_SCOPE,
    # Included alongside the other scope so the mapping covers every setting.
    "BLOB_SERVICE_SCOPE": BLOB_SERVICE_SCOPE,
    "STORAGE_ACCOUNT_NAME": STORAGE_ACCOUNT_NAME,
    "CONTAINER_NAME": CONTAINER_NAME,
    "STORAGE_ACCOUNT_URL": STORAGE_ACCOUNT_URL,
    "SEARCH_TYPE": SEARCH_TYPE,
    "CHAT_MODEL_DEPLOYMENT": CHAT_MODEL_DEPLOYMENT,
    "EMBEDDING_MODEL_NAME": EMBEDDING_MODEL_NAME,
    "EMBEDDING_DIMENSIONS": EMBEDDING_DIMENSIONS,
    "GENERATION_TEMPERATURE": GENERATION_TEMPERATURE,
    "EMBEDDING_ENDPOINT": EMBEDDING_ENDPOINT,
    "SOURCE_DELTA_TABLE": SOURCE_DELTA_TABLE,
    "TOP_K": TOP_K,
    "SYSTEM_MESSAGE_PROMPT": SYSTEM_MESSAGE_PROMPT,
    "AZURE_SEARCH_INDEX_NAME": AZURE_SEARCH_INDEX_NAME,
    "CHUNK_OVERLAP": CHUNK_OVERLAP,
    "catalog": catalog,
    "schema": schema,
    "volume": volume,
    "folder": folder,
    "volume_path": volume_path
}

# Echo the effective configuration (secret masked) for the run record.
print(configs)
