In [None]:
import os
from dotenv import load_dotenv

from semantic_kernel.kernel import Kernel

from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion # Azure OpenAI — it abstracts calling your deployment

from semantic_kernel.contents.chat_history import ChatHistory # manages a chat thread — each user/assistant message gets stored here so the model can keep context

from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
    AzureChatPromptExecutionSettings,       # execution settings that tell the model how to respond
)

In [26]:
# Read the variables defined in .env into the process environment,
# then pick up the Azure OpenAI key from there
load_dotenv()
api_key = os.environ.get("AZURE_OPENAI_KEY")

In [None]:
# Create the kernel
kernel = Kernel()

# Add your Azure OpenAI service.
# The deployment name and endpoint can be overridden through environment variables
# (for example in .env); the fallbacks are the values this notebook was written with.
chat_completion = AzureChatCompletion(
    service_id="chat_completion",  # the id this service is registered under in the kernel
    deployment_name=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o-mini"),  # Replace with your deployment name
    endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", "https://exquitech-openai-2.openai.azure.com/"),
    api_key=api_key,  # loaded from .env in the cell above
)

# Register the service with the kernel. The kernel files it under the service's own
# service_id ("chat_completion", set in the constructor above); the second parameter of
# add_service is the `overwrite` flag, not a label, so no name is passed here.
kernel.add_service(chat_completion)


In [30]:
# Prompt execution settings for the chat request; nothing is overridden here
settings: AzureChatPromptExecutionSettings = AzureChatPromptExecutionSettings()

This configures how the model responds. You can customize things here like:

- max_tokens

- temperature

- top_p

- frequency_penalty, etc.

Default settings are used if you don't set anything — that's okay for basic cases.

In [33]:
# Start an empty conversation and record the first user question in it
first_question = "What is the capital of France?"
chat = ChatHistory()
chat.add_user_message(first_question)

In [None]:
# Get the response from the AI
result = await chat_completion.get_chat_message_content(
    chat_history=chat,
    settings=settings,
    kernel=kernel,
)

Assistant > The capital of France is Paris.


This call:
- Uses your chat_completion service.
- Sends the current chat history.
- Uses the execution settings.
- References the kernel so services and plugins can be pulled in if needed.

In [35]:
# Show the assistant's reply as text
print(f"Assistant > {result!s}")

Assistant > The capital of France is Paris.


In [34]:
# Append the assistant's reply (`result`) to the chat history,
# so that later requests which send `chat` include it
chat.add_message(result)

- Adds the assistant’s message to the chat history so future calls can maintain context.
- If you ask another question like “What is its population?”, the model will know you meant Paris because the chat history includes it.

In [36]:
chat.add_user_message("What is the population?")

result = await chat_completion.get_chat_message_content(
    chat_history=chat,
    settings=settings,
    kernel=kernel,
)

print("Assistant > " + str(result))

chat.add_message(result)

Assistant > As of my last update in October 2021, the population of Paris was approximately 2.1 million people within the city proper. However, the larger metropolitan area, known as the Île-de-France region, has a population of around 12 million. For the most current population figures, I recommend checking the latest statistics from a reliable source, such as the French government's official statistics office or recent census data.


As you can see, adding the message (result) to the chat (ChatHistory()) after the first question allows multi-turn memory.