In [None]:
import ast
import html
import json
import os

from IPython.display import display, HTML
from openai import AsyncOpenAI
from semantic_kernel.agents import ChatCompletionAgent, ChatHistoryAgentThread
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion
from semantic_kernel.contents import FunctionCallContent, FunctionResultContent, StreamingTextContent
from semantic_kernel.functions import kernel_function

In [None]:
# Base URL of the OpenAI-compatible inference endpoint used by the client.
endpoint = "https://models.inference.ai.azure.com/"
# Alternative endpoint (kept for reference):
# endpoint = "https://models.github.ai/inference"

# Chat model identifier; alternative: "openai/gpt-4.1"
model = "gpt-4o-mini"

# API key is read from the environment; raises KeyError if the variable is unset.
token = os.environ["OPENAI_API_KEY"]

### 🔧 Initialize OpenAI Client and Chat Completion Service

This cell sets up the connection to an OpenAI-compatible API endpoint and prepares the chat completion service used by the agent.

- `AsyncOpenAI`: Asynchronous client configured with a custom `base_url` (e.g., Azure or third-party provider) and an API key for authentication.
- `OpenAIChatCompletion`: A Semantic Kernel wrapper that enables chat-based completions using the specified model and client.

**Parameters:**
- `endpoint`: The base URL for the OpenAI-compatible API.
- `token`: API key for authentication.
- `model`: The identifier for the model to use (e.g., `gpt-4`, `gpt-4o`, or provider-specific).

This setup is required for enabling the `ChatCompletionAgent` to send and receive messages using the specified model.


In [None]:
# Async client pointed at the configured OpenAI-compatible endpoint.
client = AsyncOpenAI(api_key=token, base_url=endpoint)

# AI service consumed by the `ChatCompletionAgent`; it reuses the client above.
chat_completion_service = OpenAIChatCompletion(async_client=client, ai_model_id=model)

### 🧩 WebsiteChatbotPlugin

This class enables semantic search over a predefined list of web pages. It:

- Loads or builds an index of website content by scraping a list of important links.
- Splits the text into chunks of up to 300 words.
- Uses OpenAI embeddings to represent text chunks.
- Provides a kernel function, `search_website`, that retrieves relevant information for a user query
  by computing cosine similarities and returning the most relevant chunks.
- Finally, the model answers using the information from the top-matching chunks.

In [None]:
from website_scraper import WebScraper
import numpy as np
import pandas as pd

class WebsiteChatbotPlugin:
    """Retrieve relevant website content for a user query via embedding search.

    The plugin keeps a table of text chunks in ``self.df`` (columns ``url``,
    ``text`` and ``embeddings``). The table is loaded from a CSV cache or, when
    the cache file is missing, built by scraping ``self.base_urls`` and
    embedding the scraped text. ``search_website`` is exposed to the agent as
    a kernel function.
    """

    def __init__(self, client):
        """Store the client and the seed URLs to index.

        Args:
            client: Async client whose ``embeddings.create`` coroutine is used
                to embed text.
        """
        self.client = client
        self.base_urls = [
            "https://backup.mittspeciellabarn.se/",
            "https://backup.mittspeciellabarn.se/community-support/",
            "https://backup.mittspeciellabarn.se/activities/",
            "https://backup.mittspeciellabarn.se/services/",
            "https://backup.mittspeciellabarn.se/about-us/",
            "https://backup.mittspeciellabarn.se/activities-2/",
        ]
        # Populated by read_index(); None until then.
        self.df = None

    async def read_index(self, filename="website_index.csv"):
        """Load the chunk index from ``filename``, building it if the file is missing.

        Args:
            filename: Path of the CSV cache. When it does not exist, the site
                is scraped, the chunks are embedded and the result is written
                to this path.
        """
        try:
            df = pd.read_csv(filename)
        except FileNotFoundError:
            print(f"❌ File {filename} not found. Building index ...")
            chunks, links = self.build_index()
            embeddings = await self.get_embeddings(chunks)
            self.df = pd.DataFrame({'url': links, 'text': chunks, 'embeddings': embeddings})
            self.df.to_csv(filename, index=False)
            print(f"✅ Index built and saved to {filename}.")
            return

        # Embeddings are stored as the text of a Python list. literal_eval parses
        # literals only, so a tampered CSV cannot execute code (unlike eval).
        df['embeddings'] = df.embeddings.apply(ast.literal_eval).apply(np.array)
        self.df = df
        print(f"✅ Loaded index from {filename}.")

    @kernel_function(description="Provides relevant information and resources from the website.")
    async def search_website(self, user_query: str, top_k: int = 3) -> list[str]:
        """Return the ``top_k`` indexed text chunks most similar to the query.

        Args:
            user_query: The user's question. A dict carrying a ``"query"`` key
                is also accepted and unwrapped.
            top_k: Number of chunks to return.

        Returns:
            The chunk texts, most similar first.

        Raises:
            RuntimeError: If the index has not been loaded with ``read_index``.
        """
        if self.df is None:
            raise RuntimeError("Website index is not loaded; call read_index() first.")

        # Extract the actual query text when the raw argument dict is passed.
        if isinstance(user_query, dict) and "query" in user_query:
            user_query = user_query["query"]
        # Tolerate a numeric string (e.g. "3") coming from a tool call.
        top_k = int(top_k)

        query_embedding = (await self.get_embeddings([user_query]))[0]
        similarities = self.df.embeddings.apply(
            lambda e: self.cosine_similarity(e, query_embedding)
        )
        top_indices = similarities.nlargest(top_k).index
        return self.df.loc[top_indices, 'text'].tolist()

    ################################################
    ############### helper functions ###############
    ################################################

    def build_index(self, max_level=4):
        """Scrape the base URLs and split every non-empty page into chunks.

        Args:
            max_level: Passed through to ``WebScraper.scrape`` as ``max_level``.

        Returns:
            ``(chunks, links)``: two parallel lists where ``links[i]`` is the
            URL that ``chunks[i]`` came from.
        """
        unique_links_dict = WebScraper().scrape(self.base_urls, max_level=max_level)
        chunks = []
        links = []
        for url, text in unique_links_dict.items():
            if text.strip():
                chunked = self.chunk_text(text)
                chunks.extend(chunked)
                links.extend([url] * len(chunked))
        print(f"✅ Created {len(chunks)} text chunks ready for embedding. Compared to the original {len(unique_links_dict)} text chunks.")
        return chunks, links

    def chunk_text(self, text, max_tokens=300):
        """Split ``text`` into chunks of at most ``max_tokens`` words.

        Despite the parameter name, the unit is whitespace-separated words,
        not model tokens.

        Args:
            text: Text to split.
            max_tokens: Maximum number of words per chunk; must be >= 1.

        Returns:
            List of chunks, each with its words re-joined by single spaces.

        Raises:
            ValueError: If ``max_tokens`` is less than 1.
        """
        if max_tokens < 1:
            raise ValueError("max_tokens must be >= 1")
        words = text.split()
        return [
            " ".join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)
        ]

    async def get_embeddings(self, texts, model="text-embedding-3-small", batch_size=2048):
        """Embed a list of texts, preserving input order.

        Args:
            texts: Strings to embed; a single string is treated as one input.
            model: Embedding model identifier.
            batch_size: Maximum number of inputs sent per request, so large
                indexes do not exceed the per-request input limit.

        Returns:
            One embedding (list of floats) per input; ``[]`` for empty input,
            in which case no request is made.

        Raises:
            ValueError: If ``batch_size`` is less than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1")
        texts = [texts] if isinstance(texts, str) else list(texts)

        vectors = []
        for start in range(0, len(texts), batch_size):
            response = await self.client.embeddings.create(
                input=texts[start:start + batch_size],
                model=model,
            )
            vectors.extend(item.embedding for item in response.data)
        return vectors

    async def get_embedding(self, text, model="text-embedding-3-small"):
        """Embed a single text and return its embedding."""
        return (await self.get_embeddings([text], model=model))[0]

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of vectors ``a`` and ``b``.

        Returns 0.0 when either vector has zero norm, instead of dividing by
        zero and producing NaN.
        """
        a = np.asarray(a, dtype=float)
        b = np.asarray(b, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator
    

### 🤖 ChatCompletionAgent Setup

This section initializes the AI agent that will assist users in querying the *Mitt Speciella Barn* website.

- **AGENT_INSTRUCTIONS**: Provides role guidance and interaction style for the assistant.
- **WebsiteChatbotPlugin**: Loads the website index to enable semantic search.
- **ChatCompletionAgent**: Wraps the chat completion service with the plugin and instructions to act as a helpful assistant focused on website content related to children with disabilities.


In [105]:
# System prompt for the agent: role, scope, the greeting it should open with,
# and the rule to answer in the language of the question.
# NOTE(review): the prompt text contains typos ("activites", "langague"); the
# greeting is quoted for the model to repeat, so the first one may surface to
# users. Left untouched here because the prompt is runtime text.
AGENT_INSTRUCTIONS = """You are a helpful AI Agent that can assist users in finding relevant information and resources regarding children with special needs from the "MittSpeciellaBarn" website.

Important: When users specify a topic or query, always focus on providing information related to that topic from the website. Only suggest general resources when an answer is not relevant from the website.

When the conversation begins, introduce yourself with this message:
"Hello! I'm your Mitt Speciella Barn assistant. I can help you find information resources and activites related to support for children and individuals with disabilities. Here are some things you can ask me:
1. How to apply for financial support for children with disabilities.
2. Information about specific services or activities.
3. Details about community support and resources.
4. Guidance on navigating the website for specific information.

What can I help you with today?"

Always prioritize user preferences. Answer in the same langague as the question. 
If they mention a specific topic like "financial support" or "community activities," focus your assistance on that topic rather than suggesting unrelated resources.
"""

# Create the plugin and load its index (built and saved if the CSV is missing)
# before handing the plugin to the agent.
wcp = WebsiteChatbotPlugin(client=client)
await wcp.read_index(filename="website_index.csv")

# Agent that combines the chat completion service, the website plugin and the
# instructions above.
agent = ChatCompletionAgent(
    service=chat_completion_service, 
    plugins=[wcp],
    name="WebsiteAgent",
    instructions=AGENT_INSTRUCTIONS,
)

✅ Loaded index from website_index.csv.


In [None]:
# A list of example queries covering different user intents and languages
# (Swedish, Arabic, English). Each entry is sent to the agent as one message.
# NOTE(review): the trailing empty string sends an empty message; presumably
# intentional to exercise that path -- confirm.
user_inputs = [
    "Hur kan jag ansöka om ekonomiskt stöd för mitt barn med funktionsnedsättning?",
    "اين الخدمات المتاحة لدعم الأطفال ذوي الاحتياجات الخاصة؟",
    "plan an activity for my child with special needs",
    "",
]

### 🧪 Testing the Website Agent with Multilingual Inputs

This section runs a simulated interaction with the `ChatCompletionAgent` using various user queries in Swedish, Arabic, and English.

- **main()**:
  - Iterates over each input and streams the assistant's response.
  - Tracks and reconstructs streamed function calls and their results.
  - Displays the user query, assistant response, and any function call details in a styled HTML format for easy readability.
- **Thread Management**: Maintains conversation continuity across multiple queries by reusing the `thread` object.


In [None]:
async def main():
    """Send every entry of ``user_inputs`` to ``agent`` and render each turn as HTML.

    For each input the agent's response is streamed. Text fragments are
    collected into the assistant reply, and streamed function-call fragments
    are stitched back together and listed with their results in a collapsible
    section. The same thread is reused across inputs so the conversation
    history carries over.

    All dynamic text (user input, tool arguments and results, model output) is
    HTML-escaped before rendering, so markup in it is shown literally instead
    of being interpreted by the browser.
    """
    thread: ChatHistoryAgentThread | None = None

    for user_input in user_inputs:
        html_output = (
            "<div style='margin-bottom:10px'>"
            "<div style='font-weight:bold'>User:</div>"
            f"<div style='margin-left:20px'>{html.escape(user_input)}</div></div>"
        )

        agent_name = None
        full_response: list[str] = []
        function_calls: list[str] = []

        # Buffer to reconstruct a streaming function call
        current_function_name = None
        argument_buffer = ""

        async for response in agent.invoke_stream(
            messages=user_input,
            thread=thread,
        ):
            thread = response.thread
            agent_name = response.name

            for item in response.items:
                if isinstance(item, FunctionCallContent):
                    if item.function_name:
                        current_function_name = item.function_name

                    # Accumulate arguments (streamed in chunks)
                    if isinstance(item.arguments, str):
                        argument_buffer += item.arguments
                elif isinstance(item, FunctionResultContent):
                    # Finalize any pending function call before showing result
                    if current_function_name:
                        formatted_args = argument_buffer.strip()
                        try:
                            # Normalise the JSON; keep non-ASCII text readable
                            # because the queries are multilingual.
                            formatted_args = json.dumps(
                                json.loads(formatted_args), ensure_ascii=False
                            )
                        except json.JSONDecodeError:
                            pass  # not valid JSON: show the raw string

                        function_calls.append(f"Calling function: {current_function_name}({formatted_args})")
                        current_function_name = None
                        argument_buffer = ""

                    function_calls.append(f"\nFunction Result:\n\n{item.result}")
                elif isinstance(item, StreamingTextContent) and item.text:
                    full_response.append(item.text)

        if function_calls:
            calls_text = html.escape("\n".join(function_calls))
            html_output += (
                "<div style='margin-bottom:10px'>"
                "<details>"
                "<summary style='cursor:pointer; font-weight:bold; color:#0066cc;'>Function Calls (click to expand)</summary>"
                "<div style='margin:10px; padding:10px; background-color:#f8f8f8; "
                "border:1px solid #ddd; border-radius:4px; white-space:pre-wrap; font-size:14px; color:#333;'>"
                f"{calls_text}"
                "</div></details></div>"
            )

        response_text = html.escape("".join(full_response))
        html_output += (
            "<div style='margin-bottom:20px'>"
            f"<div style='font-weight:bold'>{html.escape(agent_name or 'Assistant')}:</div>"
            f"<div style='margin-left:20px; white-space:pre-wrap'>{response_text}</div></div><hr>"
        )

        display(HTML(html_output))

# Run the demo. Top-level `await` relies on the notebook/IPython event loop;
# in a plain script this would need asyncio.run(main()) instead.
await main()