In [None]:
# Standard Library
import asyncio
import os

# Third-Party Libraries
import faiss
import numpy as np
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
import torch

# Langchain
from langchain.text_splitter import MarkdownTextSplitter
from langchain_community.document_loaders import PyPDFLoader

# Semantic Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
from semantic_kernel.functions.kernel_arguments import KernelArguments

# Event Loop Patch for Jupyter
import nest_asyncio

# Load environment variables from a local .env file (if present)
load_dotenv()

# Credentials and endpoints. os.getenv returns None when a variable is unset.
# NOTE(review): search_api / search_endpoint are not referenced by any other
# cell visible in this notebook - presumably leftovers; confirm before removing.
search_api = os.getenv("AI_SEARCH_API")
search_endpoint = "https://cardassist.search.windows.net"
ai_foundry_api = os.getenv("AI_FOUNDRY_MODEL_API")
llm_endpoint = os.getenv("LLM_ENDPOINT")

# Patch the event loop for Jupyter compatibility so that the asyncio.run(...)
# calls in later cells can be made from inside the notebook
nest_asyncio.apply()

# Load the PDF user guide and split it into overlapping chunks.
# After the last line md_docs is a plain list of chunk strings.
pdf_doc_path = "./global_card_access_user_guide.pdf"
loader = PyPDFLoader(pdf_doc_path)
documents = loader.load()
text_splitter = MarkdownTextSplitter(chunk_size=300, chunk_overlap=30)
md_docs = text_splitter.split_documents(documents)
md_docs = [doc.page_content for doc in md_docs]


# Initialize embedding model
# Device preference: Apple MPS first, then CUDA, otherwise CPU.
device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True is presumably unnecessary for this
# stock sentence-transformers checkpoint - confirm and drop if so.
embedding_service = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", trust_remote_code=True, device=device)

# Generate and index embeddings
# Each chunk is encoded on its own (one encode call per chunk) and the
# resulting vectors are stacked into a float32 matrix, one row per chunk.
embedding_matrix = np.array(
    [embedding_service.encode([doc])[0] for doc in tqdm(md_docs, desc="Generating embeddings")]
).astype("float32")

# Flat L2 index sized from the embedding width; row i of the index
# corresponds to md_docs[i], which is what the RAG lookup relies on.
index = faiss.IndexFlatL2(embedding_matrix.shape[1])
index.add(embedding_matrix)

Generating embeddings: 100%|██████████| 161/161 [00:01<00:00, 130.72it/s]


## Creating plugins to activate and deactivate credit cards and to answer RAG questions

In [None]:
from typing import Annotated
from semantic_kernel.functions import kernel_function

class CreditCardPlugin:
    """Semantic Kernel plugin with card activation, deactivation and a RAG lookup.

    The activate/deactivate functions are demo stubs: they print a trace line
    and return a confirmation string without calling any backend. ``rag_query``
    reads the notebook-level ``embedding_service``, ``index`` and ``md_docs``.
    """

    @kernel_function(
        description="Deactivate a credit card; returns a confirmation message"
    )
    async def deactivate_card(self, card_number: Annotated[str, "The credit card number to deactivate"]) -> str:
        """Return a confirmation message for deactivating ``card_number``."""
        print("Function called to deactivate card")
        return f"Credit card {card_number} has been deactivated."

    @kernel_function(
        description="Activate a credit card; returns a confirmation message"
    )
    async def activate_card(self, card_number: Annotated[str, "The credit card number to activate"]) -> str:
        """Return a confirmation message for activating ``card_number``."""
        print("Function called to activate card")
        return f"Credit card {card_number} has been activated."

    @kernel_function(
        description="Get card information and other general information about the card and account management"
    )
    async def rag_query(self, query: Annotated[str, "The user query for RAG (Retrieval-Augmented Generation)"]) -> str:
        """Build a retrieval-augmented prompt for ``query``.

        The query is embedded, the nearest chunks (at most 10) are looked up in
        the FAISS ``index``, and their text is joined with newlines in front of
        the query.

        Returns:
            ``"<context>\\n\\nUser Query: <query>"``; the context is empty when
            the index holds no vectors.
        """
        print("Function called for RAG query")
        query_embedding = embedding_service.encode([query])
        query_embedding = np.array(query_embedding).astype("float32")

        # Never ask for more neighbours than the index holds: FAISS pads the
        # missing slots with -1, and md_docs[-1] would silently pull in the
        # last chunk as if it were relevant.
        k = min(10, index.ntotal)
        relevant_chunks = []
        if k > 0:
            _distances, indices = index.search(query_embedding, k)
            # Defensive filter in case the index still reports missing hits.
            relevant_chunks = [md_docs[i] for i in indices[0] if i >= 0]

        context = "\n".join(relevant_chunks)
        augmented_prompt = f"{context}\n\nUser Query: {query}"

        return augmented_prompt

## Initializing Agent


In [None]:
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.core_plugins.time_plugin import TimePlugin

# Initialize the kernel
kernel = Kernel()

# Azure deployment name used for the chat completion service
model_id = "gpt-4o-mini"

# Add the Azure OpenAI chat completion service
# (endpoint and key come from the environment variables read in the first cell)
kernel.add_service(
    AzureChatCompletion(deployment_name=model_id,
                        endpoint=llm_endpoint,
                        api_key=ai_foundry_api)
)

# Register the credit-card plugin so its @kernel_function methods are
# available to the model under the "CreditCard" plugin name
kernel.add_plugin(
    CreditCardPlugin(),
    plugin_name="CreditCard",
)

# Execution settings shared by every run_model call
arguments = KernelArguments(
    settings=AzureChatPromptExecutionSettings(
        # Let the model decide automatically which of the functions registered on the kernel (the CreditCard plugin) to call, if any.
        function_choice_behavior=FunctionChoiceBehavior.Auto(),
        # Alternative kept for reference: force a call restricted to the two card functions.
        # function_choice_behavior=FunctionChoiceBehavior.Required(filters={"included_functions": ["deactivate_card", "activate_card"]}),
        top_p= 0.9,
        temperature=0,
    )
)

async def run_model(user_query: str, system_prompt: str):
    """Send the system prompt followed by the user query to the kernel.

    Args:
        user_query: The end-user request.
        system_prompt: Instructions placed in front of the query.

    Returns:
        The ``value`` attribute of the result of ``kernel.invoke_prompt``,
        invoked with the notebook-level ``arguments`` (execution settings).
    """
    full_prompt = f"{system_prompt}\n\nUser Query: {user_query}"
    result = await kernel.invoke_prompt(prompt=full_prompt, arguments=arguments)
    return result.value

In [None]:
# System instructions that run_model places in front of every user query
system_prompt = "You're a helpful assistant that can answer questions about credit card management, including activating and deactivating cards, and providing information about card features and account management. Make sure you provide accurate and well structured information based on the provided context."

print(system_prompt)

You're a helpful assistant that can answer questions about credit card management, including activating and deactivating cards, and providing information about card features and account management. Make sure you provide accurate and well structured information based on the provided context.


## Testing card activation

In [None]:
# Activation request; with FunctionChoiceBehavior.Auto the model is expected
# to route this to CreditCardPlugin.activate_card.
user_query = "Can you activate my credit card 1234-5678-9012-3456?"

# Run the asynchronous function (asyncio.run is usable inside the notebook
# because nest_asyncio.apply() was called in the first cell). run_model
# returns resp.value, which is indexed to reach the first message's content.
response = asyncio.run(run_model(system_prompt=system_prompt, user_query=user_query))
print("\nResponse:\n", response[0].content)

Function called to activate card

Response:
 Your credit card **1234-5678-9012-3456** has been successfully activated. If you have any other questions or need further assistance, feel free to ask!


## Testing card deactivation

In [None]:
# Deactivation request; expected to be routed to CreditCardPlugin.deactivate_card.
user_query = "Can you deactivate my credit card 1234-5678-9012-3456?"

# Run the asynchronous function and print the first message's content
response = asyncio.run(run_model(system_prompt=system_prompt, user_query=user_query))
print("\nResponse:\n", response[0].content)

Function called to deactivate card

Response:
 Your credit card ending in 3456 has been successfully deactivated. If you need any further assistance, feel free to ask!


## Testing QA RAG

In [None]:
# Knowledge question; expected to be routed to CreditCardPlugin.rag_query,
# which retrieves chunks of the PDF user guide as context.
user_query = "Steps for First-time Registration for Corporate Accounts"

# Run the asynchronous function and print the first message's content
response = asyncio.run(run_model(system_prompt=system_prompt, user_query=user_query))
print("\nResponse:\n", response[0].content)

Function called for RAG query

Response:
 To register as a new user for a corporate account, follow these steps:

1. **Access the Global Card Access Website**: Go to [Global Card Access](https://www.bankofamerica.com/globalcardaccess).

2. **Click on Register Now**: On the Global Card Access sign-in screen, click on the "Register now" option. 
   - If your organization has multiple corporate accounts, a "Select Corporate Account(s)" window will appear. Choose the appropriate corporate account and click OK.

3. **Complete the Create Account Request Key(s) Page**: 
   - Configure your Account Request Key in the Settings section. This key will be used by your employees to request accounts online.

4. **Add a Corporate Account**: 
   - If your company is set up for this feature, select "Add" from the bottom left-hand side of the Corporate Accounts screen to review and approve new account requests.

5. **Creating Account Request Keys**: 
   - Each Account Request Key has specific settings t

## Hybrid query

In [None]:
# Hybrid request: one prompt that needs both a card action (activate_card)
# and a document lookup (rag_query).
user_query = "Can you activate my card 1234-5678-9012-3456? Also, let me know it's benefits and features."

# Run the asynchronous function and print the first message's content
response = asyncio.run(run_model(system_prompt=system_prompt, user_query=user_query))
print("\nResponse:\n", response[0].content)

Function called to activate card
Function called for RAG query

Response:
 Your credit card **1234-5678-9012-3456** has been successfully activated.

### Benefits and Features of the Card:
1. **Global Card Access**: An online management tool that allows you to check your credit limit, balance, and available credit.
2. **Security Features**: You can view and change your PIN, lock your card, and manage alerts for added security.
3. **Convenient Payments**: Payments can be made online, providing ease of access and management.
4. **Access to Statements**: You can view and download your statements for better financial tracking.
5. **Customizable Alerts**: Set up alerts for due dates, spending limits, and other important notifications.

If you have any more questions or need further assistance, feel free to ask!


# Experiments


## RAG QA

In [None]:
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding
from azure.core.credentials import AzureKeyCredential

# Endpoint of the embedding deployment (None when the variable is unset)
embed_endpoint = os.getenv("EMBEDDING_ENDPOINT")

# Embedding model / deployment names
model_name = "text-embedding-3-large"
deployment = "text-embedding-3-large"


kernel = Kernel()

# The embedding service must be given an embedding model id; a chat model id
# such as "gpt-4o-mini" cannot serve embedding requests.
# NOTE(review): OpenAITextEmbedding targets the OpenAI API, while embed_endpoint
# and deployment suggest an Azure deployment - AzureTextEmbedding may be the
# intended connector; confirm against the account being used.
embed_client = OpenAITextEmbedding(
    ai_model_id=model_name,
    api_key=ai_foundry_api,
)

kernel.add_service(embed_client)

In [None]:
import asyncio
from dataclasses import dataclass, field
from typing import Annotated, List
from uuid import uuid4

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
from semantic_kernel.connectors.memory.in_memory import InMemoryVectorStore
from semantic_kernel.data import (
    DistanceFunction,
    IndexKind,
    VectorStoreRecordDataField,
    VectorStoreRecordKeyField,
    VectorStoreRecordVectorField,
    vectorstoremodel,
)

# Define the data model
@vectorstoremodel
@dataclass
class DocumentChunk:
    """Vector-store record for one text chunk: text, embedding vector and key."""

    # Raw chunk text, flagged as full-text searchable
    content: Annotated[str, VectorStoreRecordDataField(is_full_text_searchable=True)]
    # Embedding of `content`, compared with cosine similarity in a flat index.
    # NOTE(review): `dimensions` must equal the width of the vectors produced
    # by the embedding model actually used to fill this field - confirm 1536.
    embedding: Annotated[List[float], VectorStoreRecordVectorField(
        dimensions=1536,  # Adjust based on your embedding model's output
        distance_function=DistanceFunction.COSINE_SIMILARITY,
        index_kind=IndexKind.FLAT
    )]
    # Record key; a random UUID4 string is generated when none is supplied
    id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))

async def main():
    """Embed every chunk in ``md_docs`` and store it in an in-memory collection.

    Embeddings are generated with the notebook-level ``embed_client``, one
    request per chunk, and each chunk is upserted as a ``DocumentChunk``.

    Returns:
        The populated ``pdf_chunks`` collection, so that later cells can
        search it.
    """
    # No Kernel is needed here: the embedding service is called directly, and
    # a vector store is not an AI service that kernel.add_service can register.
    vector_store = InMemoryVectorStore()

    # Get or create the collection
    collection = vector_store.get_collection("pdf_chunks", data_model_type=DocumentChunk)
    await collection.create_collection_if_not_exists()

    # Process and upsert each document chunk
    for chunk in md_docs:
        embedding = await embed_client.generate_embeddings([chunk])
        # Store plain Python floats to match the List[float] field type.
        vector = [float(x) for x in embedding[0]]
        record = DocumentChunk(content=chunk, embedding=vector)
        await collection.upsert(record)

    return collection

# Run the asynchronous main function and keep the collection at module level:
# the search cells below read `collection` as a global.
collection = asyncio.run(main())

In [None]:
async def search_similar_documents(query: str):
    """Print the five stored chunks most similar to ``query``.

    The query is embedded with the notebook-level ``embed_client`` and searched
    against the module-level ``collection``. Nothing is returned; each hit is
    printed as ``Score: <score>, Content: <text>``.
    """
    # Generate embedding for the query with the embedding service configured
    # earlier in the notebook; the `openai` module is never imported here, and
    # `openai.Embedding.create` is the legacy (pre-1.0) client API.
    embeddings = await embed_client.generate_embeddings([query])
    query_embedding = [float(x) for x in embeddings[0]]

    # Perform vector search
    search_results = await collection.vectorized_search(vector=query_embedding, top=5)

    # Display the results
    for result in search_results.results:
        print(f"Score: {result.score:.4f}, Content: {result.record.content}")

# Example usage
# NOTE(review): search_similar_documents reads a module-level `collection`;
# confirm the indexing cell actually exposes one before running this.
asyncio.run(search_similar_documents("What are the steps to deactivate a credit card?"))

In [None]:
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.core_plugins.time_plugin import TimePlugin
from semantic_kernel.prompt_template import KernelPromptTemplate
from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig

# Initialize the kernel
# (rebinds the notebook-level `kernel`; plugins registered on the earlier
# kernel, such as CreditCard, are not carried over)
kernel = Kernel()

# Configure Azure OpenAI service
model_id = "gpt-4o-mini"

kernel.add_service(AzureChatCompletion(deployment_name=model_id, endpoint=llm_endpoint, api_key=ai_foundry_api))

# Add plugins if necessary
kernel.add_plugin(TimePlugin(), plugin_name="Time")

async def answer_query(query: str):
    """Answer ``query`` from the five most similar stored chunks.

    The query is embedded with the notebook-level ``embed_client``, searched
    against the module-level ``collection``, and the retrieved chunk texts are
    placed in a prompt that is sent through ``kernel.invoke_prompt``.

    Returns:
        The ``value`` attribute of the kernel's result.
    """
    # Generate embedding for the query. `embedding_model` is not defined in
    # this notebook; the query has to be embedded by the same service that
    # produced the stored vectors for the similarity scores to be meaningful.
    embeddings = await embed_client.generate_embeddings([query])
    query_embedding = [float(x) for x in embeddings[0]]
    # Perform vector search (same call shape as search_similar_documents;
    # DocumentChunk has no vector field named "text-dense").
    search_results = await collection.vectorized_search(
        vector=query_embedding,
        top=5
    )
    # Compile context from search results
    context = "\n".join([result.record.content for result in search_results.results])
    # Define the prompt
    prompt = f"""
    You are an assistant that provides answers based on the following context:

    {context}

    Question: {query}
    Answer:"""
    # Invoke the model
    response = await kernel.invoke_prompt(prompt)
    return response.value


# Top-level await (supported in notebook cells); rebinds the notebook-level
# `response` to the value returned by answer_query.
response = await answer_query("What are the steps to deactivate a credit card?")

In [None]:
from dataclasses import dataclass
from typing import List
from semantic_kernel.data import (
    DistanceFunction,
    IndexKind,
    VectorStoreRecordVectorField,
    vectorstoremodel,
)

# NOTE(review): this redefines DocumentChunk and shadows the earlier
# definition (which used dimensions=1536 and IndexKind.FLAT); any collection
# created from the earlier class keeps that older model. Annotated,
# VectorStoreRecordDataField, VectorStoreRecordKeyField, field and uuid4 are
# not imported in this cell and rely on the earlier cell's imports.
@vectorstoremodel
@dataclass
class DocumentChunk:
    """Vector-store record for one text chunk (512-dim, HNSW variant)."""

    # Raw chunk text, flagged as full-text searchable
    content: Annotated[str, VectorStoreRecordDataField(is_full_text_searchable=True)]
    # Embedding of `content`; dimensions must match the embedding model used - TODO confirm 512
    embedding: Annotated[List[float], VectorStoreRecordVectorField(dimensions=512, distance_function=DistanceFunction.COSINE_SIMILARITY, index_kind=IndexKind.HNSW)]
    # Record key; a random UUID4 string is generated when none is supplied
    id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
    




## Different approach

https://medium.com/@j.wang.mlds_97641/building-a-rag-pipeline-with-semantic-kernel-a-step-by-step-guide-7e7e3617a62b

In [None]:
from semantic_kernel.connectors.ai.hugging_face import HuggingFaceTextEmbedding
from semantic_kernel import Kernel

# Initialize the kernel (rebinds the notebook-level `kernel` again)
kernel = Kernel()

# Add the Hugging Face embedding service
# No service_id is passed; a later cell looks the service up by the model id
# string, so presumably the id defaults to ai_model_id - TODO confirm.
kernel.add_service(
    HuggingFaceTextEmbedding(
        ai_model_id="sentence-transformers/all-MiniLM-L6-v2"
    )
)

In [None]:
# NOTE(review): called with a single positional argument and without await;
# presumably a quick probe - confirm against the signature of
# Kernel.add_embedding_to_object in the installed Semantic Kernel version.
kernel.add_embedding_to_object(["hello"])

In [None]:
from semantic_kernel.connectors.ai.open_ai import OpenAITextCompletion, AzureTextEmbedding
from openai import AzureOpenAI

from azure.ai.inference import EmbeddingsClient
from azure.core.credentials import AzureKeyCredential


# Fresh kernel: services registered on previous kernels (including the
# Hugging Face embedder) are not present on this one.
kernel = Kernel()
# The two registrations below are disabled; as written, this cell leaves the
# kernel without any service.
# Add text completion service
# kernel.add_service(
#     service=OpenAITextCompletion(
#         ai_model_id="gpt-4o-mini",
#         api_key=ai_foundry_api
#     )
# )
# Add embedding generation service
# kernel.add_service(
#     service=AzureTextEmbedding(
#         deployment_name="text-embedding-3-large",
#         base_url=embed_endpoint,
#         api_key=ai_foundry_api
#     )
# )


In [None]:
import os
from openai import AzureOpenAI

# Embedding model / deployment names (not referenced again in this cell)
model_name = "text-embedding-3-small"
deployment = "text-embedding-3-small"

# NOTE(review): this variable is unused - the client below is built with the
# literal "2024-12-01-preview" instead. Confirm which version is intended.
api_version = "2024-02-01"

# Raw OpenAI SDK client pointed at the embedding endpoint
client = AzureOpenAI(
    api_version="2024-12-01-preview",
    # endpoint=endpoint,
    api_key=ai_foundry_api,
    azure_endpoint=embed_endpoint
)

# NOTE(review): AzureOpenAI is the OpenAI SDK client, not a Semantic Kernel
# connector; attaching a service_id by hand presumably does not make it a
# valid argument for kernel.add_service - confirm, and consider an SK
# embedding connector such as AzureTextEmbedding instead.
client.service_id = "embedder"

kernel.add_service(client)


In [None]:
import os

from azure.ai.inference import EmbeddingsClient

# Embedding model name (not referenced again in this cell)
model_name = "text-embedding-3-small"

# Azure AI Inference embeddings client; rebinds the notebook-level `client`.
# AzureKeyCredential is not imported in this cell and relies on an earlier one.
client = EmbeddingsClient(
    endpoint=embed_endpoint,
    credential=AzureKeyCredential(ai_foundry_api),
)

# NOTE(review): EmbeddingsClient comes from azure.ai.inference, not from
# Semantic Kernel; presumably it cannot be registered as a kernel service - confirm.
kernel.add_service(service=client)

In [None]:
# Display the services currently registered on the kernel
kernel.services

In [None]:
from semantic_kernel.memory import memory_store_base
from semantic_kernel.memory.memory_record import MemoryRecord
from semantic_kernel.connectors.memory.qdrant import QdrantMemoryStore
from semantic_kernel.memory.semantic_text_memory import SemanticTextMemory

from semantic_kernel.core_plugins.text_memory_plugin import TextMemoryPlugin

# Semantic memory backed by Qdrant.
# NOTE(review): the generator is looked up on the current `kernel` by the
# Hugging Face model id; this returns None unless that embedding service is
# registered on this kernel instance - confirm the cell order.
memory_store = SemanticTextMemory(
    embeddings_generator=kernel.services.get("sentence-transformers/all-MiniLM-L6-v2"),
    # all-MiniLM-L6-v2 produces 384-dimensional vectors (see the FAISS cell,
    # which uses embedding_dim = 384), so the store must be sized to match.
    storage=QdrantMemoryStore(vector_size=384),
)
# The memory object itself exposes no kernel functions; it is offered to the
# kernel through TextMemoryPlugin, and plugin objects need an explicit name.
kernel.add_plugin(TextMemoryPlugin(memory_store), plugin_name="TextMemoryPlugin")

# Sample knowledge base
documents = [
    ("doc1", "Semantic Kernel enables lightweight orchestration of LLMs and skills."),
    ("doc2", "RAG pipelines combine retrieval with generative models for improved responses.")
]

# save_information is a coroutine function: calling it without running the
# coroutine stores nothing. asyncio.run matches how the rest of the notebook
# drives coroutines (nest_asyncio is applied in the first cell).
for doc_id, content in documents:
    asyncio.run(
        memory_store.save_information(
            collection="knowledge-base",
            text=content,
            id=doc_id,
        )
    )

In [None]:
# Query the "knowledge-base" collection of the semantic memory (top-level
# await, supported in notebook cells); the hits are kept in `rdocs`.
rdocs = await memory_store.search(collection="knowledge-base", query="What is Semantic Kernel?")

In [None]:
# Prompt template with two template variables: $retrieved_context (the
# retrieved passages) and $input (the user question).
prompt_template = """
You are a helpful assistant. Answer the following question using the context below.

Context:
{{$retrieved_context}}

Question: {{$input}}

Answer:
"""

# NOTE(review): confirm that `create_semantic_function` exists on Kernel in
# the installed Semantic Kernel version; newer releases presumably register
# prompt functions through kernel.add_function(prompt=...) instead.
rag_function = kernel.create_semantic_function(
    prompt_template,
    description="RAG Answer Generator",
    max_tokens=300
)

In [None]:
# NOTE(review): smoke test of rag_function; confirm that the function object
# has a `send` method - presumably it should be run through kernel.invoke
# with `input` and `retrieved_context` arguments instead.
await rag_function.send("Helo")

In [None]:
# NOTE(review): presumably a stale probe - the last visible assignment to
# `response` is the return value of answer_query, while `.data[0].embedding`
# looks like the shape of a raw embeddings API response. Confirm or remove.
response.data[0].embedding

In [None]:
import os
import asyncio
import numpy as np
import faiss
from langchain.text_splitter import MarkdownTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.hugging_face import HuggingFaceTextEmbedding
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
import nest_asyncio

# Allow asyncio.run(...) inside the notebook's already-running event loop
nest_asyncio.apply()

# Initialize the kernel
kernel = Kernel()

# Add Hugging Face embedding service
# Rebinds the notebook-level `embedding_service` (previously a
# SentenceTransformer) and registers it under the id "embedder".
embedding_service = HuggingFaceTextEmbedding(ai_model_id="sentence-transformers/all-MiniLM-L6-v2", service_id="embedder")
kernel.add_service(embedding_service)

# Add Azure OpenAI chat completion service
# Replace with your actual deployment name, endpoint, and API key
# NOTE(review): the values below are placeholders and no endpoint is passed;
# presumably the connector falls back to environment settings - confirm, and
# prefer the llm_endpoint / ai_foundry_api values read from the environment.
chat_service = AzureChatCompletion(
    deployment_name="your-deployment-name",
    api_key="your-api-key"
)
kernel.add_service(chat_service)

# Load and split the PDF document
# (same loading and chunking parameters as the first cell; md_docs ends up
# as a list of chunk strings)
pdf_doc_path = "global_card_access_user_guide.pdf"
loader = PyPDFLoader(pdf_doc_path)
documents = loader.load()
text_splitter = MarkdownTextSplitter(chunk_size=300, chunk_overlap=30)
md_docs = text_splitter.split_documents(documents)
md_docs = [doc.page_content for doc in md_docs]


In [None]:
# NOTE(review): "embedder" is registered as a HuggingFaceTextEmbedding in the
# previous cell; other cells call `generate_embeddings` on that class, so
# confirm that `encode` exists on it. The model name passed here is an OpenAI
# embedding model, which presumably does not apply to a Hugging Face service.
kernel.get_service("embedder").encode(
    texts=md_docs,
    model="text-embedding-3-small"
)

In [None]:
import asyncio

# Assuming embedding_service is an instance of HuggingFaceTextEmbedding
async def get_embedding():
    """Return the embeddings the notebook-level ``embedding_service`` produces for ``["hello"]``."""
    return await embedding_service.generate_embeddings(["hello"])

# Run the asynchronous function (asyncio.run inside a notebook relies on
# nest_asyncio.apply() having been called) and show the raw result
embedding = asyncio.run(get_embedding())
print(embedding)

In [None]:
import os
import asyncio
import nest_asyncio
import numpy as np
import faiss
from tqdm import tqdm

from langchain.text_splitter import MarkdownTextSplitter
from langchain_community.document_loaders import PyPDFLoader

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.hugging_face import HuggingFaceTextEmbedding
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
from semantic_kernel.functions.kernel_arguments import KernelArguments

# Allow nested event loops for compatibility
nest_asyncio.apply()

# Required credentials
# NOTE(review): these variable names differ from the ones read in the first
# cell (LLM_ENDPOINT / AI_FOUNDRY_MODEL_API); os.getenv returns None when they
# are unset, which overwrites the values loaded earlier - confirm the names.
llm_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
ai_foundry_api = os.getenv("AZURE_OPENAI_KEY")

# Initialize Semantic Kernel
kernel = Kernel()

# Add Hugging Face embedding service
embedding_service = HuggingFaceTextEmbedding(ai_model_id="sentence-transformers/all-MiniLM-L6-v2")
kernel.add_service(embedding_service)

# Add Azure OpenAI Chat Completion
# NOTE(review): the same URL is passed as both `endpoint` and `base_url`; the
# working agent cell passes only `endpoint` - confirm whether base_url is wanted.
chat_service = AzureChatCompletion(
    deployment_name="gpt-4o-mini",
    endpoint=llm_endpoint,
    base_url=llm_endpoint,
    api_key=ai_foundry_api
)
kernel.add_service(chat_service)

# Load and chunk the document
# Path relative to the notebook, as in the first cell, instead of an absolute
# path that only exists on one machine.
pdf_doc_path = "./global_card_access_user_guide.pdf"
loader = PyPDFLoader(pdf_doc_path)
documents = loader.load()
text_splitter = MarkdownTextSplitter(chunk_size=300, chunk_overlap=30)
md_docs = text_splitter.split_documents(documents)
md_docs = [doc.page_content for doc in md_docs]

# Initialize FAISS index (before inserting embeddings)
embedding_dim = 384  # For 'sentence-transformers/all-MiniLM-L6-v2'
index = faiss.IndexFlatL2(embedding_dim)

# Async function to generate and add embeddings one by one
import asyncio

async def generate_and_add_embeddings(docs):
    """Embed each item of ``docs`` separately and append it to the FAISS ``index``.

    Uses the notebook-level ``embedding_service`` and ``index``; progress is
    reported through ``tqdm``. Returns ``None``.
    """
    for text in tqdm(docs):
        vectors = await embedding_service.generate_embeddings([text])
        # FAISS expects float32 input
        index.add(np.array(vectors).astype("float32"))


# Run the async embedding generation and insertion

In [None]:
# await generate_and_add_embeddings(md_docs)

async def generate_all_embeddings(doc):
    """Return the embeddings ``embedding_service`` generates for the single text ``doc``.

    The coroutine awaits the embedding call directly: starting a nested event
    loop with ``asyncio.run`` from inside a coroutine raises ``RuntimeError``
    under standard asyncio and only worked here through the nest_asyncio patch.
    """
    return await embedding_service.generate_embeddings([doc])
    
# asyncio.run already drives the coroutine to completion and returns its
# result; that result is not awaitable, so no extra `await` is applied.
asyncio.run(generate_all_embeddings(md_docs[0]))


In [None]:
import os
import asyncio
import nest_asyncio
import numpy as np
import faiss

from langchain.text_splitter import MarkdownTextSplitter
from langchain_community.document_loaders import PyPDFLoader

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.hugging_face import HuggingFaceTextEmbedding
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
from semantic_kernel.functions.kernel_arguments import KernelArguments
from sentence_transformers import SentenceTransformer
# Local sentence-transformers model used for both document and query embeddings
embedding_service = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


# Allow nested event loops for compatibility
nest_asyncio.apply()

# Required credentials
# NOTE(review): these variable names differ from the ones read in the first
# cell (LLM_ENDPOINT / AI_FOUNDRY_MODEL_API); os.getenv returns None when they
# are unset, which overwrites the values loaded earlier - confirm the names.
llm_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
ai_foundry_api = os.getenv("AZURE_OPENAI_KEY")

# Initialize Semantic Kernel
kernel = Kernel()

# Add Hugging Face embedding service
# embedding_service = HuggingFaceTextEmbedding(ai_model_id="sentence-transformers/all-MiniLM-L6-v2")
# kernel.add_service(embedding_service)

# Add Azure OpenAI Chat Completion
# NOTE(review): the same URL is passed as both `endpoint` and `base_url`; the
# working agent cell passes only `endpoint` - confirm whether base_url is wanted.
chat_service = AzureChatCompletion(
    deployment_name="gpt-4o-mini",
    endpoint=llm_endpoint,
    base_url=llm_endpoint,
    api_key=ai_foundry_api
)
kernel.add_service(chat_service)

# Load and chunk the document
# Path relative to the notebook, as in the first cell, instead of an absolute
# path that only exists on one machine.
pdf_doc_path = "./global_card_access_user_guide.pdf"
loader = PyPDFLoader(pdf_doc_path)
documents = loader.load()
text_splitter = MarkdownTextSplitter(chunk_size=300, chunk_overlap=30)
md_docs = text_splitter.split_documents(documents)
md_docs = [doc.page_content for doc in md_docs]

# Generate document embeddings
def generate_all_embeddings(docs):
    """Encode every text in ``docs`` individually and return the vectors as a list.

    Each text is passed to ``embedding_service.encode`` as a one-element batch
    and the first row of the result is kept, so the output has one entry per
    input text, in the same order.
    """
    return [embedding_service.encode([text])[0] for text in docs]

# Embed and index
# One float32 row per chunk; the index width is taken from the matrix, and
# row i of the index corresponds to md_docs[i].
embedding_matrix = np.array(generate_all_embeddings(md_docs)).astype("float32")
index = faiss.IndexFlatL2(embedding_matrix.shape[1])
index.add(embedding_matrix)

# RAG Query Function
async def rag_query(query: str):
    """Answer ``query`` with retrieval-augmented generation.

    The query is embedded with ``embedding_service``, the nearest chunks (at
    most 10) are fetched from the FAISS ``index``, and the chunk texts followed
    by the query are sent through ``kernel.invoke_prompt``.

    Returns:
        The ``value`` attribute of the kernel's result.
    """
    query_embedding = embedding_service.encode([query])
    query_embedding = np.array(query_embedding).astype("float32")

    # Never ask for more neighbours than the index holds: FAISS pads the
    # missing slots with -1, and md_docs[-1] would silently pull in the last
    # chunk as if it were relevant. An empty index yields an empty context.
    k = min(10, index.ntotal)
    relevant_chunks = []
    if k > 0:
        _distances, indices = index.search(query_embedding, k)
        # Defensive filter in case the index still reports missing hits.
        relevant_chunks = [md_docs[i] for i in indices[0] if i >= 0]

    context = "\n".join(relevant_chunks)
    augmented_prompt = f"{context}\n\nUser Query: {query}"

    arguments = KernelArguments(
        settings=AzureChatPromptExecutionSettings(
            function_choice_behavior=FunctionChoiceBehavior.Auto(),
            top_p=0.9,
            temperature=0,
        )
    )

    response = await kernel.invoke_prompt(augmented_prompt, arguments=arguments)
    return response.value


In [None]:
# TEST THE PIPELINE
# Runs one question through rag_query and prints the content of the first
# item of the returned value.
if __name__ == "__main__":
    user_query = "How do I activate my card?"
    output = asyncio.run(rag_query(user_query))
    print("\nResponse:\n", output[0].content)