<a href="https://colab.research.google.com/github/Keerthisree01/29_Bellamkonda_Keerthisree/blob/main/AI_Assistance_Customer_Service.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task
Build an AI customer service agent that uses retrieval-augmented generation (RAG) and escalation rules, based on the customer interaction data in 'CustomerInteractionData.csv'.

In [None]:
import pandas as pd

# Read the customer-interaction export into a DataFrame.
df = pd.read_csv('/content/CustomerInteractionData.csv')

# Quick look at the data: a sample of rows, the column labels, then the
# dtype / non-null summary that DataFrame.info() writes to stdout.
print("First 5 rows of the DataFrame:", df.head(), sep="\n")
print("\nColumn names:", df.columns, sep="\n")
print("\nDataFrame Information:")
df.info()

In [None]:
import re


def clean_interaction_text(text):
    """Normalise one raw interaction string for chunking and embedding.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is removed (punctuation, digits, symbols), runs of whitespace
    are collapsed to a single space and the ends are trimmed.

    Args:
        text: A single cell of the raw-text column.

    Returns:
        str: The cleaned text. Non-string cells (for example NaN produced by
        an empty CSV field) yield an empty string instead of raising.
    """
    if not isinstance(text, str):
        # re.sub raises TypeError on floats such as NaN; treat them as empty.
        return ''
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    return re.sub(r'\s+', ' ', letters_only).strip()


# Extract the 'CustomerInteractionRawText' column
raw_text = df['CustomerInteractionRawText']

# Clean every interaction and store the result next to the raw text
cleaned_text = raw_text.apply(clean_interaction_text)
df['CleanedInteractionText'] = cleaned_text

print("Original vs. Cleaned Text (first 5 rows):")
# head(5) yields fewer rows on a short frame, so the preview cannot index
# past the end the way a fixed range(5) would.
for original, cleaned in zip(df['CustomerInteractionRawText'].head(5),
                             df['CleanedInteractionText'].head(5)):
    print(f"Original: {original}")
    print(f"Cleaned: {cleaned}\n")

In [None]:
def chunk_text(text, chunk_size=100):
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` produces (whitespace-delimited). Each
    chunk is the words re-joined with single spaces; the final chunk holds the
    remainder and may be shorter. Text with no words yields an empty list.

    Args:
        text: The string to split.
        chunk_size: Maximum number of words per chunk.

    Returns:
        list[str]: The chunks in their original order.
    """
    tokens = text.split()
    chunk_starts = range(0, len(tokens), chunk_size)
    return [' '.join(tokens[start:start + chunk_size]) for start in chunk_starts]

# Chunk every cleaned interaction; each row ends up holding a list of chunks.
df['TextChunks'] = df['CleanedInteractionText'].apply(chunk_text)

print("First 5 rows of DataFrame with Text Chunks:")
print(df[['CleanedInteractionText', 'TextChunks']].head())

# Show how the very first interaction was split up.
print("\nExample of a chunked interaction (first entry):\n")
first_entry_chunks = df['TextChunks'].iloc[0]
for chunk_number, chunk in enumerate(first_entry_chunks, start=1):
    print(f"Chunk {chunk_number}: {chunk}")

In [None]:
from sentence_transformers import SentenceTransformer

# Pre-trained sentence-embedding model used for both chunks and queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Containers for the flattened chunk list and its embeddings; they are filled
# by the indexing cell that follows.
all_text_chunks, all_chunk_embeddings = [], []

print("Sentence Transformer model loaded successfully and lists initialized.")

In [None]:
import sys
!{sys.executable} -m pip install faiss-cpu

import faiss
import numpy as np

# Clear and re-populate lists to ensure a fresh start
all_text_chunks = []
all_chunk_embeddings = []

# Populate all_text_chunks from the DataFrame
for chunks_list in df['TextChunks']:
    all_text_chunks.extend(chunks_list)

# Encode all collected text chunks into numerical vector embeddings
all_chunk_embeddings = model.encode(all_text_chunks, show_progress_bar=True)

# Get the dimension of the embeddings
embedding_dimension = all_chunk_embeddings.shape[1]

# Create a FAISS index (e.g., IndexFlatL2 for L2 distance)
index = faiss.IndexFlatL2(embedding_dimension)

# Add the generated embeddings to the index
index.add(np.array(all_chunk_embeddings).astype('float32'))

print(f"Total text chunks: {len(all_text_chunks)}")
print(f"Shape of all_chunk_embeddings: {all_chunk_embeddings.shape}")
print(f"FAISS index contains {index.ntotal} vectors.")

In [None]:
# Installs faiss-cpu using the interpreter that backs this kernel.
# NOTE(review): this repeats the install command at the top of the previous
# cell; one of the two is presumably redundant.
import sys
!{sys.executable} -m pip install faiss-cpu

In [None]:
import faiss
import numpy as np

# Build the flat chunk list from scratch. Extending an already populated list
# would append every chunk a second time each time this cell runs, leaving
# duplicate chunks (and duplicate vectors) in the index.
all_text_chunks = [
    chunk
    for chunks_list in df['TextChunks']
    for chunk in chunks_list
]

# Encode all collected text chunks into numerical vector embeddings
all_chunk_embeddings = model.encode(all_text_chunks, show_progress_bar=True)

# Get the dimension of the embeddings
embedding_dimension = all_chunk_embeddings.shape[1]

# Create a fresh FAISS index (IndexFlatL2: exact search by L2 distance)
index = faiss.IndexFlatL2(embedding_dimension)

# Add the generated embeddings to the index (FAISS expects float32)
index.add(np.array(all_chunk_embeddings).astype('float32'))

print(f"Total text chunks: {len(all_text_chunks)}")
print(f"Shape of all_chunk_embeddings: {all_chunk_embeddings.shape}")
print(f"FAISS index contains {index.ntotal} vectors.")

In [None]:
def simulate_ask(user_query, top_k=3):
    """Retrieve the chunks nearest to ``user_query`` and build a simulated answer.

    Relies on the notebook-level ``model`` (query encoder), ``index`` (vector
    index searched with ``index.search``) and ``all_text_chunks`` (chunk texts
    addressed by the ids the index returns).

    Args:
        user_query: The question to answer.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        tuple: ``(generated_answer, source_ids)`` where ``source_ids`` is a
        list of plain ``int`` positions into ``all_text_chunks``.
    """
    # 1. Embed the user query as a float32 matrix with a single row.
    query_embedding = np.asarray(model.encode([user_query]), dtype='float32')

    # 2. Perform similarity search (distances are not needed in this version).
    _distances, indices = index.search(query_embedding, top_k)

    # 3. Keep only ids that address a real chunk. FAISS pads missing results
    #    with -1, which a bare `i < len(...)` check would accept and then
    #    resolve to the *last* chunk through negative indexing.
    chunk_count = len(all_text_chunks)
    source_ids = [int(i) for i in indices[0] if 0 <= i < chunk_count]
    retrieved_context = [all_text_chunks[i] for i in source_ids]

    # 4. Simulate LLM response
    if retrieved_context:
        generated_answer = f"Based on your question: '{user_query}', and the following context: [{'; '.join(retrieved_context)}], the AI assistant says: This is a simulated response based on the relevant information found."
    else:
        generated_answer = f"Based on your question: '{user_query}', the AI assistant says: I couldn't find relevant information in my knowledge base. Please try rephrasing your question."

    return generated_answer, source_ids

print("The 'simulate_ask' function has been defined.")

In [None]:
# Smoke-test the retrieval pipeline with a single question.
user_query = "What is the customer's issue regarding porting out?"
answer, sources = simulate_ask(user_query)

print(
    f"User Query: {user_query}",
    f"Generated Answer: {answer}",
    f"Source IDs: {sources}",
    sep="\n",
)

In [None]:
escalation_keywords = ['escalate', 'urgent', 'manager', 'complaint']

def simulate_ask(user_query, top_k=3):
    """Answer ``user_query`` from retrieved chunks and flag escalation.

    Escalation is requested when the lower-cased query contains any entry of
    ``escalation_keywords`` (substring match) or when retrieval yields no
    usable chunk. Relies on the notebook-level ``model``, ``index`` and
    ``all_text_chunks``.

    Args:
        user_query: The question to answer.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        tuple: ``(generated_answer, source_ids, escalation_required)`` where
        ``source_ids`` is a list of plain ``int`` positions into
        ``all_text_chunks`` and ``escalation_required`` is a ``bool``.
    """
    # Check for escalation keywords in the user query (lower-case it once).
    lowered_query = user_query.lower()
    keyword_hit = any(keyword in lowered_query for keyword in escalation_keywords)

    # 1. Embed the user query as a float32 matrix with a single row.
    query_embedding = np.asarray(model.encode([user_query]), dtype='float32')

    # 2. Perform similarity search (distances are not used in this version).
    _distances, indices = index.search(query_embedding, top_k)

    # 3. Keep only ids that address a real chunk. FAISS pads missing results
    #    with -1; without the lower bound that id would silently resolve to
    #    the last chunk and the no-context branch below could never trigger.
    chunk_count = len(all_text_chunks)
    source_ids = [int(i) for i in indices[0] if 0 <= i < chunk_count]
    retrieved_context = [all_text_chunks[i] for i in source_ids]

    # 4. Simulate LLM response; a missing context escalates on its own and its
    #    message already says so, so only the with-context answer gets the
    #    keyword escalation prefix.
    if retrieved_context:
        generated_answer = f"Based on your question: '{user_query}', and the following context: [{'; '.join(retrieved_context)}], the AI assistant says: This is a simulated response based on the relevant information found."
        if keyword_hit:
            generated_answer = "Escalation is required. " + generated_answer
    else:
        generated_answer = f"Based on your question: '{user_query}', the AI assistant says: I couldn't find relevant information in my knowledge base. This query requires escalation due to lack of relevant information."

    escalation_required = keyword_hit or not retrieved_context

    return generated_answer, source_ids, escalation_required

print("The 'simulate_ask' function has been updated with escalation logic.")

In [None]:
print("\n--- Testing Escalation Logic ---")

# Case 1: the query contains escalation keywords.
user_query_escalate_keyword = "I have an urgent complaint about my service."
answer_ek, sources_ek, escalate_ek = simulate_ask(user_query_escalate_keyword)
print(
    f"\nUser Query (Keyword): {user_query_escalate_keyword}",
    f"Generated Answer: {answer_ek}",
    f"Source IDs: {sources_ek}",
    f"Escalation Required: {escalate_ek}",
    sep="\n",
)

# Case 2: an out-of-domain query; it escalates only if retrieval comes back
# with no chunk at all.
user_query_no_context = "Tell me about quantum physics and black holes."
answer_nc, sources_nc, escalate_nc = simulate_ask(user_query_no_context)
print(
    f"\nUser Query (No Context): {user_query_no_context}",
    f"Generated Answer: {answer_nc}",
    f"Source IDs: {sources_nc}",
    f"Escalation Required: {escalate_nc}",
    sep="\n",
)

# Case 3: an ordinary query without escalation keywords.
user_query_normal = "What is the customer's issue regarding porting out?"
answer_n, sources_n, escalate_n = simulate_ask(user_query_normal)
print(
    f"\nUser Query (Normal): {user_query_normal}",
    f"Generated Answer: {answer_n}",
    f"Source IDs: {sources_n}",
    f"Escalation Required: {escalate_n}",
    sep="\n",
)

In [None]:
escalation_keywords = ['escalate', 'urgent', 'manager', 'complaint']

def simulate_ask(user_query, top_k=3, relevance_threshold=0.7):
    """Answer ``user_query`` from sufficiently close chunks and flag escalation.

    A retrieved chunk is used only when the distance reported by
    ``index.search`` is strictly below ``relevance_threshold`` (smaller means
    closer). Escalation is requested when the lower-cased query contains any
    entry of ``escalation_keywords`` (substring match) or when no chunk
    passes the threshold. Relies on the notebook-level ``model``, ``index``
    and ``all_text_chunks``.

    Args:
        user_query: The question to answer.
        top_k: Number of nearest neighbours requested from the index.
        relevance_threshold: Exclusive upper bound on the accepted distance.
            NOTE(review): with the IndexFlatL2 index built in this notebook
            the distance is presumably a squared L2 distance; confirm before
            tuning this value.

    Returns:
        tuple: ``(generated_answer, source_ids, final_escalation_status)``
        where ``source_ids`` is a list of plain ``int`` positions into
        ``all_text_chunks`` and the status is a ``bool``.
    """
    # Check for escalation keywords in the user query (lower-case it once).
    lowered_query = user_query.lower()
    escalation_required_by_keyword = any(
        keyword in lowered_query for keyword in escalation_keywords
    )

    # 1. Embed the user query as a float32 matrix with a single row.
    query_embedding = np.asarray(model.encode([user_query]), dtype='float32')

    # 2. Perform similarity search
    distances, indices = index.search(query_embedding, top_k)

    # 3. Keep hits that are close enough and address a real chunk. The lower
    #    bound rejects the -1 label FAISS uses to pad missing results, so the
    #    filter does not depend on the padded distance being large.
    chunk_count = len(all_text_chunks)
    source_ids = [
        int(chunk_id)
        for chunk_id, dist in zip(indices[0], distances[0])
        if dist < relevance_threshold and 0 <= chunk_id < chunk_count
    ]
    retrieved_context = [all_text_chunks[chunk_id] for chunk_id in source_ids]

    escalation_required_by_no_context = not retrieved_context
    final_escalation_status = escalation_required_by_keyword or escalation_required_by_no_context

    # 4. Simulate LLM response. Each message is assembled directly rather than
    #    patched afterwards with str.replace, which would also rewrite the
    #    echoed user query if it happened to contain the replaced sentence.
    question_prefix = f"Based on your question: '{user_query}', "
    no_context_lead = (
        "the AI assistant says: I couldn't find sufficiently relevant "
        "information in my knowledge base. "
    )
    if retrieved_context:
        generated_answer = (
            f"{question_prefix}and the following context: [{'; '.join(retrieved_context)}], "
            "the AI assistant says: This is a simulated response based on the relevant information found."
        )
        if escalation_required_by_keyword:
            generated_answer = "Escalation is required (keywords). " + generated_answer
    elif escalation_required_by_keyword:
        generated_answer = (
            "Escalation is required (keywords and no relevant context). "
            + question_prefix
            + no_context_lead
            + "This query requires escalation due to keywords and lack of relevant information."
        )
    else:
        generated_answer = (
            question_prefix
            + no_context_lead
            + "This query requires escalation due to lack of relevant information."
        )

    return generated_answer, source_ids, final_escalation_status

print("The 'simulate_ask' function has been updated with refined escalation logic and relevance threshold.")

In [None]:
print("\n--- Retesting Refined Escalation Logic ---")

# Case 1: escalation keywords in the query, default relevance threshold.
user_query_escalate_keyword = "I have an urgent complaint about my service."
answer_ek, sources_ek, escalate_ek = simulate_ask(user_query_escalate_keyword)
print(
    f"\nUser Query (Keyword): {user_query_escalate_keyword}",
    f"Generated Answer: {answer_ek}",
    f"Source IDs: {sources_ek}",
    f"Escalation Required: {escalate_ek}",
    sep="\n",
)

# Case 2: out-of-domain query run with a stricter threshold (0.5 instead of
# the default 0.7) so that fewer retrieved chunks count as relevant. Lower the
# value further if chunks still pass the filter.
user_query_no_context = "Tell me about quantum physics and black holes. This is urgent."
answer_nc, sources_nc, escalate_nc = simulate_ask(user_query_no_context, relevance_threshold=0.5)
print(
    f"\nUser Query (No Context, stricter threshold): {user_query_no_context}",
    f"Generated Answer: {answer_nc}",
    f"Source IDs: {sources_nc}",
    f"Escalation Required: {escalate_nc}",
    sep="\n",
)

# Case 3: ordinary query without escalation keywords, default threshold.
user_query_normal = "What is the customer's issue regarding porting out?"
answer_n, sources_n, escalate_n = simulate_ask(user_query_normal)
print(
    f"\nUser Query (Normal): {user_query_normal}",
    f"Generated Answer: {answer_n}",
    f"Source IDs: {sources_n}",
    f"Escalation Required: {escalate_n}",
    sep="\n",
)

# Case 4: escalation keywords combined with a very out-of-domain topic.
user_query_combined_escalation = "I need to escalate this matter about alien abductions, it's urgent!"
answer_ce, sources_ce, escalate_ce = simulate_ask(user_query_combined_escalation, relevance_threshold=0.5)
print(
    f"\nUser Query (Combined Escalation): {user_query_combined_escalation}",
    f"Generated Answer: {answer_ce}",
    f"Source IDs: {sources_ce}",
    f"Escalation Required: {escalate_ce}",
    sep="\n",
)

In [None]:
print("\n--- Demonstrating AI Agent Functionality ---")

# Scenario 1: a question expected to be answered from retrieved sources.
user_query_direct_answer = "What is the customer's issue regarding porting out?"
answer_da, sources_da, escalate_da = simulate_ask(user_query_direct_answer)
print(
    "\nScenario: Direct Answer with Relevant Sources",
    f"User Query: {user_query_direct_answer}",
    f"Generated Answer: {answer_da}",
    f"Source IDs: {sources_da}",
    f"Escalation Required: {escalate_da}",
    sep="\n",
)

# Scenario 2: the query contains an escalation keyword ("manager").
user_query_keyword_escalation = "I need to speak to a manager about my bill."
answer_ke, sources_ke, escalate_ke = simulate_ask(user_query_keyword_escalation)
print(
    "\nScenario: Query with Escalation Keyword",
    f"User Query: {user_query_keyword_escalation}",
    f"Generated Answer: {answer_ke}",
    f"Source IDs: {sources_ke}",
    f"Escalation Required: {escalate_ke}",
    sep="\n",
)

# Scenario 3: an out-of-domain question, run with a stricter threshold (0.5)
# so that retrieved chunks are less likely to count as relevant.
user_query_no_info = "Explain the theory of relativity to me."
answer_ni, sources_ni, escalate_ni = simulate_ask(user_query_no_info, relevance_threshold=0.5)
print(
    "\nScenario: Query with No Relevant Information",
    f"User Query: {user_query_no_info}",
    f"Generated Answer: {answer_ni}",
    f"Source IDs: {sources_ni}",
    f"Escalation Required: {escalate_ni}",
    sep="\n",
)

# Scenario 4: escalation keywords plus an out-of-domain topic, again with the
# stricter threshold.
user_query_combined_escalation = "This is urgent, I need a manager to resolve my issue with intergalactic travel services!"
answer_ce, sources_ce, escalate_ce = simulate_ask(user_query_combined_escalation, relevance_threshold=0.5)
print(
    "\nScenario: Combined Escalation (Keyword + No Relevant Info)",
    f"User Query: {user_query_combined_escalation}",
    f"Generated Answer: {answer_ce}",
    f"Source IDs: {sources_ce}",
    f"Escalation Required: {escalate_ce}",
    sep="\n",
)