In [1]:
import json

from langchain_community.vectorstores import FAISS
from langchain_cohere import ChatCohere, CohereEmbeddings
import pandas as pd 


### Step 1: Load Dataset


In [2]:
qa_dataset = pd.read_csv("customer_leads_agent_qa_data_csv.csv")
predefined_questions = qa_dataset["prompt_text"].tolist()
expected_responses = dict(zip(qa_dataset["prompt_text"], qa_dataset["expected_response"]))

print("Sample Questions:", predefined_questions[:5])
print("Sample Responses:", list(expected_responses.items())[:5])


Sample Questions: ['"What unique value do they offer?"', '"Who is your target audience?"', 'What\'s your ideal customer profile?"', '"What regions are you targeting?"', '"Who are your primary competitors?"']
Sample Responses: [('"What unique value do they offer?"', '"They offer unique value in areas like [strengths]."'), ('"Who is your target audience?"', '"Our target audience is [demographic]."'), ('What\'s your ideal customer profile?"', '"Our ideal customer profile includes [attributes]."'), ('"What regions are you targeting?"', '"We\'re focusing on regions like [location]."'), ('"Who are your primary competitors?"', '"Our competitors include [names]."')]


### Step 2: Generate Embeddings for Predefined Queries

In [3]:
embedding_model = CohereEmbeddings(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", model="embed-english-light-v3.0")
vector_store = FAISS.from_texts(predefined_questions, embedding_model)


### Step 3: Define Required Fields and Memory State

In [4]:
# Define required fields
REQUIRED_FIELDS = ["company_size", "industry", "location", "job_roles"]

# Initialize memory state
memory_state = {
    "conversation": [],  # Tracks user-agent interactions
    "extracted_data": {field: None for field in REQUIRED_FIELDS},  # Stores field values
    "missing_fields": REQUIRED_FIELDS[:],  # Tracks fields yet to be collected
}

print("Initialized Memory State:", json.dumps(memory_state, indent=4))


Initialized Memory State: {
    "conversation": [],
    "extracted_data": {
        "company_size": null,
        "industry": null,
        "location": null,
        "job_roles": null
    },
    "missing_fields": [
        "company_size",
        "industry",
        "location",
        "job_roles"
    ]
}


### Step 4: Define Functions

In [5]:
def create_dynamic_prompt(user_input, missing_fields):
    """
    Generates a prompt for the LLM to extract the missing fields from user input.
    """
    prompt = f"""
    # Role
    You are a Data Extraction Agent.

    # Objective
    Extract the following details from the user input: {', '.join(missing_fields)}.

    # Context
    The information extracted will help populate a structured memory state for a business analysis task.

    # Input
    User Input: {user_input}

    # Response Format (JSON):
    {{
        "company_size": null or "value",
        "industry": null or "value",
        "location": null or "value",
        "job_roles": null or ["list of roles"]
    }}
    """
    return prompt


In [6]:
def update_memory_with_response(memory_state, extracted_data):
    """
    Updates the memory state with the extracted fields and removes filled fields from missing_fields.
    """
    for field, value in extracted_data.items():
        if field in memory_state["missing_fields"] and value:  # Only update if field is missing and value is valid
            memory_state["extracted_data"][field] = value
            memory_state["missing_fields"].remove(field)
    return memory_state


### Step 4: Field Extraction Using LLM

In [7]:
def extract_user_fields(user_input, missing_fields, model):
    """
    Uses the LLM model to extract fields based on user input and missing fields.
    Handles structured responses like AIMessage.
    """
    prompt = create_dynamic_prompt(user_input, missing_fields)
    try:
        # Get the raw response from the model
        raw_response = model.predict(prompt)
        
        print(f'raw_response: {raw_response}')
        
        # Extract the string content
        response = raw_response.content if hasattr(raw_response, 'content') else raw_response
        
        # Parse the response as JSON
        extracted_data = json.loads(response)
        return extracted_data
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {e}")
        return {}
    except Exception as e:
        print(f"Unexpected error: {e}")
        return {}


In [8]:
def query_matching(user_query, vector_store, expected_responses, threshold=0.75):
    """
    Matches a user query to the closest predefined question using vector embeddings.

    Args:
        user_query (str): The query from the user.
        vector_store (FAISS): Vector store containing embeddings of predefined questions.
        expected_responses (dict): Mapping of predefined questions to their responses.
        threshold (float): Minimum similarity score to consider a match.

    Returns:
        str: The matched response or a fallback response if no match is found.
    """
    try:
        # Perform similarity search
        matches = vector_store.similarity_search_with_score(user_query, k=1)
        
        if not matches:
            return "Sorry, I couldn't find a relevant match for your query."
        
        # Retrieve the best match and its similarity score
        best_match, score = matches[0]  # (Document, Score)
        
        if score >= threshold:
            matched_question = best_match.page_content
            return expected_responses.get(matched_question, "No predefined response found.")
        else:
            return "Sorry, your query doesn't closely match any known question."
    except Exception as e:
        print(f"Error during query matching: {e}")
        return "An error occurred while processing your query."


### Step 5: Feedback Loop for Iterative Interaction


In [11]:
# Step 5: Chat Function
def chat_with_fallback(model, memory_state, vector_store, expected_responses):
    print("Agent: Welcome! Feel free to ask questions or provide details.")
    while True:
        user_input = input("User: ")
        if user_input.lower() in ["exit", "quit", "done"]:
            print("Agent: Thank you for the information. Goodbye!")
            break
        if not user_input.strip():
            print("Agent: I didn't catch that. Could you please repeat?")
            continue
        
        
        # 2 - check for a new conversation
        if not memory_state.get('conversation_started', False):
            print("Agent: Starting a new conversation. Cleaning memory...")
            memory_state = {
                "conversation_started": True,
                "missing_fields": ["company_size", "industry", "location", "job_roles"],
                "extracted_data": {}
            }
 
        
        # Always check the response using query_matching
        response = query_matching(user_input, vector_store, expected_responses)
        
        if response != "Sorry, your query doesn't closely match any known question.":
            print("Agent:", response)
            print("Agent: I hope this answers your question. Goodbye!")
            break
            
        # If a response is found, print it and break the loop
        if response != "Sorry, your query doesn't closely match any known question.":
            reformulated_response = model.predict(
                f"Reformulate the following response to make it dynamic and engaging: {response}"
            )
            print("Agent:", reformulated_response)
            continue
        
        if memory_state["missing_fields"]:
            extracted_data = extract_user_fields(user_input, memory_state["missing_fields"], model)
            memory_state = update_memory_with_response(memory_state, extracted_data)
            print("Agent: I have updated your details.")
            if not memory_state["missing_fields"]:
                print("Agent: All required information has been collected. Moving to the next steps.")
                break
            else: 
                print("Agent: Please provide the following information:", memory_state["missing_fields"])
                continue

        # If no matching response or missing fields
        print("Agent: I didn't understand that. Can you clarify?")


### Step 6: Initialize Language Model and Start Interaction

In [10]:
# Step 6: Initialize Model and Start Chat
model = ChatCohere(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", temperature=0.1)
result = chat_with_fallback(model, memory_state, vector_store, expected_responses)

print(f"Chat Completed -  result: {result}")
    

Agent: Welcome! Feel free to ask questions or provide details.
Agent: "Available resources include [budget, team, technology]."
Agent: I hope this answers your question. Goodbye!
Chat Completed -  result: None
