In [240]:
import uuid

from langgraph.graph import START, MessagesState, StateGraph
from langgraph.checkpoint.memory import MemorySaver 
from langchain_cohere import ChatCohere

In [173]:
    # Define a new graph
workflow = StateGraph(state_schema=MessagesState)
model = ChatCohere(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", temperature=0.1)


In [174]:
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}

In [175]:
# Define workflow nodes
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

<langgraph.graph.state.StateGraph at 0x2753632aa10>

In [176]:
# Set up memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [177]:
# Generate a unique thread ID for conversation management
thread_id = uuid.uuid4()
config = {"configurable": {"thread_id": thread_id}}

In [178]:
field_extraction_prompt = """
# Role

You are a Data Extraction Agent.

# Objective

Your objective is to identify and extract specific business details from provided text, including industry, company size, location, and job roles. You should construct a full `user_info` dictionary with these details and indicate if there are any missing fields.

# Context

The extracted information will help provide structured business data from raw text inputs, which will support various business and analytical operations.

# SOP

1. Analyze the user-provided text in the variable user_input.

2. Construct and return the following dictionary:

{ "company_size": null or employee range (e.g., "1-10", "500+"), "industry": null or specific sector (e.g., "Tech", "Healthcare"), "location": null or specific location (e.g., city or country), "job_roles": [] or list of job titles mentioned }

3. In addition to the dictionary, provide a list of fields that are still missing or mention that all fields are complete.

# Examples

**Example 1:**

**Input:**

"Text describing a tech company based in New York with 200+ employees and titles like CEO, Data Scientist, and Product Manager."

**Output:**

{
    "user_info": {
        "company_size": "200+",
        "industry": "Tech",
        "location": "New York",
        "job_roles": ["CEO", "Data Scientist", "Product Manager"]
    },
    "missing_fields": []
}

**Example 2:**

**Input:**

"Description of a healthcare firm in Dubai without specific company size or job titles mentioned."

**Output:**

{
    "user_info": {
        "company_size": null,
        "industry": "Healthcare",
        "location": "Dubai",
        "job_roles": []
    },
    "missing_fields": ["company_size", "job_roles"]
}

**Example 3:**

**Input:**

"A small tech startup with less than 10 employees, located in San Francisco. The team includes a CEO and a CTO."

**Output:**

{
    "user_info": {
        "company_size": "1-10",
        "industry": "Tech",
        "location": "San Francisco",
        "job_roles": ["CEO", "CTO"]
    },
    "missing_fields": []
}
"""

### Extract user fields:


In [179]:
REQUIRED_FIELDS = {"company_size", "industry", "job_roles", "location"}

# Global dictionary to keep track of extracted fields and missing fields
recorded_fields = {
    "user_info": {},
    "missing_fields": list(REQUIRED_FIELDS)  # Initially, all fields are required
}

In [180]:
def extract_user_fields(message, model):
    prompt = field_extraction_prompt + f"\nInput: {message}\nResponse format (JSON): {{'company_size': '', 'industry': '', 'job_roles': '', 'location': ''}}"
        
    try: 
        llm_response = model.predict(prompt) 
        llm_extracted_data = json.loads(llm_response)
        print(llm_extracted_data)
        return llm_extracted_data
    except KeyError as e:
        print(f"Missing key in template formatting: {e}")
    except json.JSONDecodeError as e:
        print(f"Failed to decode JSON response: {e}")


In [None]:
# Test the extraction
user_input = "A startup"
extracted_data = extract_user_fields(user_input, model)

print("Extracted Data:", extracted_data)


# Define the required fields
required_fields = ['industry', 'company_size', 'location', 'job_roles']

print("Missing Fields:", extracted_data["missing_fields"])


In [182]:
# Memory initialization
memory_state = {
    "conversation": [],
    "extracted_data": {"company_size": None, "industry": None, "job_roles": None, "location": None},
    "missing_fields": ["company_size", "industry", "job_roles", "location"],
}

def update_memory_with_response(llm_memory_state, llm_extracted_data):
    for field, value in llm_extracted_data.items():
        if value in llm_memory_state["extracted_data"]:
            llm_memory_state["extracted_data"][field] = value
            llm_memory_state["missing_fields"].remove(field)
    return llm_memory_state




In [194]:
def create_dynamic_prompt(user_input, missing_fields):
    prompt = f"Extract the following details: {', '.join(missing_fields)}.\n"
    prompt += f"Input: {user_input}\nResponse format (JSON): {{'company_size': '', 'industry': '', 'job_roles': '', 'location': ''}}"
    return prompt


In [195]:
def feedback_loop(model, memory_state):
    """Iterative dialog with the user to complete missing fields."""
    print("Agent: Let's gather some details about your target.")
    
    while memory_state["missing_fields"]:
        missing_fields = memory_state["missing_fields"]    
    
        dynamic_prompt = create_dynamic_prompt("", missing_fields)

        # Ask the user for the missing field
        user_input = input(f"Agent: Please provide details about {', '.join(missing_fields)}.\nUser: ")
        
        # Update the prompt with user input
        full_prompt = create_dynamic_prompt(user_input, missing_fields)

        # Extract the user input
        extracted_data = extract_user_fields(full_prompt, model)
        
        # Update the memory state
        memory_state = update_memory_with_response(memory_state, extracted_data)
        
        # Log the conversation
        memory_state["conversation"].append({"role": "user", "content": user_input})
        memory_state["conversation"].append({"role": "agent", "content": f"Extracted: {extracted_data}"})
        print("Memory State:", memory_state)
        print("Agent: Thank you! All fields are complete.")

    return memory_state

In [196]:
validation_prompt = create_dynamic_prompt(user_input, ["company_size", "location"])
validation_response = model.predict(validation_prompt)

In [None]:
extracted_data = extract_user_fields(user_input, model)
refined_query = create_dynamic_prompt("", ["industry", "location"])

## Query Matching and storage

In [225]:
def query_matching(user_query, llm_vector_store, llm_responses, threshold=0.5):
    """
    Matches a user query to the most similar predefined question in the vector store.

    Args:
        user_query (str): The query from the user.
        llm_vector_store (FAISS): Vector store containing embeddings of predefined questions.
        llm_responses (dict): Dictionary mapping predefined questions to responses.
        threshold (float): Minimum similarity score to consider a match.

    Returns:
        str: The matched response or a fallback response if no match is found.
    """
    try: 
        matches = vector_store.similarity_search_with_score(user_query, k=1)
        
        if not matches:
            return "Sorry, I couldn't find a relevant match for your query."

        # Retrieve the best match and its similarity score
        best_match, score = matches[0]  # Unpack the first result (document, score)
        
        print(f'best_match:{best_match.page_content}')
        print(f'score:{score}')
        
        if score >= threshold:
            question= best_match.page_content    
            return llm_responses.get(question, "No Predefined response found")
        else:
            return "Sorry, your query doesn't match any known question closely enough."
    except Exception as e:
        print(f"Error during query matching: {e}")
        return "No match found."

In [226]:
from langchain.vectorstores import FAISS
from langchain_cohere import CohereEmbeddings
import json
import pandas as pd

# Load the dataset
qa_dataset = pd.read_csv('./customer_leads_agent_qa_data_csv.csv', delimiter=',').to_dict(orient='records')

# Convert questions into embeddings
embeddings = CohereEmbeddings(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", model="embed-english-light-v3.0" )

questions = [item["prompt_text"] for item in qa_dataset]
vector_store = FAISS.from_texts(questions, embeddings)
    
# Map each vector to its expected response
responses = {item["prompt_text"]: item["expected_response"] for item in qa_dataset}


In [227]:
# User's input query
user_query = "What unique value do they offer?"

# Query matching
response = query_matching(user_query, vector_store, responses, threshold=0.3)

# Print the response
print("Agent's Response:", response)


best_match:"What unique value do they offer?"
score:0.7451424598693848
Agent's Response: "They offer unique value in areas like [strengths]."


In [186]:
def merge_memory(current_memory, new_memory):
    for key, value in new_memory.items():
        if isinstance(value, dict) and key in current_memory:
            # Recursive merge for nested dictionaries
            merge_memory(current_memory[key], value)
        else:
            # Update scalar values or new keys
            current_memory[key] = value

In [187]:
# def handle_no_match(llm_memory_state, llm_model):
#     """Fallback to feedback loop when no match is found."""
#     print("Agent: I couldn't find a predefined match. Let's refine your query.")
#     llm_memory_state, llm_extracted_data = feedback_loop(memory_state=memory_state, model=llm_model)
#     return llm_memory_state, llm_extracted_data


In [202]:
import json

feedback_prompt_template = f"""
# Role
You are a Data Extraction Agent responsible for collecting specific business details iteratively.

# Objective
Your task is to:
1. Update the `memory_state` dictionary with any new information extracted from `user_input`.
2. Identify any fields still missing and construct a `next_prompt` to ask the user for these details.
3. If all required fields are complete, indicate this in the `next_prompt`.

# Memory State: {memory_state}

# User Input: {user_input}

# Response Format
Respond in the following JSON format:
{
    "updated_memory": {memory_state},
    "next_prompt": "string"
}
"""


ValueError: Invalid format specifier ' {'conversation': [], 'extracted_data': {'company_size': None, 'industry': None, 'job_roles': None, 'location': None}, 'missing_fields': ['company_size', 'industry', 'job_roles', 'location']},
    "next_prompt": "string"
' for object of type 'str'

In [140]:
import copy


def model_driven_feedback_loop(fcn_user_input, fcn_memory_state, model):
    """
    Handles feedback loop by using the model to manage memory and prompts dynamically.
    
    Args:
        fcn_user_input (str): The current user input.
        fcn_memory_state (dict): Dictionary containing collected data and missing fields.
        model (ChatOpenAI): Language model instance.
    
    Returns:
        updated_memory (dict): Updated memory state with new extracted data.
        next_prompt (str): Next prompt for the user.
    """
    try:
        # Prepare memory for the prompt
        memory_state_str = json.dumps(fcn_memory_state, indent=4)
        updated_memory = fcn_memory_state.get("updated_memory", {})
        updated_memory_str = json.dumps(updated_memory, indent=4)

        # Construct the prompt
        prompt = f"""
        Role: You are a Data Extraction Agent responsible for collecting specific business details iteratively. 
        # Objective
        Your task is to:
        1. Update the `memory_state` dictionary with any new information extracted from `user_input`.
        2. Identify any fields still missing and construct a `next_prompt` to ask the user for these details.
        3. If all required fields are complete, indicate this in the `next_prompt`.

        # Memory State:
        {memory_state_str}

        # User Input:
        {fcn_user_input}

        # Response Format
        Respond in the following JSON format:
        {{
            "updated_memory": {updated_memory_str},
            "next_prompt": "string"
        }}
        """
        
        # Debugging: Print the constructed prompt
        print(f"Prompt sent to model: {prompt}")
        
        # Generate the response
        response = model.predict(prompt)
        print(f"Response from model: {response}")

        # Parse the response
        response_data = json.loads(response)
        
        # Update memory
        fcn_updated_memory = copy.deepcopy(fcn_memory_state)
        merge_memory(fcn_updated_memory, response_data.get("updated_memory", {}))
        
        
        # Debugging: Check memory updates
        print("Original memory state:", json.dumps(fcn_memory_state, indent=4))
        print("Response data:", json.dumps(response_data, indent=4))
        print("Updated memory state:", json.dumps(fcn_updated_memory, indent=4))

        # Get the next prompt
        fcn_next_prompt = response_data.get("next_prompt", "All required fields are complete.")

        return fcn_updated_memory, fcn_next_prompt
    
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {e}")
        return fcn_memory_state, "An error occurred. Please try again."
    except Exception as e:
        print(f"Unexpected error: {e}")
        return fcn_memory_state, "An error occurred. Please try again."

In [228]:
user_inputs_example = [
    "I am targeting companies in San Francisco.",
    "These companies should have 500+ employees.",
    "I want to reach CTOs and Product Managers."
]

memory_state_example = {
    "updated_memory": {
        "company_size": None,
        "industry": None,
        "location": None,
        "job_roles": []
    }
}




In [None]:
# Iterative feedback loop
for user_input in user_inputs_example:
    memory_state_example, next_prompt = model_driven_feedback_loop(
        fcn_user_input=user_input,
        fcn_memory_state=memory_state_example,
        model=model
    )
    print(f"Agent: {next_prompt}")
    print(f"Updated Memory State: {json.dumps(memory_state_example, indent=4)}")
    if "All required fields are complete" in next_prompt:
        break


In [238]:
def chat_with_model(model_fcn, memory_state_fcn):
    """
    Simulates a chat between the user and the AI model using the model-driven feedback loop.
    
    Args:
        model_fcn (ChatOpenAI): The AI model instance.
        memory_state_fcn (dict): Tracks extracted data and missing fields.

    Returns:
        None: Continuously interacts with the user until all fields are collected.
    """
    print("Agent: Let's gather the necessary details. Feel free to provide information step by step.")
    
    debug_mode = True  # Set this flag to False in production    
    
    while True:
        try:
                 
            if memory_state_fcn["missing_fields"]:
                next_prompt_fcn = f"Please provide the {memory_state_fcn['missing_fields'][0]}."
            else:
                next_prompt_fcn = "All required fields are complete. Do you need any further assistance?"
 
       
            # Display the AI's next prompt to the user
            # Display the AI's next prompt to the user
            print(f"Agent: {next_prompt_fcn}")


            # Get user input
            user_input = input("User: ")
            
                        # Exit condition
            if user_input.lower() in ["exit", "quit", "done"]:
                print("Agent: Thank you for the information. Goodbye!")
                break
                
                
            if not user_input.strip():
                print("Agent: I didn't catch that. Could you provide more details?")
                continue

            # Match the user query to a predefined response
            response = query_matching(user_input, vector_store, responses)
            
            if response:
                print("Agent:", response)
            else:
                # Run the feedback loop with the user's input
                memory_state_fcn, next_prompt_fcn = model_driven_feedback_loop(
                    fcn_user_input=user_input,
                    fcn_memory_state=memory_state_fcn,
                    model=model_fcn
                )
        
        
            # Debugging: Log the updated memory state if debugging is enabled
            if debug_mode:
                print(f"Updated Memory State: {json.dumps(memory_state_fcn, indent=4)}")

            # Check if all fields are complete
            if "All required fields are complete" in next_prompt_fcn:
                print(f"Agent: {next_prompt_fcn}")
                print(f"Final Memory State: {json.dumps(memory_state_fcn, indent=4)}")
                break

        except Exception as e:
            print(f"An error occurred: {e}")
            print("Agent: I encountered an issue. Please try again.")
            break

Prompt: What is your name?
User input: Alice
All information has been collected.
{'missing_fields': [], 'next_prompt': 'Thank you! Any more information to provide?', 'user_name': 'Alice'}


In [191]:
def initialize_agent():
    """
    Initializes the model and memory state for the chat agent.
    Returns:
        model_fcn (ChatCohere): The AI model instance.
        memory_state_fcn (dict): The initial memory state.
    """
    # Initialize model
    model_fcn = ChatCohere(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", temperature=0.1)

    # Initialize memory state
    memory_state_fcn = {
        "updated_memory": {
            "company_size": None,
            "industry": None,
            "location": None,
            "job_roles": []
        },
        "next_prompt": "How may I help you today?"
    }

    return model_fcn, memory_state_fcn


In [236]:
initial_memory_state = {"missing_fields": ["user_name"], "next_prompt": "What is your name?"}


In [241]:
if __name__ == "__main__":    
   chat_with_model(model_fcn=model, memory_state_fcn=initial_memory_state)


All information has been collected.


In [242]:
def get_dynamic_prompt(missing_field):
    prompts = {
        "industry": "What industry are you working in?",
        "company_size": "Can you tell me your company size?",
        "location": "Where is your company located?",
        "job_roles": "What are the key job roles in your company?",
    }
    return prompts.get(missing_field, "Can you provide more details?")


In [243]:
def chat_flow():
    while memory_state["missing_fields"]:
        # Pick the first missing field
        missing_field = memory_state["missing_fields"][0]
        
        # Generate a prompt for the user
        prompt = get_dynamic_prompt(missing_field)
        print(prompt)  # This is where the user sees the question
        
        # Simulate user response
        user_response = input("Your response: ")  # Replace with actual input logic

        # Update memory with user response
        if user_response.strip():
            memory_state["extracted_data"][missing_field] = user_response
            memory_state["missing_fields"].remove(missing_field)
        else:
            print("Invalid response, please provide more details.")

    print("All fields have been filled:")
    print(memory_state["extracted_data"])
