In [233]:
import uuid
import json

from langgraph.graph import START, MessagesState, StateGraph
from langgraph.checkpoint.memory import MemorySaver 
from langchain_cohere import ChatCohere

In [234]:
    # Define a new graph
workflow = StateGraph(state_schema=MessagesState)
model = ChatCohere(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", temperature=0.1)


In [235]:
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}

In [236]:
# Define workflow nodes
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

<langgraph.graph.state.StateGraph at 0x20b291c9390>

In [237]:
# Set up memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [238]:
# Generate a unique thread ID for conversation management
thread_id = uuid.uuid4()
config = {"configurable": {"thread_id": thread_id}}

In [239]:
field_extraction_prompt = """
# Role

You are a Data Extraction Agent.

# Objective

Your objective is to identify and extract specific business details from provided text, including industry, company size, location, and job roles. You should construct a full `user_info` dictionary with these details and indicate if there are any missing fields.

# Context

The extracted information will help provide structured business data from raw text inputs, which will support various business and analytical operations.

# SOP

1. Analyze the user-provided text in the variable user_input.

2. Construct and return the following dictionary:

{ "company_size": null or employee range (e.g., "1-10", "500+"), "industry": null or specific sector (e.g., "Tech", "Healthcare"), "location": null or specific location (e.g., city or country), "job_roles": [] or list of job titles mentioned }

3. In addition to the dictionary, provide a list of fields that are still missing or mention that all fields are complete.

# Examples

**Example 1:**

**Input:**

"Text describing a tech company based in New York with 200+ employees and titles like CEO, Data Scientist, and Product Manager."

**Output:**

{
    "user_info": {
        "company_size": "200+",
        "industry": "Tech",
        "location": "New York",
        "job_roles": ["CEO", "Data Scientist", "Product Manager"]
    },
    "missing_fields": []
}

**Example 2:**

**Input:**

"Description of a healthcare firm in Dubai without specific company size or job titles mentioned."

**Output:**

{
    "user_info": {
        "company_size": null,
        "industry": "Healthcare",
        "location": "Dubai",
        "job_roles": []
    },
    "missing_fields": ["company_size", "job_roles"]
}

**Example 3:**

**Input:**

"A small tech startup with less than 10 employees, located in San Francisco. The team includes a CEO and a CTO."

**Output:**

{
    "user_info": {
        "company_size": "1-10",
        "industry": "Tech",
        "location": "San Francisco",
        "job_roles": ["CEO", "CTO"]
    },
    "missing_fields": []
}
"""

### Extract user fields:


In [240]:
REQUIRED_FIELDS = {"company_size", "industry", "job_roles", "location"}

# Global dictionary to keep track of extracted fields and missing fields
recorded_fields = {
    "user_info": {},
    "missing_fields": list(REQUIRED_FIELDS)  # Initially, all fields are required
}

In [241]:
def extract_user_fields(message, model):
    prompt = field_extraction_prompt + f"\nInput: {message}\nResponse format (JSON): {{'company_size': '', 'industry': '', 'job_roles': '', 'location': ''}}"
        
    try: 
        llm_response = model.predict(prompt) 
        llm_extracted_data = json.loads(llm_response)
        print(llm_extracted_data)
        return llm_extracted_data
    except KeyError as e:
        print(f"Missing key in template formatting: {e}")
    except json.JSONDecodeError as e:
        print(f"Failed to decode JSON response: {e}")


In [242]:
# Test the extraction
user_input = "A startup"
extracted_data = extract_user_fields(user_input, model)

print("Extracted Data:", extracted_data)


# Define the required fields
required_fields = ['industry', 'company_size', 'location', 'job_roles']

print("Missing Fields:", extracted_data["missing_fields"])


{'user_info': {'company_size': None, 'industry': None, 'location': None, 'job_roles': []}, 'missing_fields': ['company_size', 'industry', 'location', 'job_roles']}
Extracted Data: {'user_info': {'company_size': None, 'industry': None, 'location': None, 'job_roles': []}, 'missing_fields': ['company_size', 'industry', 'location', 'job_roles']}
Missing Fields: ['company_size', 'industry', 'location', 'job_roles']


In [243]:
# Memory initialization
memory_state = {
    "conversation": [],
    "extracted_data": {"company_size": None, "industry": None, "job_roles": None, "location": None},
    "missing_fields": ["company_size", "industry", "job_roles", "location"],
}

def update_memory_with_response(llm_memory_state, llm_extracted_data):
    for field, value in llm_extracted_data.items():
        if value in llm_memory_state["extracted_data"]:
            llm_memory_state["extracted_data"][field] = value
            llm_memory_state["missing_fields"].remove(field)
    return llm_memory_state




In [244]:
def create_dynamic_prompt(user_input, missing_fields):
    prompt = f"Extract the following details: {', '.join(missing_fields)}.\n"
    prompt += f"Input: {user_input}\nResponse format (JSON): {{'company_size': '', 'industry': '', 'job_roles': '', 'location': ''}}"
    return prompt


In [245]:
def feedback_loop(model, memory_state):
    """Iterative dialog with the user to complete missing fields."""
    print("Agent: Let's gather some details about your target.")
    
    while memory_state["missing_fields"]:
        missing_fields = memory_state["missing_fields"]    
    
        dynamic_prompt = create_dynamic_prompt("", missing_fields)

        # Ask the user for the missing field
        user_input = input(f"Agent: Please provide details about {', '.join(missing_fields)}.\nUser: ")
        
        # Update the prompt with user input
        full_prompt = create_dynamic_prompt(user_input, missing_fields)

        # Extract the user input
        extracted_data = extract_user_fields(full_prompt, model)
        
        # Update the memory state
        memory_state = update_memory_with_response(memory_state, extracted_data)
        
        # Log the conversation
        memory_state["conversation"].append({"role": "user", "content": user_input})
        memory_state["conversation"].append({"role": "agent", "content": f"Extracted: {extracted_data}"})
        print("Memory State:", memory_state)
        print("Agent: Thank you! All fields are complete.")

    return memory_state

In [246]:
validation_prompt = create_dynamic_prompt(user_input, ["company_size", "location"])
validation_response = model.predict(validation_prompt)

In [247]:
extracted_data = extract_user_fields(user_input, model)
refined_query = create_dynamic_prompt("", ["industry", "location"])

{'user_info': {'company_size': None, 'industry': None, 'location': None, 'job_roles': []}, 'missing_fields': ['company_size', 'industry', 'location', 'job_roles']}


## Query Matching and storage

In [248]:
def query_matching(user_input, vector_store, expected_responses, model, similarity_threshold=0.75):
    """
    Matches user input against a vector store of predefined questions and handles fallbacks.

    Args:
        user_input (str): The user's query or input.
        vector_store (VectorStore): The vector store containing the QA dataset questions.
        expected_responses (dict): A dictionary mapping questions to their expected responses.
        model (ChatOpenAI): The AI model instance for generating reformulated responses.
        similarity_threshold (float): The minimum similarity score to consider a match valid.

    Returns:
        str: A response for the user or a fallback message if no match is found.
    """
    try:
        # Step 1: Perform similarity search
        matches = vector_store.similarity_search_with_score(user_input, k=1)
        if matches:
            match, score = matches[0]
            print(f"DEBUG: Match Found: {match.page_content}, Similarity Score: {score}")

            # Step 2: Check if the similarity score meets the threshold
            if score >= similarity_threshold:
                matched_question = match.page_content
                predefined_response = expected_responses.get(
                    matched_question,
                    "I couldn't find a predefined response for this question."
                )

                # Step 3: Reformulate the response dynamically
                reformulated_response = model.predict(
                    f"Reformulate the following response to make it more engaging: {predefined_response}"
                )
                return reformulated_response.strip()

        # Step 4: If no valid match, provide fallback guidance
        print("DEBUG: No valid match found or low similarity score.")
        return "I'm sorry, I couldn't find a direct answer. Could you provide more details about your request?"
    
    except Exception as e:
        print(f"Error in query_matching: {e}")
        return "I'm sorry, something went wrong while processing your request."


In [249]:
from langchain.vectorstores import FAISS
from langchain_cohere import CohereEmbeddings
import json
import pandas as pd

# Load the dataset
qa_dataset = pd.read_csv('logic/data/customer_leads_agent_qa_data_csv.csv', delimiter=',', encoding='latin1').to_dict(orient='records')

# Convert questions into embeddings
embeddings = CohereEmbeddings(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", model="embed-english-light-v3.0" )

questions = [item["prompt_text"] for item in qa_dataset]
vector_store = FAISS.from_texts(questions, embeddings)
    
# Map each vector to its expected response
responses = {item["prompt_text"]: item["expected_response"] for item in qa_dataset}


In [250]:
# User's input query
user_query = "What unique value do they offer?"

# Query matching
response = query_matching(user_query, vector_store, responses, model, similarity_threshold=0.3)

# Print the response
print("Agent's Response:", response)


DEBUG: Match Found: "What unique value do they offer?", Similarity Score: 0.7451424598693848
Agent's Response: Looking to leverage their expertise in [strengths], this company brings a distinctive advantage to the table. Their capabilities in this area are truly remarkable and set them apart from the competition.


In [251]:
def merge_memory(current_memory, new_memory):
    for key, value in new_memory.items():
        if isinstance(value, dict) and key in current_memory:
            # Recursive merge for nested dictionaries
            merge_memory(current_memory[key], value)
        else:
            # Update scalar values or new keys
            current_memory[key] = value

In [252]:
# def handle_no_match(llm_memory_state, llm_model):
#     """Fallback to feedback loop when no match is found."""
#     print("Agent: I couldn't find a predefined match. Let's refine your query.")
#     llm_memory_state, llm_extracted_data = feedback_loop(memory_state=memory_state, model=llm_model)
#     return llm_memory_state, llm_extracted_data


In [253]:
import copy


def model_driven_feedback_loop(fcn_user_input, fcn_memory_state, model):
    """
    Handles feedback loop by using the model to manage memory and prompts dynamically.
    
    Args:
        fcn_user_input (str): The current user input.
        fcn_memory_state (dict): Dictionary containing collected data and missing fields.
        model (ChatOpenAI): Language model instance.
    
    Returns:
        updated_memory (dict): Updated memory state with new extracted data.
        next_prompt (str): Next prompt for the user.
    """
    try:
        # Prepare memory for the prompt
        memory_state_str = json.dumps(fcn_memory_state, indent=4)
        updated_memory = fcn_memory_state.get("updated_memory", {})
        updated_memory_str = json.dumps(updated_memory, indent=4)

        # Construct the prompt
        prompt = f"""
        Role: You are a Data Extraction Agent responsible for collecting specific business details iteratively. 
        # Objective
        Your task is to:
        1. Update the `memory_state` dictionary with any new information extracted from `user_input`.
        2. Identify any fields still missing and construct a `next_prompt` to ask the user for these details.
        3. If all required fields are complete, indicate this in the `next_prompt`.

        # Memory State:
        {memory_state_str}

        # User Input:
        {fcn_user_input}

        # Response Format
        Respond in the following JSON format:
        {{
            "updated_memory": {updated_memory_str},
            "next_prompt": "string"
        }}
        """
        
        # Debugging: Print the constructed prompt
        print(f"Prompt sent to model: {prompt}")
        
        # Generate the response
        response = model.predict(prompt)
        print(f"Response from model: {response}")

        # Parse the response
        response_data = json.loads(response)
        
        # Update memory
        fcn_updated_memory = copy.deepcopy(fcn_memory_state)
        merge_memory(fcn_updated_memory, response_data.get("updated_memory", {}))
        
        
        # Debugging: Check memory updates
        print("Original memory state:", json.dumps(fcn_memory_state, indent=4))
        print("Response data:", json.dumps(response_data, indent=4))
        print("Updated memory state:", json.dumps(fcn_updated_memory, indent=4))

        # Get the next prompt
        fcn_next_prompt = response_data.get("next_prompt", "All required fields are complete.")

        return fcn_updated_memory, fcn_next_prompt
    
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {e}")
        return fcn_memory_state, "An error occurred. Please try again."
    except Exception as e:
        print(f"Unexpected error: {e}")
        return fcn_memory_state, "An error occurred. Please try again."

In [254]:
user_inputs_example = [
    "I am targeting companies in San Francisco.",
    "These companies should have 500+ employees.",
    "I want to reach CTOs and Product Managers."
]

memory_state_example = {
    "updated_memory": {
        "company_size": None,
        "industry": None,
        "location": None,
        "job_roles": []
    }
}




In [255]:
# Iterative feedback loop
for user_input in user_inputs_example:
    memory_state_example, next_prompt = model_driven_feedback_loop(
        fcn_user_input=user_input,
        fcn_memory_state=memory_state_example,
        model=model
    )
    print(f"Agent: {next_prompt}")
    print(f"Updated Memory State: {json.dumps(memory_state_example, indent=4)}")
    if "All required fields are complete" in next_prompt:
        break


Prompt sent to model: 
        Role: You are a Data Extraction Agent responsible for collecting specific business details iteratively. 
        # Objective
        Your task is to:
        1. Update the `memory_state` dictionary with any new information extracted from `user_input`.
        2. Identify any fields still missing and construct a `next_prompt` to ask the user for these details.
        3. If all required fields are complete, indicate this in the `next_prompt`.

        # Memory State:
        {
    "updated_memory": {
        "company_size": null,
        "industry": null,
        "location": null,
        "job_roles": []
    }
}

        # User Input:
        I am targeting companies in San Francisco.

        # Response Format
        Respond in the following JSON format:
        {
            "updated_memory": {
    "company_size": null,
    "industry": null,
    "location": null,
    "job_roles": []
},
            "next_prompt": "string"
        }
        
Response from

In [256]:
def chat_with_model(model_fcn, memory_state_fcn):
    """
    Simulates a chat between the user and the AI model using the model-driven feedback loop.
    
    Args:
        model_fcn (ChatOpenAI): The AI model instance.
        memory_state_fcn (dict): Tracks extracted data and missing fields.

    Returns:
        None: Continuously interacts with the user until all fields are collected.
    """
    print("Agent: Let's gather the necessary details. Feel free to provide information step by step.")
    
    debug_mode = True  # Set this flag to False in production    
    
    while True:
        try:
                 
            if memory_state_fcn["missing_fields"]:
                next_prompt_fcn = f"Please provide the {memory_state_fcn['missing_fields'][0]}."
            else:
                next_prompt_fcn = "All required fields are complete. Do you need any further assistance?"
 
       
            # Display the AI's next prompt to the user
            # Display the AI's next prompt to the user
            print(f"Agent: {next_prompt_fcn}")


            # Get user input
            user_input = input("User: ")
            
                        # Exit condition
            if user_input.lower() in ["exit", "quit", "done"]:
                print("Agent: Thank you for the information. Goodbye!")
                break
                
                
            if not user_input.strip():
                print("Agent: I didn't catch that. Could you provide more details?")
                continue

            # Match the user query to a predefined response
            response = query_matching(user_input, vector_store, responses)
            
            if response:
                print("Agent:", response)
            else:
                # Run the feedback loop with the user's input
                memory_state_fcn, next_prompt_fcn = model_driven_feedback_loop(
                    fcn_user_input=user_input,
                    fcn_memory_state=memory_state_fcn,
                    model=model_fcn
                )
        
        
            # Debugging: Log the updated memory state if debugging is enabled
            if debug_mode:
                print(f"Updated Memory State: {json.dumps(memory_state_fcn, indent=4)}")

            # Check if all fields are complete
            if "All required fields are complete" in next_prompt_fcn:
                print(f"Agent: {next_prompt_fcn}")
                print(f"Final Memory State: {json.dumps(memory_state_fcn, indent=4)}")
                break

        except Exception as e:
            print(f"An error occurred: {e}")
            print("Agent: I encountered an issue. Please try again.")
            break

In [257]:
def initialize_agent():
    """
    Initializes the model and memory state for the chat agent.
    Returns:
        model_fcn (ChatCohere): The AI model instance.
        memory_state_fcn (dict): The initial memory state.
    """
    # Initialize model
    model_fcn = ChatCohere(cohere_api_key="QTIAR07ZVhcAVAPrUTHQozivAbRFhmhdoWwPsclg", temperature=0.1)

    # Initialize memory state
    memory_state_fcn = {
        "updated_memory": {
            "company_size": None,
            "industry": None,
            "location": None,
            "job_roles": []
        },
        "next_prompt": "How may I help you today?"
    }

    return model_fcn, memory_state_fcn


In [258]:
initial_memory_state = {"missing_fields": ["user_name"], "next_prompt": "What is your name?"}


In [259]:
# if __name__ == "__main__":    
#    chat_with_model(model_fcn=model, memory_state_fcn=initial_memory_state)


In [260]:
def get_dynamic_prompt(field):
    """
    Generates a dynamic prompt for the given field.
    """
    prompts = {
        "company_size": "What is the size of the companies you're targeting? For example: 1-10, 50-200, 500+.",
        "industry": "Which industry are you targeting? For example: Tech, Healthcare, or Finance.",
        "location": "Where are these companies located? Provide a city, state, or country.",
        "job_roles": "What roles or job titles are you targeting? For example: CTO, Product Manager."
    }
    return prompts.get(field, "Please provide more details about the required field.")


In [261]:
def validate_response(response, field):
    """
    Validates user responses based on the field requirements.
    """
    if field == "company_size" and not any(char.isdigit() for char in response):
        return False
    if field == "location" and len(response.split()) < 2:  # Example: Expect 'City, State'
        return False
    if field == "job_roles" and len(response.split(",")) < 1:  # Expect a list of roles
        return False
    return True

In [262]:
def suggest_next_steps():
    """
    Suggests the next steps after all fields have been collected.
    """
    print("Agent: Great! Here’s what you can do next:")
    print("- Use LinkedIn or Sales Navigator to search for companies matching these criteria.")
    print("- Enrich your lead data with contact details using tools like Hunter.io.")
    print("- Consolidate the results into a lead list for outreach.")


In [263]:
def clean_memory():
    """
    Resets the memory state to start a new conversation.
    """
    global memory_state
    memory_state = {
        "conversation_started": False,
        "missing_fields": ["company_size", "industry", "location", "job_roles"],
        "extracted_data": {}
    }


In [264]:
def chat_flow(model, vector_store, expected_responses):
    """
    Handles the conversation to collect required fields, address predefined questions, and clean memory afterward.
    """
    global memory_state  # Ensure memory state can be reset if needed

    print("Agent: Welcome! Let’s start by gathering some details.")

    while memory_state["missing_fields"]:
        # Step 1: Focus on the next missing field
        missing_field = memory_state["missing_fields"][0]
        prompt = get_dynamic_prompt(missing_field)
        print(f"Agent: {prompt}")

        # Step 2: Capture user response
        user_input = input("Your response: ").strip()
        if user_input.lower() in ["exit", "quit", "done"]:
            print("Agent: Thank you for the information. Goodbye!")
            break

        # Step 3: Fallback for predefined questions
        response = query_matching(user_input, vector_store, expected_responses, model)
        if "Sorry" not in response:  # Valid predefined match
            print("Agent:", response)
            continue

        # Step 4: Validate and update memory state for missing fields
        if validate_response(user_input, missing_field):
            memory_state["extracted_data"][missing_field] = user_input
            memory_state["missing_fields"].remove(missing_field)
            print(f"Agent: Got it! {missing_field} updated to: {user_input}.")
        else:
            print("Agent: That doesn’t seem right. Could you clarify?")
            continue

    # Step 5: Provide next steps and reset memory
    print("Agent: All required fields have been collected successfully!")
    print(f"Collected Information: {memory_state['extracted_data']}")
    suggest_next_steps()
    clean_memory()
    print("Agent: Memory cleaned. Ready for a new conversation.")


In [265]:
# Initialize memory state
memory_state = {
    "conversation_started": False,
    "missing_fields": ["company_size", "industry", "location", "job_roles"],
    "extracted_data": {}
}

# Start the conversation
# chat_flow(model, vector_store, responses)

In [266]:
import streamlit as st

# Initialize memory state
memory_state = {
    "conversation_started": False,
    "missing_fields": ["company_size", "industry", "location", "job_roles"],
    "extracted_data": {}
}

# Streamlit app configuration
st.title("AI Lead Generation Assistant")
st.sidebar.title("Conversation Settings")
start_new_session = st.sidebar.button("Start New Session")


# Clean memory if a new session is started
if start_new_session or not memory_state["conversation_started"]:
    memory_state = {
        "conversation_started": True,
        "missing_fields": ["company_size", "industry", "location", "job_roles"],
        "extracted_data": {}
    }
    st.session_state["conversation_history"] = []


# Chat display
st.write("Welcome! Let’s start by gathering some details.")
if "conversation_history" not in st.session_state:
    st.session_state["conversation_history"] = []

for message in st.session_state["conversation_history"]:
    st.write(message)

2024-11-25 13:54:55.743 
  command:

    streamlit run C:\Users\USER\AppData\Local\Programs\Python\Python311\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-11-25 13:54:55.743 Session state does not function when running a script without `streamlit run`


In [267]:
# Input box
user_input = st.text_input("Your response:", key="user_input")
if st.button("Send"):
    if user_input.strip():
        # Add user input to the conversation
        st.session_state["conversation_history"].append(f"You: {user_input}")

        # Check for predefined responses
        response = query_matching(user_input, vector_store, responses, model)
        if "Sorry" not in response:  # Valid predefined match
            st.session_state["conversation_history"].append(f"Agent: {response}")
        else:
            # Handle missing fields
            if memory_state["missing_fields"]:
                missing_field = memory_state["missing_fields"][0]
                if validate_response(user_input, missing_field):
                    memory_state["extracted_data"][missing_field] = user_input
                    memory_state["missing_fields"].remove(missing_field)
                    st.session_state["conversation_history"].append(
                        f"Agent: Got it! {missing_field} updated to: {user_input}."
                    )
                else:
                    st.session_state["conversation_history"].append(
                        "Agent: That doesn’t seem right. Could you clarify?"
                    )
            else:
                # Suggest next steps
                suggest_next_steps()
                st.session_state["conversation_history"].append(
                    "Agent: All required fields have been collected successfully!"
                )
                st.session_state["conversation_history"].append(
                    "Agent: Memory cleaned. Ready for a new conversation."
                )
                clean_memory()


