<a href="https://colab.research.google.com/github/AliAbdallah21/RAG-Icebreaker-Gradio-ChatBot/blob/main/AI_Icebreaker_Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title All the pip installs
%%capture
!pip install chromadb
!pip install llama_index
!pip install llama-index-core.
!pip install llama-index-vector-stores-chroma
!pip install llama-index-embeddings-huggingface
!pip install sentence-transformers
!pip install PyMuPDF
!pip install gradio
!pip install transformers
!pip install torch
!pip install requests
print("LlamaIndex and its integrations installed.")

In [28]:
# @title All the imports
import gradio as gr
from llama_index.core import VectorStoreIndex, StorageContext, SimpleDirectoryReader,Document
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import get_response_synthesizer
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.readers.file import PyMuPDFReader
from llama_index.core import PromptTemplate
from llama_index.core.chat_engine import CondenseQuestionChatEngine # Import CondenseQuestionChatEngine
import os
from google.colab import userdata
import chromadb
import requests
import json
import time

In [3]:
# @title OpenAI API Key Setup
try:
    os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
    print("OpenAI API key loaded from Colab secrets.")
except Exception as e:
    print(f"Error loading OpenAI API key from Colab secrets: {e}")
    print("Please ensure your 'OPENAI_API_KEY' secret is set in Colab and enabled for this notebook.")
    raise SystemExit("API Key not found. Exiting.")


OpenAI API key loaded from Colab secrets.


In [18]:
# @title Data Acquisition using the Bright Data API
def fetch_linkedin_profile_brightdata(linkedin_url: str, brightdata_api_key: str, use_mock_data: bool = False) -> dict:
    if use_mock_data:
        print("Using mock data for LinkedIn profile.")
        mock_data = {
            "id": "mock-user-123",
            "name": "Mock User Name",
            "city": "Mock City, Mock Country",
            "country_code": "XX",
            "position": "Lead AI Engineer at MockCorp | Generative AI Specialist",
            "current_company": {
                "name": "MockCorp",
                "company_id": "mockcorp",
                "title": "Lead AI Engineer",
                "location": None
            },
            "experience": [
                {
                    "title": "Lead AI Engineer",
                    "description": "Developed scalable AI solutions and drove innovation in the tech industry. Specialized in deep learning and natural language processing.",
                    "start_date": "Jan 2022",
                    "end_date": "Present",
                    "company": "MockCorp",
                    "company_id": "mockcorp",
                    "url": "https://www.linkedin.com/company/mockcorp"
                },
                {
                    "title": "Senior Data Scientist",
                    "description": "Built predictive models and analyzed large datasets to derive business insights. Focused on machine learning algorithms and data visualization.",
                    "start_date": "Sep 2019",
                    "end_date": "Dec 2021",
                    "company": "InnovateAI",
                    "company_id": "innovateai",
                    "url": "https://www.linkedin.com/company/innovateai"
                }
            ],
            "url": linkedin_url,
            "educations_details": "Mock University",
            "education": [
                {
                    "title": "Mock University",
                    "degree": "Master's Degree",
                    "field": "Artificial Intelligence",
                    "start_year": "2017",
                    "end_year": "2019"
                }
            ],
            "avatar": "https://placehold.co/200x200/cccccc/ffffff?text=Mock",
            "followers": 100,
            "connections": 150,
            "projects": [
                {
                    "title": "AI-Powered Recommendation System",
                    "start_date": "Mar 2023",
                    "end_date": "Aug 2023",
                    "description": "Developed a novel recommendation engine using collaborative filtering and deep learning techniques, resulting in a 15% increase in user engagement."
                }
            ],
            "location": "Mock City",
            "input_url": linkedin_url,
            "linkedin_id": "mock-user-123",
            "activity": [
                {
                    "interaction": "Liked by Mock User",
                    "link": "https://www.linkedin.com/feed/update/mock-post-1",
                    "title": "Exciting advancements in Generative AI!",
                    "img": None,
                    "id": "mock-activity-1"
                }
            ]
        }
        return mock_data

    # --- Trigger API ---
    brightdata_trigger_api_url = "https://api.brightdata.com/datasets/v3/trigger"
    dataset_id = "gd_l1viktl72bvl7bjuj0" # Your specific dataset ID
    trigger_query_params = {
        "dataset_id": dataset_id,
        "include_errors": "true"
    }
    auth_headers = {
        'Authorization': f'Bearer {brightdata_api_key}',
        'Content-Type': 'application/json'
    }
    trigger_payload = [
        {"url": linkedin_url}
    ]

    try:
        print(f"Fetching profile for: {linkedin_url}")
        response = requests.post(brightdata_trigger_api_url, headers=auth_headers, json=trigger_payload, params=trigger_query_params)
        response.raise_for_status()
        trigger_response = response.json()

        snapshot_id = trigger_response.get('snapshot_id')
        if not snapshot_id:
            print("ERROR: No snapshot_id returned from Bright Data trigger API.")
            return {}

        print(f"Collection triggered. Snapshot ID: {snapshot_id}. Polling for status...")

        # --- Polling for Snapshot Status ---
        brightdata_snapshots_list_url = "https://api.brightdata.com/datasets/v3/snapshots"
        snapshots_list_params = {
            "dataset_id": dataset_id,
            "status": "ready"
        }

        max_polling_attempts = 60 # 10 minutes total
        polling_interval_seconds = 10
        found_ready_snapshot = False

        for i in range(max_polling_attempts):
            time.sleep(polling_interval_seconds)

            list_response = requests.get(brightdata_snapshots_list_url, headers=auth_headers, params=snapshots_list_params)
            list_response.raise_for_status()
            snapshots_data = list_response.json()

            for snapshot in snapshots_data:
                if snapshot.get('id') == snapshot_id and snapshot.get('status') == 'ready':
                    found_ready_snapshot = True
                    break

            if found_ready_snapshot:
                print(f"✅ Snapshot {snapshot_id} is ready!")
                break
            else:
                print(f"Polling... ({i+1}/{max_polling_attempts})")

        if not found_ready_snapshot:
            print(f"❌ Snapshot {snapshot_id} did not become ready after {max_polling_attempts} attempts.")
            return {}

        # --- Retrieve Data Directly from Snapshot Metadata Endpoint ---
        brightdata_snapshot_data_url = f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}"
        print(f"Retrieving data from Bright Data...")

        data_response = requests.get(brightdata_snapshot_data_url, headers=auth_headers)
        data_response.raise_for_status()

        retrieved_data = data_response.json()

        if isinstance(retrieved_data, dict) and retrieved_data.get('name') and retrieved_data.get('linkedin_id'):
            print("✅ Data retrieved successfully!")
            return retrieved_data
        else:
            print("❌ Retrieved data is empty or not in expected profile format (dict with 'name'/'linkedin_id').")
            return {}

    except requests.exceptions.RequestException as e:
        print(f"❌ Request failed: {e}")
        if 'response' in locals() and response is not None:
            print(f"Bright Data Error Response Status Code: {response.status_code}")
            print(f"Bright Data Error Response Text: {response.text}")
        return {}
    except json.JSONDecodeError as e:
        print(f"❌ Failed to decode JSON: {e}")
        if 'response' in locals() and response is not None:
            print(f"Raw response that failed JSON decode: {response.text}")
        return {}


In [19]:
# @title Test Data Acquisition
# IMPORTANT: Replace with a real, public LinkedIn profile URL for testing.
# For GitHub, leave this as a placeholder or a non-personal example.
test_linkedin_url = "https://www.linkedin.com/in/example-user-profile/" # Changed to a generic placeholder URL

# Get your Bright Data API Key from Colab Secrets
brightdata_key_from_secrets = userdata.get('BRIGHTDATA_API_KEY')

# --- Choose your testing mode ---
# Set to True to force mock data. Set to False to try real data first.
USE_MOCK_DATA_FOR_TEST = True # Set to True for easier testing without Bright Data credits

profile_data = {} # Initialize profile_data to an empty dict

if not USE_MOCK_DATA_FOR_TEST and brightdata_key_from_secrets:
    print("Attempting to fetch real LinkedIn profile data...")
    profile_data = fetch_linkedin_profile_brightdata(
        linkedin_url=test_linkedin_url,
        brightdata_api_key=brightdata_key_from_secrets,
        use_mock_data=False
    )
    if profile_data:
        print("Successfully fetched real LinkedIn profile data!")
    else:
        print("Failed to fetch real LinkedIn profile data. Falling back to mock data.")
        profile_data = fetch_linkedin_profile_brightdata(
            linkedin_url=test_linkedin_url,
            brightdata_api_key=brightdata_key_from_secrets, # Key not used for mock, but function expects it
            use_mock_data=True # Use mock data for fallback
        )
else:
    print("Bright Data API key not found in Colab Secrets or USE_MOCK_DATA_FOR_TEST is True. Using mock data.")
    profile_data = fetch_linkedin_profile_brightdata(
        linkedin_url=test_linkedin_url,
        brightdata_api_key=brightdata_key_from_secrets, # Key not used for mock, but function expects it
        use_mock_data=True # Use mock data if key not found or forced
    )

if profile_data:
    print("\nProfile data is now available (either real or mock).")
    print(f"Name: {profile_data.get('name', 'N/A')}")
    print(f"Headline: {profile_data.get('position', 'N/A')}")
    print(f"Number of experiences: {len(profile_data.get('experience', []))}")
    print("\nFull Profile Data Structure (first 500 chars):")
    print(json.dumps(profile_data, indent=2)[:500] + "...")
else:
    print("CRITICAL: Profile data is still empty. Cannot proceed.")


Bright Data API key not found in Colab Secrets or USE_MOCK_DATA_FOR_TEST is True. Using mock data.
Using mock data for LinkedIn profile.

Profile data is now available (either real or mock).
Name: Mock User Name
Headline: Lead AI Engineer at MockCorp | Generative AI Specialist
Number of experiences: 2

Full Profile Data Structure (first 500 chars):
{
  "id": "mock-user-123",
  "name": "Mock User Name",
  "city": "Mock City, Mock Country",
  "country_code": "XX",
  "position": "Lead AI Engineer at MockCorp | Generative AI Specialist",
  "current_company": {
    "name": "MockCorp",
    "company_id": "mockcorp",
    "title": "Lead AI Engineer",
    "location": null
  },
  "experience": [
    {
      "title": "Lead AI Engineer",
      "description": "Developed scalable AI solutions and drove innovation in the tech industry. Specialized in deep...


In [42]:
# @title Convert Profile to LlamaIndex Document
def convert_profile_to_document(profile_data: dict) -> Document:
    if not profile_data:
        return Document(text="No profile data available.", metadata={})

    # This function is purely dependent on its input argument 'profile_data'

    full_name = profile_data.get('name', 'N/A')
    headline = profile_data.get('position', 'N/A')

    experiences_text = ""
    if 'experience' in profile_data and profile_data['experience']:
        experiences_text = "Experiences:\n"
        for exp in profile_data['experience']:
            company = exp.get('company', 'N/A')
            title = exp.get('title', 'N/A')
            start = exp.get('start_date', 'N/A')
            end = exp.get('end_date', 'Present')
            description = exp.get('description', '').replace('\n', ' ').strip()
            experiences_text += f"- {title} at {company} ({start} - {end}). Description: {description}\n"

    education_text = ""
    if 'education' in profile_data and profile_data['education']:
        education_text = "Education:\n"
        for edu in profile_data['education']:
            school = edu.get('title', 'N/A')
            degree = edu.get('degree', 'N/A')
            field = edu.get('field', 'N/A')
            education_text += f"- {degree} in {field} from {school}\n"

    projects_text = ""
    if 'projects' in profile_data and profile_data['projects']:
        projects_text = "Projects:\n"
        for proj in profile_data['projects']:
            title = proj.get('title', 'N/A')
            description = proj.get('description', '').replace('\n', ' ').strip()
            projects_text += f"- Project: {title}. Description: {description}\n"

    activity_text = ""
    if 'activity' in profile_data and profile_data['activity']:
        activity_text = "Recent Activity (Liked Posts/Interactions):\n"
        for act in profile_data['activity']:
            interaction = act.get('interaction', 'N/A')
            title = act.get('title', 'N/A')
            if title:
                activity_text += f"- {interaction}: \"{title}\"\n"

    document_content = (
        f"LinkedIn Profile: {full_name}\n"
        f"Headline: {headline}\n\n"
        f"{experiences_text}\n"
        f"{education_text}\n"
        f"{projects_text}\n"
        f"{activity_text}"
    )

    metadata = {
        "full_name": full_name,
        "headline": headline,
        "linkedin_url": profile_data.get('url', 'N/A'),
        "source": "LinkedIn Profile Data Collector (Bright Data)"
    }

    return Document(text=document_content, metadata=metadata)

# The following 'if' block is for standalone testing in Colab, not used by Gradio's call to this function
if 'profile_data' in locals() and profile_data:
    linkedin_document = convert_profile_to_document(profile_data)
    print("Converted LinkedIn profile to LlamaIndex Document.")
else:
    print("No profile data available to convert to LlamaIndex Document. Please run Cell 5.")


Converted LinkedIn profile to LlamaIndex Document.


In [21]:
# @title Node Parsing (Chunking the Document)
# Ensure 'linkedin_document' is defined from Cell 6
if 'linkedin_document' in locals() and linkedin_document:
    documents_to_parse = [linkedin_document]

    node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
    nodes = node_parser.get_nodes_from_documents(documents_to_parse)

    print(f"Parsed document into {len(nodes)} nodes (chunks).")
else:
    print("No LinkedIn document available to parse into nodes. Please ensure Cell 6 ran successfully.")


Parsed document into 1 nodes (chunks).


In [22]:
# @title Embedding Model Setup
embed_model = HuggingFaceEmbedding(
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
)

# IMPORTANT: ChromaDB client and storage_context initialization
# are now moved into the Gradio function (Cell 11) and will use an IN-MEMORY client
# to avoid 'readonly database' errors.

print("Embedding model initialized.")


Embedding model initialized.


In [23]:
# @title Configure LLM for LlamaIndex
# Configure the OpenAI LLM for LlamaIndex
llama_index_openai_llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7,
    max_tokens=512,
    api_key=os.environ.get('OPENAI_API_KEY')
)

# Set the default LLM and embedding model for LlamaIndex globally
Settings.llm = llama_index_openai_llm
Settings.embed_model = embed_model

# IMPORTANT: VectorStoreIndex creation is now handled within the Gradio function (Cell 11)
# or for standalone testing in Cell 10.

print("LlamaIndex LLM configured.")


LlamaIndex LLM configured.


In [24]:
# @title Global Icebreaker Prompt Definition
# The prompt is now more generic, focusing on broader professional aspects.
global_icebreaker_prompt_string = """
You are an AI assistant specialized in generating personalized icebreakers for networking events.
Your goal is to help someone start a conversation by referencing details from a person's LinkedIn profile.
Generate 1-2 unique and engaging icebreaker questions or statements.

**Focus on their:**
- Current or past roles and responsibilities
- Companies they've worked for
- Overall professional journey or career highlights
- General areas of expertise or skills
- Educational background

Avoid overly specific project details or recent social media activities unless they are highly prominent.
Do not ask generic questions like "What do you do?" or "How are you?".
Make it sound natural, conversational, and demonstrate you've genuinely reviewed their profile.

Profile Information:
{context_str}

Generate an icebreaker for this person:
"""

global_icebreaker_prompt_template = PromptTemplate(global_icebreaker_prompt_string)

print("Global icebreaker prompt defined (more generic).")


Global icebreaker prompt defined (more generic).


In [26]:
# @title Generate Icebreaker using LlamaIndex Query Engine (Standalone Test)

# Removed: from llama_index.core.prompts import PromptTemplate (now in Cell 2)

# --- Re-create VectorStoreIndex for standalone testing in this cell ---
# This uses the globally defined embed_model.
if 'nodes' in locals() and nodes:
    # Re-initialize ChromaDB client and collection for standalone run
    db_standalone = chromadb.Client() # Use in-memory client for standalone test
    chroma_collection_standalone = db_standalone.get_or_create_collection("linkedin_icebreaker_collection_standalone")
    vector_store_standalone = ChromaVectorStore(chroma_collection=chroma_collection_standalone)
    storage_context_standalone = StorageContext.from_defaults(vector_store=vector_store_standalone)

    index = VectorStoreIndex(
        nodes = nodes,
        storage_context=storage_context_standalone, # Use standalone storage context
        embed_model=embed_model # Use the globally initialized embed_model
    )
    print("Standalone VectorStoreIndex created for Cell 10.")
else:
    print("Nodes not available to create standalone VectorStoreIndex. Please ensure Cell 7 ran successfully.")
    # Exit if nodes are not available, as the rest of the cell won't work
    raise SystemExit("Cannot proceed with icebreaker generation in Cell 10 without nodes.")


# Use the globally defined prompt template (from Cell 9.5)
query_engine = index.as_query_engine(
    response_mode="compact",
    text_qa_template=global_icebreaker_prompt_template # Use the global prompt
)

# The 'query' here is not a search query, but an instruction to the LLM
# to generate an icebreaker using the context it retrieves.
# Adjusted for more generic focus, matching the new global prompt.
query_for_icebreaker = "Generate a personalized icebreaker question or statement for this person, focusing on their professional background or key achievements."

# Get the response (the generated icebreaker)
response = query_engine.query(query_for_icebreaker)

print("\n--- Generated Icebreaker ---")
print(response.response)
print("--------------------------")

#You can also access the source nodes that were used to generate the response
print("\n--- Source Nodes Used ---") # Commented out for cleaner output, uncomment if needed
for node in response.source_nodes:
    print(f"Score: {node.score:.2f}")
    print(f"Text: {node.text[:200]}...")
    print("-" * 20)


Standalone VectorStoreIndex created for Cell 10.

--- Generated Icebreaker ---
"Hi Ali, I see you're a Computer Science Major with a focus on AI/ML at MIU. Your recent specialization in Meta Back-End Development caught my eye! How has that certification impacted your approach to software and machine learning development?"
--------------------------

--- Source Nodes Used ---
Score: 0.25
Text: LinkedIn Profile: Mock User Name
Headline: Lead AI Engineer at MockCorp | Generative AI Specialist

Experiences:
- Lead AI Engineer at MockCorp (Jan 2022 - Present). Description: Developed scalable AI...
--------------------
Score: 0.25
Text: 😄 Last week, I opened Manus just to play around with the 2,000 credits I’d earned…"
- Liked by Ali Abdallah: "ازاي تلاقي شغل ريموتلي؟ (من البيت) هقولك على أهم المواقع اللي فعلاً بتساعد ناس تلاقي شغل ر...
--------------------


In [45]:
# @title Gradio Interface Function

# Global variables to store the index and chat engine
current_profile_index = None
current_chat_engine = None

def generate_icebreaker_and_setup_chat(linkedin_url: str):
    """
    Generates an icebreaker and sets up the chat engine for the given URL.
    This function will be called by the Gradio interface.
    """
    global current_profile_index, current_chat_engine

    brightdata_key = userdata.get('BRIGHTDATA_API_KEY')
    if not brightdata_key:
        # Return initial state for outputs, including hiding chat components
        # (7 outputs: icebreaker_output, profile_index_state, chat_engine_state, chatbot_component, textbox_component, chat_submit_button, chat_clear_button)
        return "Error: Bright Data API key not found.", None, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

    try:
        # Use in-memory ChromaDB client for each request
        db_gradio = chromadb.Client() # Use in-memory client
        chroma_collection_gradio = db_gradio.get_or_create_collection("linkedin_icebreaker_collection")

        # This ensures no old data from previous runs of the Gradio app persists in memory.
        try:
            chroma_collection_gradio.delete(ids=chroma_collection_gradio.get()['ids'])
            print("DEBUG: Cleared existing data in in-memory ChromaDB collection.")
        except Exception as e:
            print(f"WARNING: Could not clear ChromaDB collection: {e}")


        vector_store_gradio = ChromaVectorStore(chroma_collection=chroma_collection_gradio)
        storage_context_gradio = StorageContext.from_defaults(vector_store=vector_store_gradio)

        gr.Info("Fetching LinkedIn profile data... This may take a few minutes.")
        profile_data_for_current_request = fetch_linkedin_profile_brightdata(
            linkedin_url=linkedin_url,
            brightdata_api_key=brightdata_key,
            use_mock_data=False # Always try real data for Gradio
        )

        if not profile_data_for_current_request:
            return "Failed to fetch LinkedIn profile data. Please check the URL or try again later.", None, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

        # --- DEBUG: Confirm which profile data is being processed ---
        print(f"DEBUG: Profile name from fetched data: {profile_data_for_current_request.get('name', 'N/A')}")

        gr.Info("Profile data fetched. Converting to LlamaIndex Document...")
        linkedin_document = convert_profile_to_document(profile_data_for_current_request)

        gr.Info("Document converted. Parsing into nodes...")
        nodes = node_parser.get_nodes_from_documents([linkedin_document])
        if not nodes:
            return "Error: Could not parse profile data into readable chunks.", None, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

        gr.Info(f"Parsed into {len(nodes)} nodes. Creating VectorStoreIndex...")
        index_gradio = VectorStoreIndex(
            nodes=nodes,
            storage_context=storage_context_gradio,
            embed_model=embed_model
        )
        current_profile_index = index_gradio # Store globally for chat

        gr.Info("VectorStoreIndex created. Generating icebreaker...")

        # Generate Icebreaker (using the global, more generic prompt)
        query_engine_gradio = index_gradio.as_query_engine(
            response_mode="compact",
            text_qa_template=global_icebreaker_prompt_template # Use the global prompt
        )

        # Adjusted for more generic focus.
        query_for_icebreaker = "Generate a personalized icebreaker question or statement for this person, focusing on their professional background or key achievements."

        icebreaker_response = query_engine_gradio.query(query_for_icebreaker)

        gr.Info("Icebreaker generated! Setting up chat...")

        current_chat_engine = CondenseQuestionChatEngine.from_defaults(
            query_engine=index_gradio.as_query_engine(),
            llm=llama_index_openai_llm,
            verbose=False
        )

        # --- FIX: Return exactly 7 values ---
        return icebreaker_response.response, \
               index_gradio, \
               current_chat_engine, \
               gr.update(visible=True), \
               gr.update(visible=True), \
               gr.update(visible=True), \
               gr.update(visible=True) # This is the 7th output

    except Exception as e:
        print(f"ERROR: An error occurred during icebreaker generation or chat setup: {e}")
        # Ensure consistent return of 7 values even on error
        return f"An error occurred: {e}. Please check the Colab logs for details.", \
               None, None, \
               gr.update(visible=False), \
               gr.update(visible=False), \
               gr.update(visible=False), \
               gr.update(visible=False) # This is the 7th output on error


def chat_with_profile(message: str, history: list, chat_engine_state: CondenseQuestionChatEngine):
    """
    Handles conversational chat with the LLM using the indexed profile.
    """
    if not chat_engine_state:
        return "Please generate an icebreaker first by submitting a LinkedIn URL."

    try:
        response = chat_engine_state.chat(message)
        # --- FIX: Append messages in the required {"role": "...", "content": "..."} format ---
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": str(response)})
        return history, "" # Return history and clear textbox
    except Exception as e:
        print(f"ERROR: An error occurred during chat: {e}")
        # --- FIX: Append error message in the required {"role": "...", "content": "..."} format ---
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": f"Error: {e}"})
        return history, "" # Return history and clear textbox even on error

print("Gradio interface function defined.")


Gradio interface function defined.


In [46]:
# @title Launch Gradio Interface with Chat

# Global variables to store the index and chat engine
# These are managed by the functions in Cell 11.
current_profile_index = None
current_chat_engine = None

# The generate_icebreaker_and_setup_chat and chat_with_profile functions
# are defined in Cell 11 and are accessible here.

with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# LinkedIn Icebreaker & Chat Bot")
    gr.Markdown("Enter a public LinkedIn profile URL to generate a personalized icebreaker. Once generated, you can chat with the model about the profile!")

    profile_index_state = gr.State(None)
    chat_engine_state = gr.State(None)

    with gr.Row():
        linkedin_url_input = gr.Textbox(label="LinkedIn Profile URL", placeholder="e.g., https://www.linkedin.com/in/example-user-profile/", scale=2)
        submit_button = gr.Button("Generate Icebreaker", scale=1)

    icebreaker_output = gr.Textbox(label="Generated Icebreaker", interactive=False, lines=3)

    with gr.Column(visible=False) as chat_column: # Group chat components in a column, initially hidden
        chatbot_component = gr.Chatbot(height=300, type='messages') # Added type='messages' to suppress warning
        textbox_component = gr.Textbox(placeholder="Ask a follow-up question about the profile...", container=False, scale=7)
        with gr.Row(): # Group chat buttons in a row
            chat_submit_button = gr.Button("Ask")
            chat_clear_button = gr.Button("Clear Chat")

    # Link the chat components to a ChatInterface-like behavior
    textbox_component.submit(
        fn=chat_with_profile,
        inputs=[textbox_component, chatbot_component, chat_engine_state],
        outputs=[chatbot_component, textbox_component], # Clear textbox after submit
        queue=False
    )

    chat_submit_button.click(
        fn=chat_with_profile,
        inputs=[textbox_component, chatbot_component, chat_engine_state],
        outputs=[chatbot_component, textbox_component],
        queue=False
    )

    chat_clear_button.click(
        lambda: [], # Returns empty list to clear chatbot
        outputs=[chatbot_component],
        queue=False
    )

    submit_button.click(
        fn=generate_icebreaker_and_setup_chat,
        inputs=[linkedin_url_input],
        # --- FIX: Corrected outputs list to match the 7 values returned by the function ---
        outputs=[icebreaker_output, profile_index_state, chat_engine_state,
                 chatbot_component, textbox_component, chat_submit_button, chat_clear_button]
    ).success(
        # On success, update the visibility of the entire chat column AND clear the chatbot
        lambda: [gr.update(visible=True), []], # Return update for column visibility and empty list for chatbot
        outputs=[chat_column, chatbot_component]
    )

demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://25c301449ae098ab9d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


DEBUG: Cleared existing data in in-memory ChromaDB collection.
Fetching profile for: https://www.linkedin.com/in/sarah-helal/
Collection triggered. Snapshot ID: s_mdaymfq629makqus9x. Polling for status...
Polling... (1/60)
✅ Snapshot s_mdaymfq629makqus9x is ready!
Retrieving data from Bright Data...
✅ Data retrieved successfully!
DEBUG: Profile name from fetched data: Sarah Helal
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://25c301449ae098ab9d.gradio.live


