In [4]:
# Add to your .env file:
# HF_TOKEN=your_hugging_face_token_here

# Then in your code:
import os
from huggingface_hub import login

# Login to Hugging Face
if os.getenv("HF_TOKEN"):
    login(token=os.getenv("HF_TOKEN"))

In [8]:
import os
import json
import re
import pandas as pd
from typing import List, Dict, Any, Optional, Union, Callable
from dataclasses import dataclass, field
import random

from dotenv import load_dotenv
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct
from rank_bm25 import BM25Okapi

load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# [Keep all your existing search tool classes - they're fine]
# StructuredVectorSearchTool, StructuredKeywordSearchTool, etc...
# (I'm skipping them for brevity but include them in your actual code)

# ===============================================
# ENHANCED AUTONOMOUS AI AGENT
# ===============================================

@dataclass
class AgentState:
    """Complete state of the agent's execution"""
    query: str
    thoughts: List[str] = field(default_factory=list)
    actions_taken: List[Dict[str, Any]] = field(default_factory=list)
    all_results: Dict[int, Dict[str, Any]] = field(default_factory=dict)  # Store unique experts
    conversation_history: List[str] = field(default_factory=list)
    iteration_count: int = 0
    final_answer: str = None
    search_performed: bool = False


class AutonomousExpertSearchAgent:
    """Fully autonomous AI agent for expert search"""
    
    def __init__(self, search_tools, llm_model="gemini-1.5-flash"):
        self.llm_model = llm_model
        self.search_tools = search_tools
        self.max_iterations = 8
        self.min_experts_threshold = 3
        self.quality_threshold = 0.4
        
    def run(self, query: str) -> AgentState:
        """Main execution - fully autonomous"""
        state = AgentState(query=query)
        
        # Check for simple conversation
        if self._is_general_conversation(query):
            state.final_answer = self._handle_conversation(query)
            return state
        
        # Expert search mode
        print("\n🤖 AI Agent activated. Analyzing your request...")
        
        # Phase 1: Initial Analysis
        self._think(state, f"Analyzing query: '{query}'")
        self._think(state, "This appears to be a request for finding experts.")
        
        # Phase 2: Search Strategy
        search_strategy = self._determine_search_strategy(query, state)
        
        # Phase 3: Execute Searches
        self._execute_search_strategy(search_strategy, query, state)
        
        # Phase 4: Analyze and Improve Results if Needed
        if len(state.all_results) < self.min_experts_threshold:
            self._think(state, f"Only found {len(state.all_results)} experts. Trying alternative approaches...")
            self._expand_search(query, state)
        
        # Phase 5: Generate Final Answer
        state.final_answer = self._generate_final_answer(state)
        
        return state
    
    def _is_general_conversation(self, query: str) -> bool:
        """Check if this is just a greeting or general chat"""
        greetings = ['hello', 'hi', 'hey', 'how are you', 'good morning', 'good afternoon']
        query_lower = query.lower().strip()
        
        # Simple greeting check
        if any(g in query_lower for g in greetings) and len(query_lower) < 30:
            return True
            
        # Check if it's asking for experts
        expert_indicators = ['expert', 'specialist', 'professional', 'find', 'search', 'looking for', 
                           'need', 'want', 'require', 'medicine', 'pharmacy', 'business']
        return not any(indicator in query_lower for indicator in expert_indicators)
    
    def _handle_conversation(self, query: str) -> str:
        """Handle general conversation"""
        try:
            prompt = f"Respond to this greeting or general query in a friendly, helpful way: {query}"
            model = genai.GenerativeModel(self.llm_model)
            response = model.generate_content(prompt)
            return response.text.strip()
        except:
            return "Hello! I'm an AI assistant specialized in finding experts. How can I help you today?"
    
    def _think(self, state: AgentState, thought: str):
        """Record a thought and display it"""
        state.thoughts.append(thought)
        print(f"💭 {thought}")
    
    def _record_action(self, state: AgentState, action: str, result: Any):
        """Record an action taken"""
        state.actions_taken.append({
            'action': action,
            'result_summary': self._summarize_result(result)
        })
        print(f"🔧 Executed: {action}")
    
    def _determine_search_strategy(self, query: str, state: AgentState) -> Dict[str, Any]:
        """Determine the best search strategy using LLM"""
        self._think(state, "Determining optimal search strategy...")
        
        prompt = f"""
Analyze this expert search query and determine the best search strategy:
Query: "{query}"

Consider:
1. What fields/domains are mentioned? (e.g., medicine, pharmacy, business)
2. Is this looking for specific expertise or general professionals?
3. Would project-based search help find relevant experts?

Respond with a JSON object:
{{
    "primary_terms": ["term1", "term2"],  // Main search terms
    "use_variants": true/false,  // Should we generate query variants?
    "search_projects": true/false,  // Should we search project database?
    "filters": {{}},  // Any specific filters to apply
    "approach": "comprehensive/targeted"  // Search approach
}}
"""
        
        try:
            model = genai.GenerativeModel(self.llm_model)
            response = model.generate_content(prompt)
            strategy = json.loads(response.text.strip())
            self._think(state, f"Strategy determined: {strategy['approach']} approach")
            return strategy
        except:
            # Fallback strategy
            return {
                "primary_terms": [query],
                "use_variants": True,
                "search_projects": True,
                "filters": {},
                "approach": "comprehensive"
            }
    
    def _execute_search_strategy(self, strategy: Dict, query: str, state: AgentState):
        """Execute the determined search strategy"""
        state.search_performed = True
        
        # Step 1: Generate query variants if needed
        search_queries = [query]
        if strategy.get('use_variants', True):
            self._think(state, "Generating query variants for better coverage...")
            variants = self._generate_variants(query)
            search_queries.extend(variants)
            self._record_action(state, f"Generated {len(variants)} query variants", variants)
        
        # Step 2: Execute searches
        for search_query in search_queries[:3]:  # Limit to top 3 queries
            # Vector search in expert database
            self._think(state, f"Searching expert database with: '{search_query}'")
            vec_results = self.search_tools['normal_vec'].search(search_query, top_k=10)
            self._process_results(vec_results, state, 'expert_vector')
            
            # Keyword search in expert database
            kw_results = self.search_tools['normal_kw'].search(search_query, top_k=10)
            self._process_results(kw_results, state, 'expert_keyword')
            
            # Project-based search if strategy suggests
            if strategy.get('search_projects', True):
                self._think(state, f"Searching project database with: '{search_query}'")
                proj_vec_results = self.search_tools['proj_vec'].search(search_query, top_k=10)
                self._process_results(proj_vec_results, state, 'project_vector')
    
    def _generate_variants(self, query: str) -> List[str]:
        """Generate query variants"""
        try:
            return self.search_tools['refiner'].generate_variants(query)
        except:
            # Simple fallback variants
            terms = query.lower().split()
            variants = []
            if 'expert' not in query.lower():
                variants.append(f"{query} expert")
            if 'specialist' not in query.lower():
                variants.append(f"{query} specialist")
            return variants[:2]
    
    def _process_results(self, results: List[Dict], state: AgentState, source: str):
        """Process and deduplicate results"""
        if not results:
            return
            
        new_experts = 0
        for expert in results:
            expert_id = expert['expert_id']
            
            # Store only if new or better score
            if expert_id not in state.all_results:
                state.all_results[expert_id] = expert
                state.all_results[expert_id]['sources'] = [source]
                new_experts += 1
            else:
                # Update if better score
                existing_score = state.all_results[expert_id].get('_score', 0)
                new_score = expert.get('_score', 0)
                if new_score > existing_score:
                    state.all_results[expert_id].update(expert)
                state.all_results[expert_id]['sources'].append(source)
        
        if new_experts > 0:
            self._record_action(state, f"{source} search", f"Found {new_experts} new experts")
    
    def _expand_search(self, query: str, state: AgentState):
        """Expand search if initial results are insufficient"""
        self._think(state, "Expanding search with broader terms...")
        
        # Extract key terms and search more broadly
        key_terms = re.findall(r'\b\w+\b', query.lower())
        important_terms = [term for term in key_terms if len(term) > 3 and term not in 
                          ['expert', 'find', 'search', 'looking', 'want', 'need']]
        
        for term in important_terms[:3]:
            broad_results = self.search_tools['normal_vec'].search(term, top_k=5)
            self._process_results(broad_results, state, f'expanded_{term}')
    
    def _generate_final_answer(self, state: AgentState) -> str:
        """Generate the final answer with all results"""
        if not state.all_results:
            return ("I apologize, but I couldn't find any experts matching your criteria. "
                   "Try using different keywords or being more specific about the expertise you need.")
        
        # Sort experts by score
        sorted_experts = sorted(
            state.all_results.values(),
            key=lambda x: x.get('_score', 0),
            reverse=True
        )
        
        # Build answer
        total_found = len(state.all_results)
        answer = f"✅ **Found {total_found} experts matching your criteria!**\n\n"
        
        # Add search context
        if 'medicine' in state.query.lower() or 'pharmacy' in state.query.lower():
            answer += "🏥 *Experts in medical/pharmaceutical fields"
        if 'business' in state.query.lower():
            answer += " with business expertise"
        answer += "*\n\n"
        
        # Show top 5 experts
        answer += "**Top 5 Experts:**\n\n"
        
        for i, expert in enumerate(sorted_experts[:5], 1):
            score = expert.get('_score', 0)
            sources = expert.get('sources', [])
            
            answer += f"**{i}. {expert['expert_name']}**\n"
            answer += f"📊 Relevance Score: {score:.3f} "
            answer += f"(Found via: {', '.join(set(sources))})\n"
            answer += f"💼 {expert['headline']}\n"
            
            if expert.get('bio'):
                bio = expert['bio']
                if len(bio) > 200:
                    bio = bio[:200] + "..."
                answer += f"📝 {bio}\n"
            
            answer += "\n"
        
        if total_found > 5:
            answer += f"📋 *Plus {total_found - 5} more experts available.*\n\n"
            answer += "💡 **Would you like to:**\n"
            answer += "- See more experts?\n"
            answer += "- Filter by specific criteria?\n"
            answer += "- Get contact information?"
        
        return answer
    
    def _summarize_result(self, result: Any) -> str:
        """Create a summary of any result"""
        if isinstance(result, list):
            if result and isinstance(result[0], dict) and 'expert_name' in result[0]:
                names = [r['expert_name'] for r in result[:3]]
                summary = f"{len(result)} experts: {', '.join(names)}"
                if len(result) > 3:
                    summary += f" +{len(result)-3} more"
                return summary
            elif result and isinstance(result[0], str):
                return f"{len(result)} items: {', '.join(result[:2])}"
            else:
                return f"{len(result)} results"
        return str(result)[:100]
    
    def display_thought_process(self, state: AgentState):
        """Display complete thought process"""
        print("\n" + "="*70)
        print("🧠 AGENT THOUGHT PROCESS SUMMARY")
        print("="*70)
        
        print(f"\n📝 Original Query: '{state.query}'")
        print(f"🔄 Iterations: {state.iteration_count}")
        print(f"🎯 Experts Found: {len(state.all_results)}")
        
        if state.thoughts:
            print("\n💭 Key Thoughts:")
            for i, thought in enumerate(state.thoughts, 1):
                print(f"   {i}. {thought}")
        
        if state.actions_taken:
            print("\n🔧 Actions Performed:")
            for i, action in enumerate(state.actions_taken, 1):
                print(f"   {i}. {action['action']} → {action['result_summary']}")
        
        print("\n" + "="*70)


# ===============================================
# MAIN EXECUTION
# ===============================================

if __name__ == "__main__":
    print("🚀 Initializing Autonomous Expert Search System...")
    
    # Initialize all search tools
    norm_vec = StructuredVectorSearchTool()
    norm_kw = StructuredKeywordSearchTool()
    
    print("📚 Loading expert database...")
    df_norm = pd.read_csv("experts_202505291522.csv", encoding="utf8")
    norm_vec.add_documents(df_norm)
    norm_kw.add_documents(df_norm)
    
    print("📊 Loading project-expert mappings...")
    df_proj = pd.read_csv("project_expert_data.csv", encoding="latin1")
    docs = extract_agenda_docs(df_proj)
    
    proj_vec = AgendaVectorSearchTool()
    proj_kw = AgendaKeywordSearchTool()
    proj_vec.add_documents(docs)
    proj_kw.add_documents(docs)
    
    # Create supporting tools
    reranker = AgendaResultsReranker(alpha=0.6)
    refiner = GeminiQueryRefiner(n_variants=3)
    
    # Create the autonomous agent
    print("\n🤖 Creating Autonomous AI Agent...")
    agent = AutonomousExpertSearchAgent({
        'normal_vec': norm_vec,
        'normal_kw': norm_kw,
        'proj_vec': proj_vec,
        'proj_kw': proj_kw,
        'reranker': reranker,
        'refiner': refiner
    })
    
    print("\n" + "="*70)
    print("🎯 AUTONOMOUS AI EXPERT FINDER")
    print("="*70)
    print("I'm a fully autonomous AI agent that finds experts for you!")
    print("Just tell me what kind of expert you need.\n")
    
    # Main loop
    while True:
        query = input("\n💬 You: ").strip()
        
        if query.lower() in ['exit', 'quit', 'bye']:
            print("\n👋 AI Agent: Goodbye! Thanks for using Expert Finder!")
            break
        
        if not query:
            continue
        
        # Run the autonomous agent
        state = agent.run(query)
        
        # Display results
        print("\n" + "="*70)
        print("🎯 RESULTS")
        print("="*70)
        print(state.final_answer)
        
        # Always show thought process
        agent.display_thought_process(state)


🚀 Initializing Autonomous Expert Search System...
📚 Loading expert database...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
import os
import json
import re
import pandas as pd
from typing import List, Dict, Any, Optional, Union, Callable
from dataclasses import dataclass, field
import random

from dotenv import load_dotenv
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct
from rank_bm25 import BM25Okapi

load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# ===============================================
# PART 1: ALL SEARCH TOOL CLASSES
# ===============================================

class StructuredVectorSearchTool:
    def __init__(
        self,
        collection_name: str = "norm_experts",
        qdrant_url: str = "http://localhost:6333",
        embedding_model: str = "all-MiniLM-L6-v2"
    ):
        self.model = SentenceTransformer(embedding_model)
        self.client = QdrantClient(url=qdrant_url)
        self.collection_name = collection_name

        if self.client.collection_exists(collection_name):
            self.client.delete_collection(collection_name)
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=self.model.get_sentence_embedding_dimension(),
                distance=Distance.COSINE
            )
        )

    def _aggregate_text(self, doc: dict) -> str:
        parts: List[str] = []
        for fld in ("bio", "headline"):
            v = doc.get(fld, "")
            if isinstance(v, str) and v.strip():
                parts.append(v.strip())

        geo = doc.get("geography_details", [])
        if isinstance(geo, str):
            try:
                geo = json.loads(geo)
            except json.JSONDecodeError:
                geo = []
        if isinstance(geo, list):
            names = [g.get("name","") for g in geo if isinstance(g, dict)]
            if names:
                parts.append(", ".join(names))

        exp = doc.get("expertise_in_these_geographies", "")
        if isinstance(exp, str) and exp.strip():
            parts.append(exp.strip())

        raw = doc.get("work_experiences", [])
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except json.JSONDecodeError:
                raw = []
        if isinstance(raw, list):
            for we in raw:
                if not isinstance(we, dict):
                    continue
                t = (we.get("designation") or "").strip()
                d = (we.get("job_description") or "").strip()
                if t or d:
                    parts.append(f"{t}: {d}")

        return "\n".join(parts)

    def add_documents(self, docs: pd.DataFrame | List[dict]):
        if isinstance(docs, pd.DataFrame):
            docs = docs.to_dict(orient="records")
        texts = [self._aggregate_text(d) for d in docs]
        embs = self.model.encode(texts, show_progress_bar=True)

        points: List[PointStruct] = []
        for d, v in zip(docs, embs):
            rid = int(d.get("id", 0))
            points.append(PointStruct(
                id=rid,
                vector=v.tolist(),
                payload=d
            ))
        self.client.upsert(
            collection_name=self.collection_name,
            points=points,
            wait=True
        )

    def search(self, query: str, top_k: int = 10) -> List[Dict[str,Any]]:
        qv = self.model.encode([query])[0].tolist()
        hits = self.client.search(
            collection_name=self.collection_name,
            query_vector=qv,
            limit=top_k
        )
        results: List[Dict[str,Any]] = []
        for h in hits:
            p = h.payload
            results.append({
                "expert_id":    int(p.get("id", 0)),
                "expert_name":  p.get("expert_name", "") or p.get("name",""),
                "bio":          p.get("bio",""),
                "headline":     p.get("headline",""),
                "work_summary": "",
                "_score":       h.score
            })
        return results


class StructuredKeywordSearchTool:
    def __init__(self, k1: float = 1.5, b: float = 0.75):
        self.k1 = k1
        self.b  = b
        self.docs: List[dict] = []
        self.bm25: Optional[BM25Okapi] = None

    def _tokenize(self, text: str) -> List[str]:
        return re.findall(r"\w+", text.lower())

    def _aggregate_text(self, doc: dict) -> str:
        return StructuredVectorSearchTool()._aggregate_text(doc)

    def add_documents(self, docs: pd.DataFrame | List[dict]):
        if isinstance(docs, pd.DataFrame):
            docs = docs.to_dict(orient="records")
        self.docs = docs
        corpus = [self._aggregate_text(d) for d in docs]
        toks = [self._tokenize(c) for c in corpus]
        self.bm25 = BM25Okapi(toks, k1=self.k1, b=self.b)

    def search(self, query: str, top_k: int = 10) -> List[Dict[str,Any]]:
        if self.bm25 is None:
            raise RuntimeError("Index not built")
        qt = self._tokenize(query)
        scores = self.bm25.get_scores(qt)
        idxs = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
        results: List[Dict[str,Any]] = []
        for i in idxs:
            d = self.docs[i]
            results.append({
                "expert_id":    int(d.get("id", 0)),
                "expert_name":  d.get("expert_name","") or d.get("name",""),
                "bio":          d.get("bio",""),
                "headline":     d.get("headline",""),
                "work_summary": "",
                "_score":       float(scores[i])
            })
        return results


def extract_agenda_docs(df: pd.DataFrame) -> List[dict]:
    out: List[dict] = []
    for _, row in df.iterrows():
        try:
            eid = int(row["expert_id"])
        except (KeyError, ValueError):
            continue
        bio     = row.get("expert_bio","") or ""
        head    = row.get("expert_headline","") or ""
        summary = row.get("expert_work_summary","") or ""
        raw     = row.get("project_agenda_responses","[]")
        try:
            arr = json.loads(raw)
        except json.JSONDecodeError:
            continue
        for idx, qa in enumerate(arr):
            q = (qa.get("question") or "").strip()
            a = (qa.get("answer")   or "").strip()
            txt = f"{q} {a}".strip()
            if not txt:
                continue
            doc_id = eid*1000 + idx
            out.append({
                "_id":                 doc_id,
                "expert_id":           eid,
                "expert_name":         row.get("expert_name","") or "",
                "expert_bio":          bio,
                "expert_headline":     head,
                "expert_work_summary": summary,
                "text":                txt
            })
    return out


class AgendaVectorSearchTool:
    def __init__(
        self,
        collection_name: str = "agenda_responses",
        qdrant_url: str = "http://localhost:6333",
        embedding_model: str = "all-MiniLM-L6-v2"
    ):
        self.model = SentenceTransformer(embedding_model)
        self.client = QdrantClient(url=qdrant_url)
        self.collection_name = collection_name

        if self.client.collection_exists(collection_name):
            self.client.delete_collection(collection_name)
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=self.model.get_sentence_embedding_dimension(),
                distance=Distance.COSINE
            )
        )

    def add_documents(self, docs: List[dict]):
        texts = [d["text"] for d in docs]
        embs  = self.model.encode(texts, show_progress_bar=True)
        points: List[PointStruct] = []
        for d, emb in zip(docs, embs):
            points.append(PointStruct(
                id=d["_id"],
                vector=emb.tolist(),
                payload={
                    "expert_id":    d["expert_id"],
                    "expert_name":  d["expert_name"],
                    "bio":          d["expert_bio"],
                    "headline":     d["expert_headline"],
                    "work_summary": d["expert_work_summary"],
                    "text":         d["text"]
                }
            ))
        self.client.upsert(
            collection_name=self.collection_name,
            points=points,
            wait=True
        )

    def search(self, query: str, top_k: int = 10) -> List[Dict[str,Any]]:
        qv = self.model.encode([query])[0].tolist()
        hits = self.client.search(
            collection_name=self.collection_name,
            query_vector=qv,
            limit=top_k
        )
        return [
            {
                "expert_id":    h.payload["expert_id"],
                "expert_name":  h.payload["expert_name"],
                "bio":          h.payload["bio"],
                "headline":     h.payload["headline"],
                "work_summary": h.payload["work_summary"],
                "text":         h.payload.get("text", ""),
                "_score":       h.score
            }
            for h in hits
        ]


class AgendaKeywordSearchTool:
    def __init__(self, k1: float = 1.5, b: float = 0.75):
        self.k1 = k1
        self.b  = b
        self.docs: List[dict] = []
        self.bm25: Optional[BM25Okapi] = None

    def _tokenize(self, text: str) -> List[str]:
        return re.findall(r"\w+", text.lower())

    def add_documents(self, docs: List[dict]):
        self.docs = docs
        corpus = [d["text"] for d in docs]
        toks = [self._tokenize(c) for c in corpus]
        self.bm25 = BM25Okapi(toks, k1=self.k1, b=self.b)

    def search(self, query: str, top_k: int = 10) -> List[Dict[str,Any]]:
        if not self.bm25:
            raise RuntimeError("Index not built")
        qt = self._tokenize(query)
        scores = self.bm25.get_scores(qt)
        idxs = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
        return [
            {
                "expert_id":    self.docs[i]["expert_id"],
                "expert_name":  self.docs[i]["expert_name"],
                "bio":          self.docs[i]["expert_bio"],
                "headline":     self.docs[i]["expert_headline"],
                "work_summary": self.docs[i]["expert_work_summary"],
                "_score":       float(scores[i])
            }
            for i in idxs
        ]


class AgendaResultsReranker:
    def __init__(self, alpha: float = 0.5, model_name: str = "gemini-1.5-flash"):
        self.alpha = alpha
        self.model_name = model_name

    def rerank_and_fuse(self, vec_results: List[Dict], kw_results: List[Dict], query: str = None) -> List[Dict]:
        """Merge and rerank results from vector and keyword searches"""
        all_results = {}
        
        # Add vector results
        for r in vec_results:
            eid = r.get('expert_id')
            if eid not in all_results:
                all_results[eid] = r.copy()
                all_results[eid]['vec_score'] = r.get('_score', 0)
                all_results[eid]['kw_score'] = 0
        
        # Add keyword results
        for r in kw_results:
            eid = r.get('expert_id')
            if eid in all_results:
                all_results[eid]['kw_score'] = r.get('_score', 0)
            else:
                all_results[eid] = r.copy()
                all_results[eid]['vec_score'] = 0
                all_results[eid]['kw_score'] = r.get('_score', 0)
        
        # Calculate fused scores
        results = list(all_results.values())
        if not results:
            return []
            
        max_vec = max((r['vec_score'] for r in results), default=1.0) or 1.0
        max_kw = max((r['kw_score'] for r in results), default=1.0) or 1.0
        
        for r in results:
            r['vec_norm'] = r['vec_score'] / max_vec
            r['kw_norm'] = r['kw_score'] / max_kw
            r['fused_score'] = self.alpha * r['vec_norm'] + (1 - self.alpha) * r['kw_norm']
        
        # Sort by fused score
        results.sort(key=lambda x: x['fused_score'], reverse=True)
        return results[:10]


class GeminiQueryRefiner:
    def __init__(self, model_name: str="gemini-1.5-flash", n_variants: int=3):
        self.model_name = model_name
        self.n_variants = n_variants

    def generate_variants(self, query: str, context: Optional[str]=None) -> List[str]:
        prompt = f"Generate {self.n_variants} alternative ways to search for: '{query}'\nReturn only the alternatives, one per line."
        try:
            model = genai.GenerativeModel(self.model_name)
            resp = model.generate_content(prompt)
            text = resp.text.strip()
            lines = [l.strip() for l in text.split('\n') if l.strip() and not l.startswith('{')]
            return lines[:self.n_variants]
        except:
            return [query]  # Return original if generation fails


# ===============================================
# PART 2: FULLY AUTONOMOUS AI AGENT
# ===============================================

@dataclass
class AgentState:
    """Complete state of the agent's execution"""
    query: str
    thoughts: List[str] = field(default_factory=list)
    actions_taken: List[Dict[str, Any]] = field(default_factory=list)
    all_results: Dict[int, Dict[str, Any]] = field(default_factory=dict)
    iteration_count: int = 0
    final_answer: str = None


class FullyAutonomousAgent:
    """Fully autonomous AI agent for expert search"""
    
    def __init__(self, search_tools, llm_model="gemini-1.5-flash"):
        self.llm_model = llm_model
        self.search_tools = search_tools
        self.max_iterations = 6
        self.min_experts_threshold = 3
        
    def run(self, query: str) -> AgentState:
        """Main execution - fully autonomous"""
        state = AgentState(query=query)
        
        print("\nAI Agent activated. Working on your request...\n")
        
        # Check if it's just a greeting
        if self._is_greeting(query):
            state.final_answer = self._handle_greeting()
            return state
        
        # Execute autonomous search
        self._execute_autonomous_search(query, state)
        
        return state
    
    def _is_greeting(self, query: str) -> bool:
        greetings = ['hello', 'hi', 'hey', 'how are you', 'good morning', 'good afternoon']
        query_lower = query.lower().strip()
        return any(g in query_lower for g in greetings) and len(query_lower) < 30
    
    def _handle_greeting(self) -> str:
        return "Hello! I'm an AI expert finder. Tell me what kind of expert you're looking for"
    
    def _execute_autonomous_search(self, query: str, state: AgentState):
        """Execute fully autonomous search"""
        
        # Step 1: Analyze query
        self._log_thought(state, f"Analyzing query: '{query}'")
        
        # Step 2: Generate search variants
        self._log_thought(state, "Generating search variants for comprehensive coverage...")
        variants = self._generate_search_variants(query)
        search_queries = [query] + variants
        self._log_action(state, "Generated search variants", variants)
        
        # Step 3: Execute comprehensive search
        self._log_thought(state, "Executing comprehensive search across all databases...")
        
        for idx, search_query in enumerate(search_queries[:3], 1):
            self._log_thought(state, f"Search round {idx} with: '{search_query}'")
            
            # Vector search in expert database
            vec_results = self.search_tools['normal_vec'].search(search_query, top_k=10)
            self._process_results(vec_results, state, f'expert_vector_r{idx}')
            
            # Keyword search in expert database  
            kw_results = self.search_tools['normal_kw'].search(search_query, top_k=10)
            self._process_results(kw_results, state, f'expert_keyword_r{idx}')
            
            # Project database search
            proj_results = self.search_tools['proj_vec'].search(search_query, top_k=8)
            self._process_results(proj_results, state, f'project_vector_r{idx}')
        
        # Step 4: Check if we need more results
        unique_count = len(state.all_results)
        if unique_count < self.min_experts_threshold:
            self._log_thought(state, f"Only found {unique_count} experts. Expanding search...")
            self._expand_search(query, state)
        
        # Step 5: Generate final answer
        self._log_thought(state, "Preparing final results...")
        state.final_answer = self._generate_final_answer(state)
    
    def _generate_search_variants(self, query: str) -> List[str]:
        """Generate query variants"""
        try:
            return self.search_tools['refiner'].generate_variants(query)
        except:
            # Fallback variants
            variants = []
            if 'expert' not in query.lower():
                variants.append(f"{query} expert")
            if 'specialist' not in query.lower():
                variants.append(f"{query} specialist")
            return variants
    
    def _process_results(self, results: List[Dict], state: AgentState, source: str):
        """Process and store results"""
        if not results:
            self._log_action(state, f"{source} search", "No results")
            return
        
        new_count = 0
        for expert in results:
            expert_id = expert['expert_id']
            if expert_id not in state.all_results:
                state.all_results[expert_id] = expert
                state.all_results[expert_id]['sources'] = [source]
                new_count += 1
            else:
                # Update if better score
                if expert.get('_score', 0) > state.all_results[expert_id].get('_score', 0):
                    state.all_results[expert_id].update(expert)
                state.all_results[expert_id]['sources'].append(source)
        
        self._log_action(state, f"{source} search", f"Found {len(results)} results ({new_count} new)")
    
    def _expand_search(self, query: str, state: AgentState):
        """Expand search with individual terms"""
        terms = [t for t in query.split() if len(t) > 3 and t.lower() not in 
                ['expert', 'find', 'search', 'looking', 'want', 'need']]
        
        for term in terms[:2]:
            results = self.search_tools['normal_vec'].search(term, top_k=5)
            self._process_results(results, state, f'expanded_{term}')
    
    def _generate_final_answer(self, state: AgentState) -> str:
        """Generate final answer with results"""
        if not state.all_results:
            return "I couldn't find any experts matching your criteria. Try using different keywords or being more specific."
        
        # Sort by score
        sorted_experts = sorted(
            state.all_results.values(),
            key=lambda x: x.get('_score', 0),
            reverse=True
        )
        
        total = len(state.all_results)
        answer = f"✅ **Found {total} experts matching your criteria!**\n\n"
        
        # Add context
        if any(term in state.query.lower() for term in ['medicine', 'medical', 'pharmacy', 'healthcare']):
            answer += "🏥 *Showing medical/healthcare professionals*\n\n"
        if 'business' in state.query.lower():
            answer += "💼 *Including business expertise*\n\n"
        
        answer += "**Top 5 Experts:**\n\n"
        
        for i, expert in enumerate(sorted_experts[:5], 1):
            score = expert.get('_score', 0)
            answer += f"**{i}. {expert['expert_name']}** (Score: {score:.3f})\n"
            answer += f"📋 {expert['headline']}\n"
            
            if expert.get('bio'):
                bio = expert['bio'][:200]
                if len(expert['bio']) > 200:
                    bio += "..."
                answer += f"📝 {bio}\n"
            answer += "\n"
        
        if total > 5:
            answer += f"*Plus {total - 5} more experts available.*"
        
        return answer
    
    def _log_thought(self, state: AgentState, thought: str):
        state.thoughts.append(thought)
        print(f"💭 {thought}")
    
    def _log_action(self, state: AgentState, action: str, result: Any):
        state.actions_taken.append({'action': action, 'result': str(result)})
        print(f"🔧 {action} → {result}")
    
    def display_summary(self, state: AgentState):
        """Display execution summary"""
        print("\n" + "="*70)
        print("AGENT EXECUTION SUMMARY")
        print("="*70)
        print(f"Query: '{state.query}'")
        print(f"Total unique experts found: {len(state.all_results)}")
        print(f"Number of searches performed: {len([a for a in state.actions_taken if 'search' in a['action']])}")
        print("="*70)


# ===============================================
# MAIN EXECUTION
# ===============================================

if __name__ == "__main__":
    print("Initializing Fully Autonomous Expert Search System...")
    
    # Initialize search tools
    norm_vec = StructuredVectorSearchTool()
    norm_kw = StructuredKeywordSearchTool()
    
    print("Loading expert database...")
    df_norm = pd.read_csv("experts_202505291522.csv", encoding="utf8")
    norm_vec.add_documents(df_norm)
    norm_kw.add_documents(df_norm)
    
    print("Loading project-expert mappings...")
    df_proj = pd.read_csv("project_expert_data.csv", encoding="latin1")
    docs = extract_agenda_docs(df_proj)
    
    proj_vec = AgendaVectorSearchTool()
    proj_kw = AgendaKeywordSearchTool()
    proj_vec.add_documents(docs)
    proj_kw.add_documents(docs)
    
    # Create supporting tools
    reranker = AgendaResultsReranker(alpha=0.6)
    refiner = GeminiQueryRefiner(n_variants=2)
    
    # Create the fully autonomous agent
    print("\nCreating Fully Autonomous AI Agent...")
    agent = FullyAutonomousAgent({
        'normal_vec': norm_vec,
        'normal_kw': norm_kw,
        'proj_vec': proj_vec,
        'proj_kw': proj_kw,
        'reranker': reranker,
        'refiner': refiner
    })
    
    print("\n" + "="*70)
    print("FULLY AUTONOMOUS EXPERT FINDER")
    print("="*70)
    print("I automatically search and find the best experts for you!")
    print("Just tell me what you're looking for.\n")
    
    # Main loop
    while True:
        query = input("\n💬 You: ").strip()
        
        if query.lower() in ['exit', 'quit', 'bye']:
            print("\nGoodbye! Thanks for using Expert Finder!")
            break
        
        if not query:
            continue
        
        # Run the fully autonomous agent
        state = agent.run(query)
        
        # Display results
        print("\n" + "="*70)
        print("EXPERT SEARCH RESULTS")
        print("="*70)
        print(state.final_answer)
        
        # Always display summary
        agent.display_summary(state)


🚀 Initializing Fully Autonomous Expert Search System...
📚 Loading expert database...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

📊 Loading project-expert mappings...


Batches:   0%|          | 0/5 [00:00<?, ?it/s]


🤖 Creating Fully Autonomous AI Agent...

🎯 FULLY AUTONOMOUS EXPERT FINDER
I automatically search and find the best experts for you!
Just tell me what you're looking for.




💬 You:  I want some experts in the field of pharmacy,medical and business



🤖 AI Agent activated. Working on your request...

💭 Analyzing query: 'I want some experts in the field of pharmacy,medical and business'
💭 Generating search variants for comprehensive coverage...
🔧 Generated search variants → ['Pharmacy, medical, and business expert consultants', 'Pharmaceutical, medical, and business professionals']
💭 Executing comprehensive search across all databases...
💭 Search round 1 with: 'I want some experts in the field of pharmacy,medical and business'


  hits = self.client.search(


🔧 expert_vector_r1 search → No results
🔧 expert_keyword_r1 search → Found 10 results (10 new)


  hits = self.client.search(


🔧 project_vector_r1 search → Found 8 results (3 new)
💭 Search round 2 with: 'Pharmacy, medical, and business expert consultants'
🔧 expert_vector_r2 search → No results
🔧 expert_keyword_r2 search → Found 10 results (6 new)
🔧 project_vector_r2 search → Found 8 results (1 new)
💭 Search round 3 with: 'Pharmaceutical, medical, and business professionals'
🔧 expert_vector_r3 search → No results
🔧 expert_keyword_r3 search → Found 10 results (4 new)
🔧 project_vector_r3 search → Found 8 results (1 new)
💭 Preparing final results...

🎯 EXPERT SEARCH RESULTS
✅ **Found 25 experts matching your criteria!**

🏥 *Showing medical/healthcare professionals*

💼 *Including business expertise*

**Top 5 Experts:**

**1. ** (Score: 16.400)
📋 Operations expert with 26+ years of experience in Information Technology industry with exposure across United States and India.
📝 Mr. Swami is an Operations expert with 26+ years of experience in Information Technology industry. Currently working as Co-founder & CTO at 1Cor


💬 You:  But i think result are not that aligned with my query ?? 



🤖 AI Agent activated. Working on your request...

💭 Analyzing query: 'But i think result are not that aligned with my query ??'
💭 Generating search variants for comprehensive coverage...
🔧 Generated search variants → ['But i think result are not that aligned with my query ??']
💭 Executing comprehensive search across all databases...
💭 Search round 1 with: 'But i think result are not that aligned with my query ??'
🔧 expert_vector_r1 search → No results
🔧 expert_keyword_r1 search → Found 10 results (10 new)
🔧 project_vector_r1 search → Found 8 results (6 new)
💭 Search round 2 with: 'But i think result are not that aligned with my query ??'
🔧 expert_vector_r2 search → No results
🔧 expert_keyword_r2 search → Found 10 results (0 new)
🔧 project_vector_r2 search → Found 8 results (0 new)
💭 Preparing final results...

🎯 EXPERT SEARCH RESULTS
✅ **Found 16 experts matching your criteria!**

**Top 5 Experts:**

**1. ** (Score: 13.976)
📋 Sales expert with 25+ years of experience in Information Te