In [4]:
"""
PHARMA SECTOR RESEARCH AGENT
Specialized agent for pharmaceutical industry research
"""

import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SerpAPIWrapper
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Optional
import json
from datetime import datetime

# Load environment variables
load_dotenv()

# Initialize LLM and Search
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)
search = SerpAPIWrapper()

# State definition
class PharmaResearchState(TypedDict):
    query: str
    research_plan: List[str]
    current_step: int
    research_data: List[dict]
    follow_up_questions: List[str]
    final_report: Optional[str]
    sector: str

print("‚úÖ Pharma Agent Initialized!")
print("üìä Sector: PHARMA")


‚úÖ Pharma Agent Initialized!
üìä Sector: PHARMA


In [11]:
def generate_pharma_research_plan(state: PharmaResearchState) -> PharmaResearchState:
    """Generate pharma-specific research plan"""
    
    query = state["query"]
    
    prompt = f"""You are a pharmaceutical industry research expert.
    
User Query: {query}

Generate a comprehensive research plan with 7-10 key research topics.
Focus on pharmaceutical industry aspects:
- Drug pipelines and R&D
- Regulatory approvals (FDA, DCGI)
- Market dynamics and competition
- Clinical trials and patents
- Manufacturing capabilities
- Export performance
- Therapeutic areas
- Government policies

CRITICAL: Return ONLY a valid JSON array of strings. No markdown, no explanation, no code blocks.
Format: ["Topic 1", "Topic 2", "Topic 3"]
"""
    
    response = llm.invoke(prompt)
    content = response.content.strip()
    
    # Remove markdown code blocks if present
    if content.startswith("```"):
        content = content.split("```")[1]
        if content.startswith("json"):
            content = content[4:]
        content = content.strip()
    
    # Parse JSON
    try:
        research_plan = json.loads(content)
    except json.JSONDecodeError:
        print(f"‚ö†Ô∏è  JSON Parse Error. Raw response: {content[:200]}")
        # Fallback: create default plan
        research_plan = [
            f"Overview of {query}",
            "Market analysis and key players",
            "Recent developments and innovations",
            "Regulatory landscape",
            "Financial performance",
            "Future outlook and trends",
            "Competitive positioning"
        ]
        print("Using fallback research plan")
    
    print("\n" + "="*60)
    print("üìã PHARMA RESEARCH PLAN GENERATED:")
    print("="*60)
    for i, topic in enumerate(research_plan, 1):
        print(f"{i}. {topic}")
    print("="*60)
    
    state["research_plan"] = research_plan
    state["current_step"] = 0
    state["sector"] = "PHARMA"
    
    return state

print("‚úÖ Research Plan Generator Ready (with error handling)!")


‚úÖ Research Plan Generator Ready (with error handling)!


In [12]:
def pharma_deep_research(state: PharmaResearchState) -> PharmaResearchState:
    """Execute deep pharma research with iterative searches"""
    
    current_step = state["current_step"]
    research_plan = state["research_plan"]
    
    if current_step >= len(research_plan):
        return state
    
    current_topic = research_plan[current_step]
    print(f"\nüî¨ Researching: {current_topic}")
    
    # Initial search
    search_results = search.run(f"{current_topic} pharmaceutical India 2025-2026")
    
    # Analyze and generate follow-up questions
    analysis_prompt = f"""You are analyzing pharmaceutical research data.

Topic: {current_topic}
Initial Data: {search_results[:1000]}

Generate 2-3 specific follow-up search queries to deepen understanding.
Focus on: patents, clinical trials, market data, regulatory updates, company-specific info.

CRITICAL: Return ONLY a valid JSON array of strings. No markdown, no explanation.
Format: ["query 1", "query 2"]
"""
    
    response = llm.invoke(analysis_prompt)
    content = response.content.strip()
    
    # Remove markdown code blocks if present
    if content.startswith("```"):
        content = content.split("```")[1]
        if content.startswith("json"):
            content = content[4:]
        content = content.strip()
    
    # Parse JSON with fallback
    try:
        follow_up_queries = json.loads(content)
    except json.JSONDecodeError:
        print(f"  ‚ö†Ô∏è  Using default follow-up queries")
        follow_up_queries = [f"{current_topic} latest news 2026"]
    
    # Execute follow-up searches
    all_data = [{"query": current_topic, "results": search_results}]
    
    for fq in follow_up_queries[:2]:  # Limit to 2 follow-ups
        print(f"  ‚Ü≥ Follow-up: {fq}")
        follow_up_results = search.run(fq)
        all_data.append({"query": fq, "results": follow_up_results})
    
    # Store research data
    state["research_data"].append({
        "topic": current_topic,
        "step": current_step,
        "data": all_data
    })
    
    state["current_step"] += 1
    
    return state

def check_research_complete(state: PharmaResearchState) -> str:
    """Check if all research steps are complete"""
    if state["current_step"] >= len(state["research_plan"]):
        return "generate_report"
    return "continue_research"

print("‚úÖ Deep Research Engine Ready (with error handling)!")


‚úÖ Deep Research Engine Ready (with error handling)!


In [13]:
def generate_pharma_report(state: PharmaResearchState) -> PharmaResearchState:
    """Generate comprehensive pharma sector report"""
    
    print("\nüìù Generating Comprehensive Report...")
    
    # Compile all research data
    all_findings = ""
    for item in state["research_data"]:
        all_findings += f"\n\nTopic: {item['topic']}\n"
        for data in item['data']:
            all_findings += f"Data: {data['results'][:500]}\n"
    
    report_prompt = f"""You are a pharmaceutical industry analyst writing a comprehensive report.

Original Query: {state['query']}

Research Findings:
{all_findings[:8000]}

Write a detailed, professional report (1500-2000 words) covering:

1. EXECUTIVE SUMMARY
2. MARKET OVERVIEW
   - Current market size and growth
   - Key players and market share
3. R&D AND INNOVATION
   - Drug pipelines
   - Clinical trials
   - Patent landscape
4. REGULATORY ENVIRONMENT
   - Recent approvals
   - Policy changes
5. COMPETITIVE ANALYSIS
   - Major companies
   - Strategic initiatives
6. FUTURE OUTLOOK
   - Growth projections
   - Emerging trends
7. KEY INSIGHTS AND RECOMMENDATIONS

Use professional business language. Include specific data points and company names.
Format with proper headers and structure.
"""
    
    response = llm.invoke(report_prompt)
    final_report = response.content
    
    state["final_report"] = final_report
    
    # Save report
    os.makedirs("outputs/reports", exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"outputs/reports/{timestamp}_PHARMA_{state['query'][:50].replace(' ', '_')}.md"
    
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"# Pharmaceutical Sector Research Report\n\n")
        f.write(f"**Query:** {state['query']}\n\n")
        f.write(f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        f.write(f"**Sector:** PHARMA\n\n")
        f.write("---\n\n")
        f.write(final_report)
    
    print(f"\nüíæ Report saved to: {filename}")
    
    return state

print("‚úÖ Report Generator Ready!")


‚úÖ Report Generator Ready!


In [14]:
# Build LangGraph workflow
workflow = StateGraph(PharmaResearchState)

# Add nodes
workflow.add_node("plan", generate_pharma_research_plan)
workflow.add_node("research", pharma_deep_research)
workflow.add_node("report", generate_pharma_report)

# Add edges
workflow.set_entry_point("plan")
workflow.add_edge("plan", "research")
workflow.add_conditional_edges(
    "research",
    check_research_complete,
    {
        "continue_research": "research",
        "generate_report": "report"
    }
)
workflow.add_edge("report", END)

# Compile
pharma_agent = workflow.compile()

print("‚úÖ Pharma Agent Graph Compiled!")
print("üéØ Ready to execute pharmaceutical research!")


‚úÖ Pharma Agent Graph Compiled!
üéØ Ready to execute pharmaceutical research!


In [15]:
def research_pharma_with_approval(query: str):
    """Execute pharma research with user approval"""
    
    print("="*60)
    print("üè• PHARMA SECTOR RESEARCH AGENT")
    print("="*60)
    print(f"Query: {query}\n")
    
    # Initialize state
    initial_state = {
        "query": query,
        "research_plan": [],
        "current_step": 0,
        "research_data": [],
        "follow_up_questions": [],
        "final_report": None,
        "sector": "PHARMA"
    }
    
    # Generate plan first
    print("üìä SECTOR DETECTED: PHARMA")
    state = generate_pharma_research_plan(initial_state)
    
    # Show approval prompt
    print("\n‚è∏Ô∏è  WAITING FOR APPROVAL")
    print("="*60)
    print("Review the research plan above.\n")
    print("Options:")
    print("  1. Execute research (type 'yes' or 'y')")
    print("  2. Modify plan (type 'modify')")
    print("\n" + "="*60)
    
    # Auto-approve for now (change to input() for interactive)
    approval = "yes"  # Change to: input("Your choice: ").lower()
    
    if approval in ['yes', 'y']:
        print("\nüìä GENERATING COMPREHENSIVE REPORT...")
        print("="*60)
        
        # Execute research
        final_state = pharma_agent.invoke(state)
        
        print("\nüéâ RESEARCH COMPLETE!")
        print(f"   Report saved: outputs/reports/[timestamp]_PHARMA_*.md")
        
        return final_state, final_state["final_report"]
    else:
        print("‚ùå Research cancelled")
        return None, None

print("‚úÖ Execution Function Ready!")
print("\nüöÄ PHARMA AGENT FULLY OPERATIONAL!")


‚úÖ Execution Function Ready!

üöÄ PHARMA AGENT FULLY OPERATIONAL!


In [16]:
# TEST: Pharma Agent
query = "Analyze Sun Pharma's drug pipeline and recent FDA approvals"

state, report = research_pharma_with_approval(query)

# Display report preview
if report:
    print("\n" + "="*60)
    print("üìÑ REPORT PREVIEW (First 500 characters)")
    print("="*60)
    print(report[:500] + "...")


üè• PHARMA SECTOR RESEARCH AGENT
Query: Analyze Sun Pharma's drug pipeline and recent FDA approvals

üìä SECTOR DETECTED: PHARMA

üìã PHARMA RESEARCH PLAN GENERATED:
1. Sun Pharma drug pipeline analysis
2. Recent FDA approvals for Sun Pharma
3. Regulatory landscape for Sun Pharma (FDA, DCGI)
4. Market dynamics and competitive analysis in the pharmaceutical sector
5. Clinical trial outcomes and patent strategies for Sun Pharma
6. Manufacturing capabilities and capacity of Sun Pharma
7. Export performance and international market presence of Sun Pharma
8. Therapeutic areas of focus in Sun Pharma's portfolio
9. Impact of government policies on Sun Pharma's operations
10. R&D investment trends and future outlook for Sun Pharma

‚è∏Ô∏è  WAITING FOR APPROVAL
Review the research plan above.

Options:
  1. Execute research (type 'yes' or 'y')
  2. Modify plan (type 'modify')


üìä GENERATING COMPREHENSIVE REPORT...

üìã PHARMA RESEARCH PLAN GENERATED:
1. Sun Pharma's current drug pipeline