In [3]:
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
import requests
import os
from typing import Optional
import PyPDF2
from datetime import datetime

In [4]:
from dotenv import load_dotenv
import os
# Run the dotenv loader first, then read both API keys from the environment.
# Each name is bound to None when its variable is not set.
load_dotenv()
openai_api_key, explorium_api_key = (
    os.environ.get(env_name)
    for env_name in ("OPENAI_API_KEY", "EXPLORIUM_API_KEY")
)

In [5]:
class CompanyContextReader(BaseTool):
    """Agent tool that returns the text of every PDF/TXT file in a directory.

    The directory is created on construction when missing, so a fresh setup
    produces the "No context files found" message rather than an exception.
    """

    name: str = "company_context_reader"
    description: str = "Reads company information files to understand DataGuardian AI's offerings and value proposition"
    return_direct: bool = False
    # Directory scanned by _run; can be overridden through the constructor.
    context_dir: str = "company_context"

    def __init__(self, context_dir: Optional[str] = None, **kwargs):
        """Create the tool and make sure its context directory exists.

        Args:
            context_dir: Directory holding the context files. A falsy value
                keeps the class default ("company_context").
            **kwargs: Forwarded unchanged to the BaseTool constructor.
        """
        super().__init__(**kwargs)
        if context_dir:
            self.context_dir = context_dir
        os.makedirs(self.context_dir, exist_ok=True)

    def _run(self, query: str = "") -> str:
        """Read all supported files and return their text as one string.

        Args:
            query: Unused; accepted so the agent can call the tool with any
                input.

        Returns:
            The contents of every .pdf and .txt file (extension matched
            case-insensitively), each prefixed with its filename and joined
            with "---" separators. A file that cannot be read contributes an
            "Error reading ..." entry instead of aborting the run. If nothing
            was collected, a message asking for context files is returned.
        """
        sections = []
        # os.listdir order is arbitrary; sorting keeps the prompt context
        # stable from one run to the next.
        for filename in sorted(os.listdir(self.context_dir)):
            filepath = os.path.join(self.context_dir, filename)
            lowered = filename.lower()
            is_pdf = lowered.endswith(".pdf")
            if not (is_pdf or lowered.endswith(".txt")):
                continue
            try:
                if is_pdf:
                    with open(filepath, "rb") as file:
                        pdf_reader = PyPDF2.PdfReader(file)
                        # Defensive: treat a page that yields no text (None)
                        # as empty instead of failing the whole file.
                        text = "".join(
                            (page.extract_text() or "") + "\n"
                            for page in pdf_reader.pages
                        )
                else:
                    # Explicit UTF-8 avoids depending on the platform default
                    # encoding; undecodable bytes are replaced so reading
                    # stays best-effort.
                    with open(filepath, "r", encoding="utf-8", errors="replace") as file:
                        text = file.read()
                sections.append(f"Content from {filename}:\n{text}")
            except Exception as e:
                # Deliberately broad: one bad file must not hide the others.
                sections.append(f"Error reading {filename}: {str(e)}")

        if not sections:
            return "No context files found. Please provide company/product information files."

        return "\n\n---\n\n".join(sections)

    async def _arun(self, query: str = "") -> str:
        """Async execution is not supported (matches the sibling tools)."""
        raise NotImplementedError("Async version not implemented")


In [6]:
class CompanyResearcher(BaseTool):
    """Agent tool that fetches firmographic data for one business ID.

    Posts the ID to the Explorium firmographics enrichment endpoint and
    returns a condensed summary as a string.
    """

    name: str = "company_researcher"
    description: str = "Get detailed firmographic information about a company using its business ID"
    return_direct: bool = False
    # Seconds to wait for the HTTP call; without a timeout a stalled
    # connection would block the agent indefinitely.
    request_timeout: float = 30.0

    def _run(self, business_id: str) -> str:
        """Look up firmographics for ``business_id``.

        Args:
            business_id: Identifier sent as ``business_id`` in the request
                payload.

        Returns:
            ``str()`` of a dict with company name, description, location,
            industry, employee range, revenue range and LinkedIn profile on
            success; otherwise a string starting with "Error:" so the agent
            can read the failure instead of crashing.
        """
        url = "https://api.explorium.ai/v1/businesses/firmographics/enrich"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            # Read from the module-level key loaded from the environment.
            "api_key": explorium_api_key,
        }
        payload = {"business_id": business_id}

        try:
            response = requests.post(
                url, json=payload, headers=headers, timeout=self.request_timeout
            )
        except requests.RequestException as exc:
            return f"Error: request failed: {exc}"

        if response.status_code != 200:
            return f"Error: {response.status_code}, {response.text}"

        try:
            body = response.json()
        except ValueError as exc:
            return f"Error: invalid JSON in response: {exc}"

        # Tolerate a missing or null "data" entry, or a non-object body.
        data = body.get("data") if isinstance(body, dict) else None
        if not isinstance(data, dict):
            data = {}

        # `or 'N/A'` also covers keys that are present but null.
        country = data.get("country_name") or "N/A"
        region = data.get("region_name") or "N/A"
        firmographics = {
            "company_name": data.get("name"),
            "description": data.get("business_description"),
            "location": f"{country}, {region}",
            "industry": data.get("naics_description"),
            "employee_range": data.get("number_of_employees_range"),
            "revenue_range": data.get("yearly_revenue_range"),
            "linkedin": data.get("linkedin_profile"),
        }
        return str(firmographics)

    async def _arun(self, business_id: str) -> str:
        """Async execution is not supported."""
        raise NotImplementedError("Async version not implemented")



In [7]:
class LinkedInPostAnalyzer(BaseTool):
    """Agent tool that summarises a company's most recent LinkedIn posts.

    Posts the business ID to the Explorium LinkedIn-posts enrichment endpoint
    and returns the newest posts as a string.
    """

    name: str = "linkedin_post_analyzer"
    description: str = "Analyze recent LinkedIn posts from a company using its business ID to understand their current focus and initiatives"
    return_direct: bool = False
    # How many of the most recent posts to return.
    max_posts: int = 5
    # Seconds to wait for the HTTP call; without a timeout a stalled
    # connection would block the agent indefinitely.
    request_timeout: float = 30.0

    def _run(self, business_id: str) -> str:
        """Return the most recent posts for ``business_id``.

        Args:
            business_id: Identifier sent as ``business_id`` in the request
                payload.

        Returns:
            ``str()`` of ``{"recent_posts": [...]}`` holding up to
            ``max_posts`` entries ordered by ascending ``days_since_posted``
            on success; otherwise a string starting with "Error:".
        """
        url = "https://api.explorium.ai/v1/businesses/linkedin_posts/enrich"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            # Read from the module-level key loaded from the environment.
            "api_key": explorium_api_key,
        }
        payload = {"business_id": business_id}

        try:
            response = requests.post(
                url, json=payload, headers=headers, timeout=self.request_timeout
            )
        except requests.RequestException as exc:
            return f"Error: request failed: {exc}"

        if response.status_code != 200:
            return f"Error: {response.status_code}, {response.text}"

        try:
            body = response.json()
        except ValueError as exc:
            return f"Error: invalid JSON in response: {exc}"

        # Tolerate a missing or null "data" entry and skip malformed items.
        raw_posts = body.get("data") if isinstance(body, dict) else None
        posts = [post for post in (raw_posts or []) if isinstance(post, dict)]

        def _recency(post: dict) -> float:
            # A missing OR null age sorts last; comparing None with a number
            # would otherwise raise TypeError inside sorted().
            days = post.get("days_since_posted")
            return days if isinstance(days, (int, float)) else float("inf")

        recent_posts = sorted(posts, key=_recency)[: max(self.max_posts, 0)]

        formatted_posts = [
            {
                "date": post.get("created_at"),
                "content": post.get("post_text"),
                "likes": post.get("number_of_likes"),
                "comments": post.get("number_of_comments"),
                "url": post.get("post_url"),
                "days_since_posted": post.get("days_since_posted"),
            }
            for post in recent_posts
        ]

        return str({"recent_posts": formatted_posts})

    async def _arun(self, business_id: str) -> str:
        """Async execution is not supported (matches the sibling tools)."""
        raise NotImplementedError("Async version not implemented")

In [8]:
# Business Challenges Tool
class BusinessChallengesAnalyzer(BaseTool):
    """Agent tool that reports business challenges for one business ID.

    Posts the ID to the Explorium ``pc_business_challenges_10k`` enrichment
    endpoint and returns five challenge categories as a string.
    """

    name: str = "business_challenges_analyzer"
    description: str = "Analyze business challenges and risks for a company using its business ID"
    return_direct: bool = False
    # Seconds to wait for the HTTP call; without a timeout a stalled
    # connection would block the agent indefinitely.
    request_timeout: float = 30.0

    def _run(self, business_id: str) -> str:
        """Fetch challenge categories for ``business_id``.

        Args:
            business_id: Identifier sent as ``business_id`` in the request
                payload.

        Returns:
            ``str()`` of a dict mapping each challenge category to a
            comma-separated string on success; otherwise a string starting
            with "Error:".
        """
        url = "https://api.explorium.ai/v1/businesses/pc_business_challenges_10k/enrich"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            # Read from the module-level key loaded from the environment.
            "api_key": explorium_api_key,
        }
        payload = {"business_id": business_id}

        try:
            response = requests.post(
                url, json=payload, headers=headers, timeout=self.request_timeout
            )
        except requests.RequestException as exc:
            return f"Error: request failed: {exc}"

        if response.status_code != 200:
            return f"Error: {response.status_code}, {response.text}"

        try:
            body = response.json()
        except ValueError as exc:
            return f"Error: invalid JSON in response: {exc}"

        # Tolerate a missing or null "data" entry, or a non-object body.
        data = body.get("data") if isinstance(body, dict) else None
        if not isinstance(data, dict):
            data = {}

        def _as_text(value) -> str:
            # ", ".join() on a plain string would split it into characters,
            # and on None or non-string items it would raise; normalise all
            # of those shapes to one readable string.
            if value is None:
                return ""
            if isinstance(value, str):
                return value
            if isinstance(value, (list, tuple, set)):
                return ", ".join(str(item) for item in value if item is not None)
            return str(value)

        challenges = {
            # NOTE(review): this is the only source key without the
            # "company_" prefix — confirm against the API response schema.
            "technological_disruption": _as_text(data.get("technological_disruption")),
            "security_breach": _as_text(data.get("company_data_security_breach")),
            "market_saturation": _as_text(data.get("company_market_saturation")),
            "competition": _as_text(data.get("company_competition")),
            "customer_adoption": _as_text(data.get("company_customer_adoption")),
        }
        return str(challenges)

    async def _arun(self, business_id: str) -> str:
        """Async execution is not supported."""
        raise NotImplementedError("Async version not implemented")

In [11]:
def generate_and_save_pitch(openai_api_key: str, business_id: str, context_dir: str = "company_context") -> str:
    """
    Generate and save a personalized sales pitch using DataGuardian AI's context files and target company research.

    Builds a zero-shot ReAct agent over the four research tools, runs it for
    the given business ID, appends the generation date and writes the result
    to ``sales_pitches/<sanitized business_id>_sales_pitch.md``.

    Args:
        openai_api_key (str): OpenAI API key
        business_id (str): Target company's business ID
        context_dir (str): Directory containing context files about DataGuardian AI

    Returns:
        str: A confirmation message that contains the path of the saved
        sales pitch file (not the bare path).
    """
    # Initialize the language model
    llm = ChatOpenAI(
        temperature=0.7,
        model="gpt-4",
        openai_api_key=openai_api_key
    )

    # Initialize all tools
    tools = [
        CompanyContextReader(context_dir=context_dir),
        CompanyResearcher(),
        BusinessChallengesAnalyzer(),
        LinkedInPostAnalyzer()
    ]

    # The steps below refer to each tool by its registered `name` attribute;
    # the agent has to emit exactly those names to invoke a tool, so the
    # prompt must not use the Python class names.
    # The output format intentionally has no date placeholder: the real date
    # is appended in code after the run, which avoids a duplicated or
    # model-invented "Generated:" line.
    agent = initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        handle_parsing_errors=True,
        # NOTE(review): four tool calls plus the final answer already use
        # five steps, so a single retry may hit this limit — consider raising.
        max_iterations=5,
        early_stopping_method="generate",
        agent_kwargs={
            "prefix": """You are an expert sales development representative for DataGuardian AI, tasked with creating highly personalized sales emails. 
            You specialize in selling data security and governance solutions to enterprises using data cloud platforms.
            
            Follow these steps in order:
            
            1. Use the company_context_reader tool to understand DataGuardian AI's offerings, success metrics, and case studies
            2. Research the target company using their business ID with the company_researcher tool
            3. Analyze their business challenges using the business_challenges_analyzer tool
            4. Review their recent LinkedIn activity using the linkedin_post_analyzer tool
            5. Create a compelling, personalized sales pitch that:
               - Shows deep understanding of their data security and governance needs
               - Addresses specific compliance and security challenges they face
               - Demonstrates how DataGuardian AI's solution complements their existing data infrastructure
               - References relevant case studies and metrics from similar clients
               - Emphasizes the seamless integration with their current tech stack
            
            Format the output as:
            # Sales Pitch for [Company Name]
            
            ## Subject Line
            [Compelling subject line focusing on data security/governance value]
            
            ## Personalized Message
            [Warm introduction]
            [Show understanding of their business and current data initiatives]
            [Address specific security/compliance challenges]
            [Present DataGuardian AI's relevant solution]
            [Include success metrics from similar case studies]
            
            ## Value Proposition
            [List 3-4 specific benefits for their use case]
            [Include relevant metrics from our case studies]
            
            ## Call to Action
            [Specific next steps for a technical demo]
            
            Do not add a generation date; it is appended automatically."""
        }
    )

    # Generate the sales pitch
    result = agent.run(f"""Create a personalized sales pitch for {business_id}. First understand DataGuardian AI's offerings 
    from the context files, then research {business_id} to create a compelling pitch focusing on their specific data 
    security and governance needs. Emphasize how we complement their existing data cloud infrastructure.""")

    # Build the filename from the business ID. Anything other than letters,
    # digits, "-" and "_" becomes "_", so spaces are still replaced as before
    # and path separators or dots cannot redirect the write.
    company_name = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in business_id
    ).lower() or "unknown"

    # Create sales_pitches directory if it doesn't exist
    os.makedirs('sales_pitches', exist_ok=True)

    # Add current date to the result
    current_date = datetime.now().strftime("%Y-%m-%d")
    result += f"\n\nGenerated: {current_date}"

    # Save the pitch to a markdown file. UTF-8 is explicit because model
    # output may contain characters the platform default encoding cannot
    # represent.
    filename = f"sales_pitches/{company_name}_sales_pitch.md"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(result)

    return f"Sales pitch has been saved to {filename}"


In [None]:
# Run the whole pipeline for one target company and print the confirmation
# message returned by generate_and_save_pitch.
result = generate_and_save_pitch(
    openai_api_key,
    "e921d7dce42fbcb84bb2110d925ad778",
    context_dir="company_context",
)
print(result)