In [1]:
# Standard-library imports used by the Config class defined in the next cell

import json
import os

In [2]:
class Config:
    """Runtime configuration for GPT Researcher.

    Each setting is read from an environment variable and falls back to a
    built-in default when the variable is unset. If a JSON config file is
    given (through the ``config_file`` argument or the ``CONFIG_FILE``
    environment variable), its top-level keys are applied last and override
    the environment-derived values.
    """

    def __init__(self, config_file: str = None):
        """Populate the settings from the environment, then from the file.

        Args:
            config_file: Path to a JSON file with overrides. When falsy, the
                ``CONFIG_FILE`` environment variable is consulted instead.

        Raises:
            ValueError: If a numeric environment variable cannot be parsed
                by ``int``/``float``.
            OSError: If the config file cannot be opened.
            json.JSONDecodeError: If the config file is not valid JSON.
        """
        self.config_file = config_file if config_file else os.getenv('CONFIG_FILE')
        self.retriever = os.getenv('SEARCH_RETRIEVER', "tavily")
        self.embedding_provider = os.getenv('EMBEDDING_PROVIDER', 'openai')
        self.llm_provider = os.getenv('LLM_PROVIDER', "openai")
        self.fast_llm_model = os.getenv('FAST_LLM_MODEL', "gpt-3.5-turbo")
        self.smart_llm_model = os.getenv('SMART_LLM_MODEL', "gpt-4o")
        # os.getenv returns strings for variables that are set, so numeric
        # settings are converted explicitly.
        self.fast_token_limit = int(os.getenv('FAST_TOKEN_LIMIT', 2000))
        self.smart_token_limit = int(os.getenv('SMART_TOKEN_LIMIT', 4000))
        self.browse_chunk_max_length = int(os.getenv('BROWSE_CHUNK_MAX_LENGTH', 8192))
        self.summary_token_limit = int(os.getenv('SUMMARY_TOKEN_LIMIT', 700))
        self.temperature = float(os.getenv('TEMPERATURE', 0.55))
        self.user_agent = os.getenv('USER_AGENT', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                                                 "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0")
        self.max_search_results_per_query = int(os.getenv('MAX_SEARCH_RESULTS_PER_QUERY', 5))
        self.memory_backend = os.getenv('MEMORY_BACKEND', "local")
        self.total_words = int(os.getenv('TOTAL_WORDS', 1000))
        self.report_format = os.getenv('REPORT_FORMAT', "APA")
        self.max_iterations = int(os.getenv('MAX_ITERATIONS', 3))
        self.agent_role = os.getenv('AGENT_ROLE', None)
        self.scraper = os.getenv("SCRAPER", "bs")
        # Cast like the other numeric settings; without it the value is a
        # str whenever MAX_SUBTOPICS is set in the environment.
        self.max_subtopics = int(os.getenv("MAX_SUBTOPICS", 3))
        self.load_config_file()

    def load_config_file(self) -> None:
        """Apply overrides from the JSON config file, if one is configured.

        Every top-level key of the JSON object becomes an attribute on this
        instance, replacing any value set from the environment. Values are
        stored exactly as decoded from JSON (no type conversion).
        """
        # Treat an empty path (e.g. CONFIG_FILE="") the same as "no file",
        # matching the truthiness check used in __init__.
        if not self.config_file:
            return None
        with open(self.config_file, "r", encoding="utf-8") as f:
            overrides = json.load(f)
        for key, value in overrides.items():
            setattr(self, key, value)


In [3]:
# Create the notebook-wide Config object. No config_file argument is passed,
# so Config falls back to the CONFIG_FILE environment variable and otherwise
# uses environment variables / built-in defaults only.
# NOTE(review): load_dotenv() is first called in a later cell, so settings
# that exist only in a .env file are presumably not reflected in this
# instance — confirm whether that is intended.
config = Config()

In [4]:
# Imports for the test cells below (asyncio was previously imported twice).
import asyncio

import nest_asyncio

# NOTE(review): wildcard imports hide where names such as
# construct_company_sobject come from; prefer explicit imports once the
# defining module is confirmed.
from sf_researcher.utils.llm import *
from sf_researcher.utils.validators import *

# Patch asyncio so event loops can be nested inside the loop the notebook
# kernel is already running.
nest_asyncio.apply()

In [5]:
# Fixture values shared by the test cells in this notebook.

# Company-level inputs.
test_company = "Example Company Inc."
test_data = "Example Company Inc. is a multinational corporation specializing in technology and software solutions."
test_context = "Contextual information about Example Company Inc. for detailed research."

# Task / query inputs.
test_task = "Research the company's directors and generate a report."
test_sub_query = "Find information about the director John Doe."
test_visited_urls = [
    "http://example.com/director1",
    "http://example.com/director2",
]

# Minimal plain-dict configuration.
test_config = dict(
    smart_llm_model="gpt-3.5-turbo",
    fast_llm_model="gpt-3.5-turbo",
    llm_provider="openai",
    max_subtopics=5,
)

# Dummy compliance report request payload.
compliance_report_request = dict(
    query="Investigate the compliance status of Example Company Inc.",
    salesforce_id="001xx000003DGbOAAW",
    directors=["John Doe", "Jane Smith"],
    include_domains=["example.com"],
    exclude_domains=["exclude-example.com"],
    parent_sub_queries=["Parent query example"],
    child_sub_queries=["Child query example"],
)


In [6]:
# Load a local .env file (if any) into the process environment, then capture
# the LangChain tracing settings as module-level names. Each name is None
# when the corresponding variable is unset.
# NOTE(review): this runs after the Config object was created in an earlier
# cell, so .env values presumably do not influence that instance — confirm.
from dotenv import load_dotenv

load_dotenv()

LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = os.environ.get("LANGCHAIN_ENDPOINT")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.environ.get("LANGCHAIN_PROJECT")

In [7]:
async def test_construct_company_sobject():
    """Smoke-test construct_company_sobject with the notebook fixtures.

    Awaits the call using the module-level fixture values and prints the
    result. Any exception raised along the way is caught and printed
    rather than propagated; nothing is returned.
    """
    try:
        # Assemble the keyword arguments inside the try so that a missing
        # fixture name is reported through the same error path.
        call_kwargs = {
            "subtopic_report": test_task,
            "visited_urls": test_visited_urls,
            "company": test_company,
            "context": test_context,
            "config": config,
        }
        sobject = await construct_company_sobject(**call_kwargs)
        print(f"Company SObject: {sobject}")
    except Exception as exc:
        print(f"Error: {exc}")

# Run the async check defined above. A bare top-level `await` is not valid
# in a plain Python script; this presumably relies on the notebook kernel's
# support for awaiting at cell level — confirm before moving to a .py file.
await test_construct_company_sobject()



🤖 Calling gpt-3.5-turbo...

Construct Company SObject Output Tool Schema:  [{'type': 'function', 'function': {'name': 'CompanySobject', 'description': 'Company Details', 'parameters': {'type': 'object', 'properties': {'company_name': {'description': 'Full legal company name', 'type': 'string'}, 'registration_number': {'description': 'Company registration number', 'default': '', 'type': 'string'}, 'incorporation_date': {'description': 'Incorporation date in yyyy-mm-dd format (e.g. 2023-02-13)', 'default': '', 'type': 'string'}, 'jurisdiction': {'description': 'Jurisdiction of incorporation', 'default': '', 'type': 'string'}, 'registered_address': {'description': 'Registered office address', 'default': '', 'type': 'string'}, 'licenses': {'description': 'Regulatory licenses and registrations held', 'default': '', 'type': 'string'}, 'regulatory_actions': {'description': 'Past regulatory actions, fines or investigations', 'default': '', 'type': 'string'}, 'adverse_media': {'description': '

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Construct Company SObject Output:  company_name='Example Company Inc.' registration_number='' incorporation_date='' jurisdiction='' registered_address='' licenses='' regulatory_actions='' adverse_media='' risk_assessment=<RiskAssessmentEnum.medium: 'medium'> primary_source_url="['http://example.com/director1', 'http://example.com/director2']"
Company SObject: company_name='Example Company Inc.' registration_number='' incorporation_date='' jurisdiction='' registered_address='' licenses='' regulatory_actions='' adverse_media='' risk_assessment=<RiskAssessmentEnum.medium: 'medium'> primary_source_url="['http://example.com/director1', 'http://example.com/director2']"
