### Search

In [1]:
from sf_researcher.retrievers.duckduckgo.duckduckgo import Duckduckgo

In [2]:
# Build the DuckDuckGo retriever. The company name is wrapped in literal
# double quotes inside the query string itself.
ddg = Duckduckgo(query="\"OpenSea\" company registered directors")

In [3]:
# Run the query, asking for at most 10 results, and dump the returned list as-is.
search_results = ddg.search(max_results=10)
print(search_results)

[{'title': 'OPENSEA GROUP LIMITED - Find and update company information', 'href': 'https://find-and-update.company-information.service.gov.uk/company/13549031', 'body': 'OPENSEA GROUP LIMITED - Free company information from Companies House including registered office address, filing history, accounts, annual return, officers, charges, business activity ... Company Overview for OPENSEA GROUP LIMITED (13549031) Filing history for OPENSEA GROUP LIMITED (13549031) People for OPENSEA GROUP LIMITED ...'}, {'title': 'Welcoming Chris Dixon to our Board of Directors | OpenSea', 'href': 'https://opensea.io/blog/articles/welcoming-chris-dixon-to-our-board-of-directors', 'body': 'Published. July 30, 2022. I\'m excited to formally welcome Chris Dixon to the OpenSea Board of Directors. Chris has been extremely influential in shaping my views on web3. I followed his writing closely when I first fell down the "crypto rabbithole," and met him in 2021 when a16z first expressed interest in investing in O

In [16]:
### Search Results Analysis

In [4]:
import os
from sf_researcher.utils.validators import *
from sf_researcher.utils.llm import *
from sf_researcher.master.prompts import *
from sf_primary_agent import SFPrimaryAgent

# Read the LangChain settings from the environment into notebook globals.
# Each value is None when the corresponding variable is not set.
# NOTE(review): no later cell visible in this notebook references these names.
LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = os.environ.get("LANGCHAIN_ENDPOINT")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.environ.get("LANGCHAIN_PROJECT")

In [5]:
# Request payload whose fields are forwarded to SFPrimaryAgent in a later cell.
# The "retreiver" spelling in two keys is intentional here: it matches the
# keyword names used when the agent is constructed.
request = {
    "query": "OpenSea",
    "contacts": [],
    "include_domains": [],
    "exclude_domains": [],
    "parent_sub_queries": ["\"OpenSea\" company registered directors"],
    "child_sub_queries": ["director data"],
    "salesforce_id": "00QQy000007D0hKMAS",
    "parent_retreiver_queries": ["OpenSea directors"],
    "child_retreiver_queries": ["director data"],
}

# Report types passed to the agent for the main and child reports.
main_report_type = "compliance_report"
child_report_type = "contact_report"

In [6]:
# Construct the primary agent from the request payload. The Salesforce ID is
# used as the namespace; no source URLs are supplied and the config path is empty.
researcher = SFPrimaryAgent(
    query=request["query"],
    namespace=request["salesforce_id"],
    source_urls=None,
    config_path="",
    contacts=request["contacts"],
    include_domains=request["include_domains"],
    exclude_domains=request["exclude_domains"],
    main_report_type=main_report_type,
    child_report_type=child_report_type,
    parent_sub_queries=request["parent_sub_queries"],
    child_sub_queries=request["child_sub_queries"],
    parent_retreiver_queries=request["parent_retreiver_queries"],
    child_retreiver_queries=request["child_retreiver_queries"],
)

INFO:sf_primary_agent:No contacts provided. Contacts will be constructed.


In [8]:
overall_goal = "acquire relevant web search results on OpenSea and it's related Directors"
initial_analysis = await analyze_search_result(search_results, overall_goal, config=researcher.main_task_assistant.cfg)
print(initial_analysis)


🤖 llm.py analyze_search_result Calling gpt-3.5-turbo...



INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


items=[InitialSearchResult(title='OPENSEA GROUP LIMITED - Find and update company information', href='https://find-and-update.company-information.service.gov.uk/company/13549031', search_category=<SearchContentTypeEnum.company_information: 'Company Information'>, source_type=<SourceTypeEnum.government_filing: 'Government Filing'>, exclusion=False, reason='Contains company information and filing history'), InitialSearchResult(title='Welcoming Chris Dixon to our Board of Directors | OpenSea', href='https://opensea.io/blog/articles/welcoming-chris-dixon-to-our-board-of-directors', search_category=<SearchContentTypeEnum.director_news: 'Director News'>, source_type=<SourceTypeEnum.official_website: 'Official Website and Blog'>, exclusion=False, reason='Announcement of new board member'), InitialSearchResult(title='OpenSea - Wikipedia', href='https://en.wikipedia.org/wiki/OpenSea', search_category=<SearchContentTypeEnum.company_information: 'Company Information'>, source_type=<SourceTypeEnum

### Scraping Tasks

In [9]:
import os
from apify_client import ApifyClient

In [10]:
# Create the Apify client from the APIFY_API_KEY environment variable
# (None is passed through when the variable is not set).
apify_client = ApifyClient(os.environ.get('APIFY_API_KEY'))

In [11]:
def create_scraping_task(apify_client, actor_id, url):
    """Run an Apify actor against a single URL and return its default dataset ID.

    Args:
        apify_client: Client object exposing ``actor(actor_id).call(run_input=...)``.
        actor_id: Identifier of the actor to run.
        url: The single start URL handed to the actor.

    Returns:
        The ``defaultDatasetId`` value of the run object returned by ``call``.

    Raises:
        RuntimeError: If ``call`` returns ``None`` instead of a run object.
    """
    # Actor input: one start URL, crawl depth 0, at most one page, Apify proxy on.
    # NOTE(review): these key names are passed through unchanged; confirm they
    # match the input schema of the actor identified by ``actor_id``.
    run_input = {
        'startUrls': [{'url': url}],
        'maxCrawlingDepth': 0,
        'maxPagesPerCrawl': 1,
        'proxyConfiguration': {'useApifyProxy': True},
    }

    # Run the actor and wait for the call to return the run object.
    run = apify_client.actor(actor_id).call(run_input=run_input)

    # Without this guard a missing run object surfaces as an opaque
    # "'NoneType' object is not subscriptable" TypeError on the next line.
    if run is None:
        raise RuntimeError(
            f"Apify actor {actor_id!r} returned no run object for {url!r}"
        )

    return run['defaultDatasetId']

In [12]:
def fetch_results(client, dataset_id):
    """Return the ``text`` field of the first item in an Apify dataset.

    Args:
        client: Client object exposing ``dataset(dataset_id).list_items()``.
        dataset_id: Identifier of the dataset to read.

    Returns:
        The first item's ``'text'`` value, or ``''`` when the dataset has no
        items or the first item has no ``'text'`` key.
    """
    items = client.dataset(dataset_id).list_items().items

    # Empty dataset: fall back to an empty string.
    if not items:
        return ''

    # Only the first item is read; any further items are ignored.
    first_item = items[0]
    return first_item.get('text', '')

In [13]:
# Apify actor to run; passed to create_scraping_task as `actor_id`.
actor_id = 'apify/website-content-crawler'

In [14]:
# Page to scrape; handed to create_scraping_task as the single start URL.
url_to_scrape = "https://www.datanyze.com/companies/opensea/452495898"

In [15]:
# Run the actor against the target URL and keep the ID of the run's default dataset.
dataset_id = create_scraping_task(
    apify_client=apify_client,
    actor_id=actor_id,
    url=url_to_scrape,
)

INFO:httpx:HTTP Request: POST https://api.apify.com/v2/acts/apify~website-content-crawler/runs "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: GET https://api.apify.com/v2/actor-runs/nGbdKttmeb9v6akwd?waitForFinish=999999 "HTTP/1.1 200 OK"


In [16]:
# Show the dataset ID returned by create_scraping_task.
print(dataset_id)

01XgIpjmnSeIy0QOj


In [17]:
# Fetch the 'text' field of the first dataset item (not raw HTML) and print it.
raw_data = fetch_results(client=apify_client, dataset_id=dataset_id)
print(raw_data)

INFO:httpx:HTTP Request: GET https://api.apify.com/v2/datasets/01XgIpjmnSeIy0QOj/items "HTTP/1.1 200 OK"


OpenSea Company Profile | Management and Employees List
OpenSea Profile and History
OpenSea was founded in 2017 and is the first peer-to-peer marketplace for blockchain-based assets, which include collectibles, gaming items, digital art, domain names, event tickets, and physical assets backed by a blockchain. OpenSea is headquartered in New York, United States. 
Popular Searches
OpenSea Inc
Opensea
Open Earth Foundation
OpenSea.pro
OpenSea.pro Inc
Primary Industries
Contact Information
Headquarters
Revenue 
$42 M
Employees 
200
Founded 
2017
Funding History
OpenSea raised a total of $425 M in funding over 4 rounds.
OpenSea Executive Team & Key Decision Makers
Recently Updated Team Members
Name & TitleSocialContact Info
Stephen Chen
Revenue Accounting and Financial Operations
		
Email
Phone
	
Reveal for Free
	
Janindu Dharmadasa
Small Business Owner
		
Email
Phone
	
Reveal for Free
	
John Towell
Staff Software Engineer
		
Email
Phone
	
Reveal for Free
	
Ali Janela
Senior Manager, Operat

In [18]:
import json

# Bundle the scraped text with the URL it came from (no parsing is performed).
parsed_results = {"url": url_to_scrape, "raw_data": raw_data}

# Pretty-print the bundle as JSON with 4-space indentation.
print(json.dumps(parsed_results, indent=4))

{
    "url": "https://www.datanyze.com/companies/opensea/452495898",
    "raw_data": "OpenSea Company Profile | Management and Employees List\nOpenSea Profile and History\nOpenSea was founded in 2017 and is the first peer-to-peer marketplace for blockchain-based assets, which include collectibles, gaming items, digital art, domain names, event tickets, and physical assets backed by a blockchain. OpenSea is headquartered in New York, United States. \nPopular Searches\nOpenSea Inc\nOpensea\nOpen Earth Foundation\nOpenSea.pro\nOpenSea.pro Inc\nPrimary Industries\nContact Information\nHeadquarters\nRevenue \n$42 M\nEmployees \n200\nFounded \n2017\nFunding History\nOpenSea raised a total of $425 M in funding over 4 rounds.\nOpenSea Executive Team & Key Decision Makers\nRecently Updated Team Members\nName & TitleSocialContact Info\nStephen Chen\nRevenue Accounting and Financial Operations\n\t\t\nEmail\nPhone\n\t\nReveal for Free\n\t\nJanindu Dharmadasa\nSmall Business Owner\n\t\t\nEmail\nPho

1.	Item 1:
	•	title: OPENSEA GROUP LIMITED - Find and update company information
	•	href: link
	•	search_category: Company Information
	•	source_type: Official Government Website
	2.	Item 2:
	•	title: OpenSea Company Profile | Management and Employees List - Datanyze
	•	href: link
	•	search_category: Company Profile
	•	source_type: Business Directory
	3.	Item 3:
	•	title: OpenSea - Wikipedia
	•	href: link
	•	search_category: Company Profile
	•	source_type: Encyclopedia
	4.	Item 4:
	•	title: Welcoming Chris Dixon to our Board of Directors | OpenSea
	•	href: link
	•	search_category: Company News
	•	source_type: Company Blog
	5.	Item 5:
	•	title: Category - Company | OpenSea
	•	href: link
	•	search_category: Company News
	•	source_type: Company Blog
	6.	Item 6:
	•	title: OpenSea CEO and Key Executive Team | Craft.co
	•	href: link
	•	search_category: Company Profile
	•	source_type: Business Directory
	7.	Item 7:
	•	title: OpenSea | Company Overview & News - Forbes
	•	href: link
	•	search_category: Company Overview
	•	source_type: News Website
	8.	Item 8:
	•	title: OpenSea’s Leadership Team - Team members and org chart | The Org
	•	href: link
	•	search_category: Company Profile
	•	source_type: Organizational Chart
	9.	Item 9:
	•	title: OpenSea - Founders and Board of Directors - Tracxn
	•	href: link
	•	search_category: Company Profile
	•	source_type: Business Directory
	10.	Item 10:
	•	title: OpenSea | The Org
	•	href: link
	•	search_category: Company Profile
	•	source_type: Organizational Chart