In [1]:
import json
from duckduckgo_search import DDGS

def search_company(company_name, max_results=10, site="linkedin.com"):
    """Search DuckDuckGo for pages on ``site`` that mention a company.

    Args:
        company_name: Company to look up. It is interpolated verbatim into the
            query string.
        max_results: Value forwarded to ``DDGS.text`` as ``max_results``.
            Defaults to 10, the value that used to be hard-coded.
        site: Domain placed after the ``site:`` operator in the query.
            Defaults to ``"linkedin.com"``, the value that used to be
            hard-coded.

    Returns:
        list: Everything ``DDGS.text`` produced, materialised into a list.
        The calling code in this notebook reads the ``'title'``, ``'href'``
        and ``'body'`` keys from each item.
    """
    search_query = f"{company_name} site:{site}"

    # The context manager releases the DDGS session once the results are read.
    with DDGS() as ddgs:
        return list(ddgs.text(search_query, max_results=max_results))

# Example usage
if __name__ == "__main__":
    company = "SRN Advisors LLC"
    search_results = search_company(company)

    # Keep only the three fields used downstream, exposing 'href' as 'url'.
    json_results = {
        "company": company,
        "results": [
            {
                "title": hit['title'],
                "url": hit['href'],
                "body": hit['body'],
            }
            for hit in search_results
        ],
    }

    # To persist the payload to disk as well, uncomment:
    # with open(f"{company.replace(' ', '_')}_results.json", 'w') as f:
    #     json.dump(json_results, f, indent=2)

    # Serialise once; the same string is printed here and later passed to
    # extract_relevant_links.
    json_string = json.dumps(json_results, indent=2)
    print(json_string)

{
  "company": "SRN Advisors LLC",
  "results": [
    {
      "title": "Chris Cappillo - SVP - SRN Advisors LLC | LinkedIn",
      "url": "https://www.linkedin.com/in/chris-cappillo-4474a27",
      "body": "View Chris Cappillo's profile on LinkedIn, the world's largest professional community. Chris has 13 jobs listed on their profile. See the complete profile on LinkedIn and discover Chris ..."
    },
    {
      "title": "Scott Freeze - SRN Advisors, LLC | LinkedIn",
      "url": "https://www.linkedin.com/in/scott-freeze-9973341b2",
      "body": "Experience: SRN Advisors, LLC \u00b7 Location: Huntingdon Valley \u00b7 49 connections on LinkedIn. View Scott Freeze's profile on LinkedIn, a professional community of 1 billion members."
    },
    {
      "title": "Michael Blaszczyk - Chief Compliance Officer - SRN Advisors | LinkedIn",
      "url": "https://www.linkedin.com/in/michael-blaszczyk-8013381b2",
      "body": "View Michael Blaszczyk's profile on LinkedIn, the world's largest p

In [13]:
import anthropic
import os
from dotenv import load_dotenv

# Run python-dotenv's loader before the environment lookup below
# (presumably this is where a local .env file gets read — confirm for your setup).
load_dotenv()

# The key is looked up under "13F_Anthropic_Key"; the lookup yields None when
# that variable is not set.
Anthropic_Key = os.environ.get("13F_Anthropic_Key")

# Shared client used by extract_relevant_links.
client = anthropic.Anthropic(api_key=Anthropic_Key)

# System prompt for the link-filtering request made by extract_relevant_links.
# The reply is later stripped of its <answer> tags and split on commas
# (parse_comma_separated_string), so the prompt must ask for URLs and for a
# single delimiter. It previously mixed "names" with "urls" and "one per line"
# with "comma separated".
system_prompt = """

You will be provided with information about various websites, including their names, links, and descriptions. Your task is to identify and list the URLs of websites that are specifically about people.

Carefully analyze the provided website information. For each website, determine if its primary focus is on a person or people. This could include biographical websites, personal blogs, or sites dedicated to public figures.

After your analysis, provide your response in the following format:

<answer>
[List the URLs of websites about people here, separated by commas on a single line. If there are no such websites, state "No websites about people found."]
</answer>

Important notes:
- Only include websites that are primarily about specific individuals or groups of people.
- If there are multiple relevant websites, list all of them in a comma separated format
- If no websites are about people, simply state "No websites about people found." in your answer.
- Do not include any explanations or additional commentary in your answer, just the list of URLs or the "No websites about people found." statement.

"""

def extract_relevant_links(search_results, model="claude-3-5-sonnet-20240620", max_tokens=1000):
    """Ask the Anthropic model which of the supplied search results are about people.

    Relies on the module-level ``client`` and ``system_prompt``.

    Args:
        search_results: The search results to classify. Any object is accepted;
            it is formatted into text with an f-string and sent as the single
            user message.
        model: Model identifier passed to ``client.messages.create``. Defaults
            to the identifier that used to be hard-coded, so existing calls are
            unaffected.
        max_tokens: Reply-length cap passed to ``client.messages.create``.
            Defaults to 1000, as before.

    Returns:
        str: Text of the first content block of the reply, unmodified. The
        system prompt asks for it to be wrapped in ``<answer>`` tags.
    """
    message = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=0,
        system=system_prompt,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"{search_results}"
                    }
                ]
            }
        ]
    )
    # Read the reply once, echo it for the notebook reader, then hand it back.
    reply_text = message.content[0].text
    print(reply_text)
    return reply_text

# Send the serialized search results (json_string, built in the search cell)
# to the model and keep its raw text reply.
extracted_urls = extract_relevant_links(json_string)

<answer>
https://www.linkedin.com/in/chris-cappillo-4474a27,https://www.linkedin.com/in/scott-freeze-9973341b2,https://www.linkedin.com/in/michael-blaszczyk-8013381b2,https://www.linkedin.com/in/michael-blaszczyk-25952852,https://www.linkedin.com/in/maralyn-ferrer-80a266131,https://www.linkedin.com/in/john-lapertosa-8b27915,https://www.linkedin.com/in/mrcooper7,https://www.linkedin.com/in/sean-mcconnell-b6152649,https://www.linkedin.com/in/isabella-arroyo-602072300
</answer>


In [14]:
# Check what kind of object the model call returned before parsing it.
type(extracted_urls)

str

In [17]:
import re

def remove_answer_tag(text):
    """Return ``text`` with every ``<answer>`` and ``</answer>`` tag removed.

    Only the tags themselves are dropped; anything between or around them,
    including whitespace and newlines, is kept as-is. Matching is
    case-sensitive.
    """
    answer_tag = re.compile(r'</?answer>')
    return answer_tag.sub('', text)

# Drop the <answer> wrapper from the model reply. Only the tags are removed,
# so any whitespace or newlines around them remain in the result.
cleaned_extracted_urls = remove_answer_tag(extracted_urls)
print(cleaned_extracted_urls)


https://www.linkedin.com/in/chris-cappillo-4474a27,https://www.linkedin.com/in/scott-freeze-9973341b2,https://www.linkedin.com/in/michael-blaszczyk-8013381b2,https://www.linkedin.com/in/michael-blaszczyk-25952852,https://www.linkedin.com/in/maralyn-ferrer-80a266131,https://www.linkedin.com/in/john-lapertosa-8b27915,https://www.linkedin.com/in/mrcooper7,https://www.linkedin.com/in/sean-mcconnell-b6152649,https://www.linkedin.com/in/isabella-arroyo-602072300



In [18]:
def parse_comma_separated_string(input_string):
    """Split a comma-delimited string into a list of trimmed, non-empty items.

    Args:
        input_string: Text whose items are separated by commas.

    Returns:
        list: The items in their original order, each with surrounding
        whitespace stripped. Items that are empty after stripping are
        omitted, so an empty or all-whitespace input yields ``[]``.
    """
    result = []
    for piece in input_string.split(','):
        trimmed = piece.strip()
        # Skip blanks such as those produced by ",," or a trailing comma.
        if trimmed:
            result.append(trimmed)
    return result

# Split the tag-free reply on commas into individual, whitespace-trimmed entries.
list_of_extracted_urls = parse_comma_separated_string(cleaned_extracted_urls)

In [19]:
# Display the parsed entries as the cell's result.
list_of_extracted_urls

['https://www.linkedin.com/in/chris-cappillo-4474a27',
 'https://www.linkedin.com/in/scott-freeze-9973341b2',
 'https://www.linkedin.com/in/michael-blaszczyk-8013381b2',
 'https://www.linkedin.com/in/michael-blaszczyk-25952852',
 'https://www.linkedin.com/in/maralyn-ferrer-80a266131',
 'https://www.linkedin.com/in/john-lapertosa-8b27915',
 'https://www.linkedin.com/in/mrcooper7',
 'https://www.linkedin.com/in/sean-mcconnell-b6152649',
 'https://www.linkedin.com/in/isabella-arroyo-602072300']