In [1]:
import json
import os
from dotenv import load_dotenv
from pymongo import MongoClient
from googlesearch import search
import requests

# Load environment variables
load_dotenv()

# Get MongoDB URI from .env file
# NOTE(review): os.getenv returns None when the variable is unset, and that
# None is passed straight to MongoClient below -- confirm that is intended.
mongodb_uri = os.getenv('13F_MongoDB_URI')

# Connect to MongoDB
# These handles are module-level globals; main() uses them and closes `client`.
client = MongoClient(mongodb_uri)
db = client['13f_filings']
firms_collection = db['investment_firms']  # read by main() for 'Firm Name'
linkedin_collection = db['linkedin_urls']  # main() writes search results here
apollo_collection = db['apollo_results']  # main() writes Apollo results here

def _is_linkedin_profile_url(link):
    """Return True when *link* is a LinkedIn personal-profile URL (/in/...)."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    try:
        parsed = urlparse(link)
    except ValueError:
        # Malformed URL (e.g. bad IPv6 literal): treat as "not a profile".
        return False

    host = (parsed.hostname or "").lower()
    # Accept www.linkedin.com as well as regional hosts such as
    # in.linkedin.com or uk.linkedin.com.
    on_linkedin = host == "linkedin.com" or host.endswith(".linkedin.com")
    # Requiring the "/in/" path segment rejects look-alikes such as
    # "/insights" that a plain substring test would let through.
    return (
        parsed.scheme in ("http", "https")
        and on_linkedin
        and parsed.path.startswith("/in/")
    )


def search_company(company_name):
    """Search Google for LinkedIn profile URLs related to a company.

    Args:
        company_name: Firm name; " LLC site:linkedin.com" is appended to it
            to build the search query.

    Returns:
        A list of result URLs that point at LinkedIn personal profiles.
        The search is best-effort: if it fails part-way, the error is
        printed and the URLs collected so far are returned.
    """
    query = f"{company_name} LLC site:linkedin.com"
    search_results = []

    try:
        # Append inside the loop (not a comprehension) so results gathered
        # before a mid-iteration failure are kept.
        for link in search(query, tld="co.in", num=10, stop=10, pause=2):
            if _is_linkedin_profile_url(link):
                search_results.append(link)
    except Exception as e:
        # Deliberately broad: any search failure should skip this company
        # rather than abort the whole run.
        print(f"Error searching for {company_name}: {str(e)}")

    return search_results

def process_linkedin_urls(linkedin_urls, *, batch_size=10, timeout=30):
    """Enrich LinkedIn profile URLs through Apollo's bulk_match endpoint.

    The URLs are sent in batches because the bulk endpoint limits how many
    people may be matched per call. A failed batch is recorded as an error
    entry and the remaining batches are still processed.

    Args:
        linkedin_urls: Iterable of LinkedIn profile URLs.
        batch_size: Maximum number of URLs per API call (must be >= 1).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts. Successful matches have the keys ``linkedin_url``,
        ``email``, ``first_name``, ``last_name`` and ``title``; failed
        batches contribute ``{"error": "..."}``. Empty input returns ``[]``
        without any network call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    endpoint = "https://api.apollo.io/api/v1/people/bulk_match"
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/json',
        # NOTE(review): placeholder credential; supply the real key via
        # configuration rather than committing it to source.
        'X-Api-Key': "YOUR_APOLLO_API_KEY"
    }

    # Materialise once so generators are accepted and slicing works.
    pending = list(linkedin_urls)
    results = []

    for start in range(0, len(pending), batch_size):
        batch = pending[start:start + batch_size]
        payload = {
            "reveal_personal_emails": True,
            "reveal_phone_number": True,
            "webhook_url": "https://your_webhook_site",
            "details": [{"linkedin_url": link} for link in batch]
        }

        try:
            response = requests.post(
                endpoint, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException as exc:
            # Network failure or timeout: record it like an HTTP error so
            # the other batches are not lost.
            results.append({"error": f"Error: {exc}"})
            continue

        if response.status_code != 200:
            results.append(
                {"error": f"Error: {response.status_code} - {response.text}"}
            )
            continue

        try:
            api_response = response.json()
        except ValueError as exc:
            # A 200 with a non-JSON body should not crash the run.
            results.append({"error": f"Error: invalid JSON response - {exc}"})
            continue

        # `or []` covers an explicit null "matches" value.
        for match in api_response.get('matches') or []:
            if not match:
                # Skip null/empty entries for URLs that could not be matched.
                continue
            results.append({
                "linkedin_url": match.get('linkedin_url'),
                "email": match.get('email'),
                "first_name": match.get('first_name'),
                "last_name": match.get('last_name'),
                "title": match.get('title')
            })

    return results

def main():
    """Run the pipeline: firm names -> LinkedIn URLs -> Apollo -> MongoDB.

    Steps:
        1. Read every 'Firm Name' from the firms collection.
        2. Search LinkedIn profile URLs for each firm and write them to
           ``linkedin_urls.json``. The file is written even if the loop is
           interrupted, so completed searches are not lost.
        3. Send the unique URLs to Apollo and write the results to
           ``apollo_results.json``.
        4. Insert both result sets into MongoDB.

    The MongoDB client is always closed, including on error or interrupt.
    """
    try:
        # Extract firm names from the collection
        firm_names = [
            doc['Firm Name']
            for doc in firms_collection.find({}, {'Firm Name': 1, '_id': 0})
            if 'Firm Name' in doc
        ]

        # Dictionary to store all LinkedIn results
        linkedin_results = {}

        try:
            # Process each company
            for company_name in firm_names:
                linkedin_results[company_name] = search_company(company_name)
                print(f"Processed {company_name}")
        finally:
            # Searching is slow; persist what we have even when the loop is
            # cut short (e.g. Ctrl-C) so the work is not thrown away.
            with open('linkedin_urls.json', 'w', encoding='utf-8') as f:
                json.dump(linkedin_results, f, indent=2)
            print("LinkedIn URLs saved to linkedin_urls.json")

        # Process LinkedIn URLs with Apollo.io. dict.fromkeys removes
        # duplicates (the same profile can surface for several firms) while
        # keeping first-seen order.
        all_linkedin_urls = list(dict.fromkeys(
            url for urls in linkedin_results.values() for url in urls
        ))
        apollo_results = process_linkedin_urls(all_linkedin_urls)

        # Save Apollo results to a JSON file
        with open('apollo_results.json', 'w', encoding='utf-8') as f:
            json.dump(apollo_results, f, indent=2)

        print("Apollo results saved to apollo_results.json")

        # Push LinkedIn results to MongoDB as one document per firm. Using
        # firm names as field names is fragile: names such as "NCP Inc."
        # contain '.', which MongoDB field names handle poorly.
        linkedin_docs = [
            {"firm_name": name, "linkedin_urls": urls}
            for name, urls in linkedin_results.items()
        ]
        # insert_many rejects an empty document list, so guard both inserts.
        if linkedin_docs:
            linkedin_collection.insert_many(linkedin_docs)

        # Push Apollo results to MongoDB
        if apollo_results:
            apollo_collection.insert_many(apollo_results)

        print("Results pushed to MongoDB")
    finally:
        # Close the MongoDB connection
        client.close()

if __name__ == "__main__":
    main()

Processed Creekside Partners
Processed Gordian Capital Singapore Pte Ltd
Processed Brown Financial Advisors
Processed NCP Inc.
Processed SW Investment Management LLC
Processed Centennial Bank AR
Processed Mendon Capital Advisors Corp
Processed Pensioenfonds Rail & OV
Processed Strategic Investment Solutions Inc. IL
Processed University of Texas Texas AM Investment Management Co.
Processed SRN Advisors LLC
Processed Cowen Investment Management LLC
Processed Maven Securities LTD
Processed Gibson Wealth Advisors LLC
Processed RiverFront Investment Group LLC
Processed Heritage Oak Wealth Advisors LLC
Processed Kestra Investment Management LLC
Processed Sycomore Asset Management
Processed Birmingham Capital Management Co. Inc. AL
Processed Farmers National Bank
Processed Souders Financial Advisors
Processed Logos Global Management LP
Processed Clearbridge Investments LLC
Processed Carrera Capital Advisors
Processed New Wave Wealth Advisors LLC
Error searching for Dana Investment Advisors In

KeyboardInterrupt: 