In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import glob
import requests
import time


In [None]:
parquet_path = "./Staging/Gold/cleaned_second_592.parquet"

# Guard clause: stop immediately when the input parquet file is absent.
if not os.path.exists(parquet_path):
    raise FileNotFoundError(f"Parquet file not found at {parquet_path}")

RecordOwl_Leads = pd.read_parquet(parquet_path, engine="fastparquet")
print(f"Loaded {len(RecordOwl_Leads)} rows from {parquet_path}")
print(RecordOwl_Leads.shape)

# Keep only the rows whose first PIC contact number is neither null nor an
# empty string.
filtered_df = RecordOwl_Leads.loc[
    lambda leads: leads["PIC NAME 1 Contact Number"].notna()
    & (leads["PIC NAME 1 Contact Number"] != "")
]

# Bare expression so the notebook renders the filtered frame.
filtered_df

In [None]:
# Work on an independent copy: later cells add and overwrite columns on
# companies_df, and those writes should not go through to filtered_df.
companies_df = filtered_df.copy()

### Lusha Scraping

In [None]:

# API Configuration
# NOTE(security): the literal below is a credential committed to source.
# Supply the key through the LUSHA_API_KEY environment variable instead; the
# literal is kept only as a fallback so existing runs keep working, and it
# should be rotated and removed.
API_KEY = os.environ.get("LUSHA_API_KEY") or "15304a36-d527-4f30-b250-b79cd409a464"
BASE_URL = "https://api.lusha.com"
CONTACT_SEARCH_ENDPOINT = f"{BASE_URL}/prospecting/contact/search"
CONTACT_ENRICH_ENDPOINT = f"{BASE_URL}/prospecting/contact/enrich"

# Headers sent with every search/enrich request in this notebook.
headers = {
    "api_key": API_KEY,
    "Content-Type": "application/json"
}

# Key decision maker titles.
# Matching is by substring, so the order of this list does not affect the
# result and broader entries already cover narrower ones (e.g. 'director'
# also matches 'managing director').
DECISION_MAKER_KEYWORDS = [
    'ceo', 'chief executive', 'founder', 'co-founder', 'owner', 'president',
    'director', 'managing director', 'general manager', 'head', 'principal',
    'chairman', 'chairwoman', 'partner', 'proprietor'
]

def is_decision_maker(title):
    """Check if title indicates a key decision maker.

    Args:
        title: Job title to inspect. Anything that is not a string (None,
            NaN, a nested object) is treated as "no title".

    Returns:
        True when the lower-cased title contains any entry of
        DECISION_MAKER_KEYWORDS as a substring, otherwise False. Empty
        titles and the placeholder 'N/A' return False.
    """
    # Guard against non-string values before calling string methods on them.
    if not isinstance(title, str):
        return False

    title_lower = title.strip().lower()
    if not title_lower or title_lower == 'n/a':
        return False

    return any(keyword in title_lower for keyword in DECISION_MAKER_KEYWORDS)

def has_singapore_number(contact_data):
    """Report whether the search metadata flags any phone for this contact.

    Only the 'hasPhones' and 'hasMobilePhone' flags are read; the actual
    number (and therefore its country) is not inspected here, so this is a
    pre-filter rather than a Singapore check.

    Returns the first truthy flag value, or the value of the mobile flag when
    the phone flag is falsy (both default to False when missing).
    """
    phone_flag, mobile_flag = (
        contact_data.get(flag_name, False)
        for flag_name in ('hasPhones', 'hasMobilePhone')
    )
    return phone_flag or mobile_flag

def extract_singapore_mobile(phone_numbers):
    """
    Extract Singapore mobile number from phone numbers list.
    Singapore mobile numbers: +65 8XXX XXXX or +65 9XXX XXXX

    Args:
        phone_numbers: Iterable of dicts read through their 'number' and
            'type' keys. None, an empty list, non-dict entries and entries
            whose 'number'/'type' is null are tolerated.

    Returns:
        The first entry's original (unnormalised) 'number' string that starts
        with +65 and either has 8/9 as the next digit or is typed as
        mobile/mobile_phone/personal. None if no entry qualifies.
    """
    if not phone_numbers:
        return None

    # Formatting characters removed before the prefix checks, so values such
    # as "(+65) 9123-4567" or "+65.9123.4567" are recognised too.
    separators = str.maketrans('', '', ' -().\t')
    mobile_types = ('mobile', 'mobile_phone', 'personal')

    for phone_obj in phone_numbers:
        if not isinstance(phone_obj, dict):
            continue

        # `or ''` covers keys that are present but null; .get()'s default
        # only applies when the key is missing altogether.
        number = phone_obj.get('number') or ''
        if not isinstance(number, str):
            number = str(number)
        phone_type = str(phone_obj.get('type') or '').lower()

        clean_number = number.translate(separators)
        if not clean_number.startswith('+65'):
            continue

        # Mobile by numbering rule: first digit after the country code is 8 or 9.
        if clean_number[3:4] in ('8', '9'):
            return number

        # Otherwise accept a +65 number that the payload itself labels as mobile.
        if phone_type in mobile_types:
            return number

    return None

def enrich_contacts_batch(request_id, contact_ids, timeout=30):
    """Enrich multiple contacts and return full phone data.

    Args:
        request_id: 'requestId' value sent in the enrich payload.
        contact_ids: Contact ids to enrich; an empty/None value short-circuits
            to {} without making any HTTP call.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        Dict mapping each contact id (the response's 'id', falling back to
        'contactId') to its list of phone-number dicts. Returns {} on a
        non-200/201 status or on any exception (best effort; the error is
        printed, not raised).
    """
    if not contact_ids:
        return {}

    enrich_payload = {
        "requestId": request_id,
        "contactIds": contact_ids
    }

    try:
        response = requests.post(
            CONTACT_ENRICH_ENDPOINT,
            headers=headers,
            json=enrich_payload,
            timeout=timeout,
        )

        if response.status_code not in (200, 201):
            print(f"  [ERROR] Enrich Error {response.status_code}")
            return {}

        data = response.json()
        contacts = data.get('contacts') if isinstance(data, dict) else None

        contact_map = {}
        if isinstance(contacts, list):
            for contact in contacts:
                if not isinstance(contact, dict):
                    continue
                contact_id = contact.get('id') or contact.get('contactId')
                # `or` guards against keys that are present but null, which
                # would otherwise abort the whole batch via the except below.
                details = contact.get('data') or {}
                contact_map[contact_id] = details.get('phoneNumbers') or []

        return contact_map

    except Exception as e:
        # Deliberately broad: enrichment is best effort and one failed call
        # must not stop the run.
        print(f"  [ERROR] Enrich Exception: {str(e)}")
        return {}

def search_all_companies_optimized(company_names, batch_size=40, timeout=30):
    """Search all companies in batches to minimize credits.

    One POST is sent to CONTACT_SEARCH_ENDPOINT per batch of company names.

    Args:
        company_names: List of company names. Entries that are not non-blank
            strings (e.g. NaN from a DataFrame column) are left out of the
            request payload.
        batch_size: Number of names per request; also sent as the page size.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        Tuple (contacts_by_company, total_credits) where contacts_by_company
        maps the 'companyName' of each returned contact to a list of
        {'contact': <contact dict>, 'request_id': <requestId>} entries, and
        total_credits is the sum of 'billing.creditsCharged' across batches.

    Raises:
        ValueError: if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    all_contacts_by_company = {}
    total_credits = 0

    for batch_start in range(0, len(company_names), batch_size):
        raw_batch = company_names[batch_start:batch_start + batch_size]
        # Drop null/blank names so a single NaN cannot invalidate the payload
        # for the whole batch.
        batch = [name for name in raw_batch if isinstance(name, str) and name.strip()]

        print(f"\n{'='*80}")
        print(f"BATCH {batch_start//batch_size + 1}: Searching {len(batch)} companies in ONE request")
        print(f"{'='*80}")

        if not batch:
            print("[SKIP] No usable company names in this batch")
            continue

        # NOTE(review): only page 0 with `size == batch_size` is requested, so
        # presumably at most `batch_size` contacts come back for the whole
        # batch and some companies may get none - confirm against the API's
        # pagination rules.
        payload = {
            "pages": {"page": 0, "size": batch_size},
            "filters": {
                "contacts": {
                    "include": {"existing_data_points": ["phone", "mobile_phone"]}
                },
                "companies": {
                    "include": {"names": batch}
                }
            }
        }

        try:
            response = requests.post(
                CONTACT_SEARCH_ENDPOINT,
                headers=headers,
                json=payload,
                timeout=timeout,
            )

            if response.status_code in (200, 201):
                data = response.json()
                contacts = data.get('data') or []
                request_id = data.get('requestId', '')
                credits_charged = (data.get('billing') or {}).get('creditsCharged') or 0
                total_credits += credits_charged

                print(f"[OK] Found {len(contacts)} total contacts across {len(batch)} companies")
                print(f"[OK] Credits charged: {credits_charged} (vs {len(batch)} if searched individually)")
                print(f"[OK] Savings: {len(batch) - credits_charged} credits!")

                # Group contacts by company
                for contact in contacts:
                    company = contact.get('companyName') or ''
                    all_contacts_by_company.setdefault(company, []).append({
                        'contact': contact,
                        'request_id': request_id
                    })

                # Brief pause between successful requests.
                time.sleep(0.1)
            else:
                print(f"[ERROR] Search Error {response.status_code}: {response.text[:200]}")

        except Exception as e:
            # Best effort: report the failed batch and carry on with the next.
            print(f"[ERROR] Exception: {str(e)}")

    print(f"\n{'='*80}")
    print(f"[CREDITS] Total search credits used: {total_credits} (vs {len(company_names)} individual searches)")
    print(f"[SAVINGS] Credit savings: {len(company_names) - total_credits}")
    print(f"{'='*80}\n")

    return all_contacts_by_company, total_credits

# Column order of the frame returned by get_company_contacts_sg_optimized.
_SG_RESULT_COLUMNS = [
    'Company', 'Contact_Name', 'Contact_Title', 'Contact_Number', 'Is_Decision_Maker',
]


def _blank_contact_result(company_name):
    """Return the placeholder row recorded when a company yields no usable contact."""
    return {
        'Company': company_name,
        'Contact_Name': 'N/A',
        'Contact_Title': 'N/A',
        'Contact_Number': 'N/A',
        'Is_Decision_Maker': 'N/A'
    }


def _match_company_key(company_name, contacts_by_company):
    """Return the key of contacts_by_company matching company_name, or None.

    An exact case-insensitive match wins; otherwise the first key that
    contains, or is contained in, the company name is used. Blank or
    non-string names/keys never match (an empty key would otherwise be a
    substring of every company name).
    """
    if not isinstance(company_name, str) or not company_name.strip():
        return None

    target = company_name.strip().upper()
    partial_match = None
    for key in contacts_by_company:
        if not isinstance(key, str) or not key.strip():
            continue
        candidate = key.strip().upper()
        if candidate == target:
            return key
        if partial_match is None and (target in candidate or candidate in target):
            partial_match = key
    return partial_match


def get_company_contacts_sg_optimized(companies_df):
    """
    OPTIMIZED: Get best Singapore contact for each company
    - Filters for Singapore mobile numbers (+65 8XXX/9XXX)
    - Prioritizes key decision makers
    - Minimal credit usage

    Args:
        companies_df: DataFrame with an 'ACRA REGISTERED NAME' column.

    Returns:
        DataFrame with exactly one row per input company, in input order, and
        the columns Company, Contact_Name, Contact_Title, Contact_Number and
        Is_Decision_Maker ('N/A' everywhere when nothing was found). An empty
        input yields an empty frame with those columns and makes no API call.
    """
    company_names = companies_df['ACRA REGISTERED NAME'].tolist()
    if not company_names:
        return pd.DataFrame(columns=_SG_RESULT_COLUMNS)

    # Step 1: Search all companies in batches
    contacts_by_company, search_credits = search_all_companies_optimized(company_names, batch_size=40)

    # Step 2: Filter and enrich only Singapore decision makers
    results = []
    # Counted as one per enrich call made here; not read from the API response.
    enrich_credits = 0

    print("\n" + "="*80)
    print("FINDING BEST SINGAPORE CONTACT PER COMPANY")
    print("="*80)

    for company_name in company_names:
        matched_company = _match_company_key(company_name, contacts_by_company)
        all_contacts = contacts_by_company.get(matched_company) if matched_company is not None else None

        if not all_contacts:
            print(f"\n{company_name}: No contacts found")
            results.append(_blank_contact_result(company_name))
            continue

        print(f"\n{company_name}:")
        print(f"  Found {len(all_contacts)} contacts, filtering for SG decision makers...")

        # When at least one decision maker exists, ONLY decision makers are
        # enriched (no fall-back to the remaining contacts); otherwise every
        # contact is a candidate.
        decision_makers = [c for c in all_contacts if is_decision_maker(c['contact'].get('jobTitle', ''))]
        contacts_to_check = decision_makers if decision_makers else all_contacts

        best_contact = None
        best_phone = None

        # Enrich candidates one at a time and stop at the first SG mobile.
        for contact_data in contacts_to_check:
            contact = contact_data['contact']
            request_id = contact_data['request_id']
            contact_id = contact.get('contactId')
            contact_name = contact.get('name', 'N/A')
            contact_title = contact.get('jobTitle', 'N/A')

            print(f"  > Checking: {contact_name} ({contact_title})")

            if contact_id and request_id:
                phone_map = enrich_contacts_batch(request_id, [contact_id])
                phones = phone_map.get(contact_id, [])

                sg_mobile = extract_singapore_mobile(phones)
                enrich_credits += 1

                if sg_mobile:
                    print(f"    [SG MOBILE FOUND] {sg_mobile}")
                    best_contact = contact
                    best_phone = sg_mobile
                    break  # Found what we need!
                else:
                    print(f"    [SKIP] No SG mobile number")

            time.sleep(0.1)

        if best_contact and best_phone:
            results.append({
                'Company': company_name,
                # `or 'N/A'` also covers keys that are present but null.
                'Contact_Name': best_contact.get('name') or 'N/A',
                'Contact_Title': best_contact.get('jobTitle') or 'N/A',
                'Contact_Number': best_phone,
                'Is_Decision_Maker': 'Yes' if is_decision_maker(best_contact.get('jobTitle', '')) else 'No'
            })
        else:
            print(f"  [NONE] No Singapore mobile number found")
            results.append(_blank_contact_result(company_name))

    print(f"\n{'='*80}")
    print(f"[CREDITS] TOTAL CREDITS USED:")
    print(f"   Search: {search_credits} credits")
    print(f"   Enrich: {enrich_credits} credits")
    print(f"   Total: {search_credits + enrich_credits} credits")
    print(f"\n[SAVINGS] vs individual search: {len(company_names) - search_credits} credits!")
    print(f"{'='*80}\n")

    return pd.DataFrame(results, columns=_SG_RESULT_COLUMNS)

# Run optimized version
print("="*80)
print("OPTIMIZED SG CONTACT SEARCH - KEY DECISION MAKERS ONLY")
print("="*80)

results_df = get_company_contacts_sg_optimized(companies_df)

print("\nFINAL RESULTS:")
print("="*80)
print(results_df.to_string(index=False))

# Add to dataframe
# results_df carries a fresh 0..n-1 index and one row per company in the same
# order as companies_df, whereas companies_df keeps the row labels of the
# filtered source frame. Assigning a Series would align on those labels and
# attach contacts to the wrong rows (or leave NaN), so assign by position.
for lusha_column in ('Contact_Name', 'Contact_Title', 'Contact_Number', 'Is_Decision_Maker'):
    companies_df[lusha_column] = results_df[lusha_column].to_numpy()


In [None]:
# Assign Lusha contacts to appropriate PIC columns based on availability
def assign_lusha_to_pic_columns(df):
    """
    Assign Lusha contact data to PIC columns intelligently:
    - If PIC NAME 1 Contact Number is empty -> assign to PIC 1 (First Name + Last Name)
    - Else if PIC NAME 2 Contact Number is empty -> assign to PIC 2 (First Name 2 + Last Name 2)
    - Else if PIC NAME 3 Contact Number is empty -> assign to PIC 3 (First Name 3 + Last Name 3)
    - Maintains matching between name and number columns
    - Removes '+' from phone numbers

    A slot counts as empty when its number is null, '' or the string 'None'.
    Rows whose 'Contact_Number' is null or 'N/A' are skipped. The temporary
    Lusha columns are NOT dropped here; the caller does that.

    Args:
        df: DataFrame holding the PIC columns plus the temporary
            'Contact_Name', 'Contact_Title' and 'Contact_Number' columns.

    Returns:
        The same DataFrame object, modified in place.
    """
    # (label, first-name column, last-name column, designation column, number column)
    # The designation column of slot 3 follows a different naming pattern
    # than slots 1 and 2; the names are kept exactly as the data uses them.
    pic_slots = [
        ('PIC 1', 'First Name', 'Last Name', 'PIC Name 1 Designation', 'PIC NAME 1 Contact Number'),
        ('PIC 2', 'First Name 2', 'Last Name 2', 'PIC Name 2 Designation', 'PIC NAME 2 Contact Number'),
        ('PIC 3', 'First Name 3', 'Last Name 3', 'PIC Name Designation 3', 'PIC NAME 3 Contact Number'),
    ]

    def _is_blank(value):
        """True when a PIC number cell holds no usable value."""
        return pd.isna(value) or value == '' or value == 'None'

    def _write(row_label, column, value):
        """Store a string in one cell, widening numeric columns to object first."""
        # An all-NaN or numeric column cannot hold a string without an upcast;
        # do it explicitly instead of relying on an implicit one.
        if column in df.columns and pd.api.types.is_numeric_dtype(df[column]):
            df[column] = df[column].astype(object)
        df.at[row_label, column] = value

    print("="*80)
    print("ASSIGNING LUSHA CONTACTS TO PIC COLUMNS")
    print("="*80)

    assigned = {slot[0]: 0 for slot in pic_slots}
    skipped = 0

    for idx, row in df.iterrows():
        contact_number = row.get('Contact_Number', 'N/A')

        # Skip if no valid contact from Lusha
        if pd.isna(contact_number) or contact_number == 'N/A':
            skipped += 1
            continue

        # Clean phone number: Remove '+' and any leading/trailing spaces
        clean_number = str(contact_number).replace('+', '').strip()

        # A missing/null name is normalised to the 'N/A' placeholder so the
        # split below cannot fail.
        contact_name = row.get('Contact_Name', 'N/A')
        if not isinstance(contact_name, str) or not contact_name.strip():
            contact_name = 'N/A'
        contact_title = row.get('Contact_Title', 'N/A')

        # Split name into first and last (simple split on first space)
        if contact_name == 'N/A':
            first_name, last_name = 'N/A', ''
        else:
            first_name, _, last_name = contact_name.strip().partition(' ')

        company_label = str(row.get('ACRA REGISTERED NAME', ''))[:50]

        # Fill the first slot whose contact number is still empty.
        for label, first_col, last_col, title_col, number_col in pic_slots:
            if not _is_blank(row.get(number_col)):
                continue
            _write(idx, first_col, first_name)
            _write(idx, last_col, last_name)
            _write(idx, title_col, contact_title)
            _write(idx, number_col, clean_number)
            _write(idx, 'Contact Number from Lusha?', 'Yes')
            assigned[label] += 1
            print(f"[{label}] {company_label}: {contact_name} ({clean_number})")
            break
        else:
            # All PIC slots are full
            print(f"[SKIP] {company_label}: All PIC slots full")
            skipped += 1

    print(f"\n{'='*80}")
    print(f"ASSIGNMENT SUMMARY:")
    print(f"  Assigned to PIC 1: {assigned['PIC 1']}")
    print(f"  Assigned to PIC 2: {assigned['PIC 2']}")
    print(f"  Assigned to PIC 3: {assigned['PIC 3']}")
    print(f"  Skipped (no contact or slots full): {skipped}")
    print(f"{'='*80}\n")

    return df

# Apply the assignment
companies_df = assign_lusha_to_pic_columns(companies_df)

# Show the updated PIC columns BEFORE cleanup
print("\nUPDATED PIC COLUMNS:")
print("="*80)
pic_cols = [
    'ACRA REGISTERED NAME',
    'First Name', 'Last Name', 'PIC Name 1 Designation', 'PIC NAME 1 Contact Number',
    'First Name 2', 'Last Name 2', 'PIC Name 2 Designation', 'PIC NAME 2 Contact Number',
    'First Name 3', 'Last Name 3', 'PIC Name Designation 3', 'PIC NAME 3 Contact Number',
    'Contact Number from Lusha?'
]
# Some of these columns only come into existence when the assignment step
# writes to them (e.g. 'Contact Number from Lusha?'), so restrict the preview
# to the columns actually present instead of failing with a KeyError.
available_pic_cols = [col for col in pic_cols if col in companies_df.columns]
print(companies_df[available_pic_cols].to_string(index=False))

# Remove temporary Lusha columns
print("\n" + "="*80)
print("CLEANING UP TEMPORARY COLUMNS")
print("="*80)

lusha_temp_cols = ['Contact_Name', 'Contact_Title', 'Contact_Number', 'Is_Decision_Maker']
existing_temp_cols = [col for col in lusha_temp_cols if col in companies_df.columns]

if existing_temp_cols:
    companies_df = companies_df.drop(columns=existing_temp_cols)
    print(f"Removed temporary columns: {', '.join(existing_temp_cols)}")
else:
    print("No temporary columns to remove")

print(f"\nFinal DataFrame shape: {companies_df.shape}")
print(f"Final columns count: {len(companies_df.columns)}")

In [None]:
# Display every column name of companies_df as a plain list.
companies_df.columns.tolist()

In [None]:
# Preview of the name and contact-number columns for the first two PIC slots.
companies_df_x = companies_df.loc[
    :,
    [
        "First Name",
        "Last Name",
        "PIC NAME 1 Contact Number",
        "First Name 2",
        "Last Name 2",
        "PIC NAME 2 Contact Number",
    ],
]
companies_df_x

In [None]:
# companies_df.to_csv("./new_leads.csv")

In [None]:
# import requests
# import pandas as pd
# import time

# # API Configuration
# API_KEY = "15304a36-d527-4f30-b250-b79cd409a464"
# BASE_URL = "https://api.lusha.com"
# CONTACT_SEARCH_ENDPOINT = f"{BASE_URL}/prospecting/contact/search"
# CONTACT_ENRICH_ENDPOINT = f"{BASE_URL}/prospecting/contact/enrich"

# headers = {
#     "api_key": API_KEY,
#     "Content-Type": "application/json"
# }

# # OPTIMIZED: Search all companies at once instead of one-by-one
# def search_all_companies_optimized(company_names, batch_size=40):
#     """
#     Search for contacts across ALL companies in batches to minimize credit usage
    
#     Instead of 1 credit per company, we search in batches of up to 40 companies
#     for 1 credit per batch (up to 40 results)
    
#     Parameters:
#     - company_names: list of company names
#     - batch_size: max results per search (default 40, max before next page)
    
#     Returns:
#     - Dictionary mapping company name to list of contacts
#     """
#     all_contacts_by_company = {}
#     total_credits = 0
    
#     # Process companies in batches
#     for batch_start in range(0, len(company_names), batch_size):
#         batch = company_names[batch_start:batch_start + batch_size]
        
#         print(f"\n{'='*80}")
#         print(f"BATCH {batch_start//batch_size + 1}: Searching {len(batch)} companies in ONE request")
#         print(f"{'='*80}")
        
#         # Search for ALL companies in this batch at once
#         payload = {
#             "pages": {"page": 0, "size": batch_size},
#             "filters": {
#                 "contacts": {
#                     "include": {"existing_data_points": ["phone", "mobile_phone"]}
#                 },
#                 "companies": {
#                     "include": {"names": batch}  # Search multiple companies at once!
#                 }
#             }
#         }
        
#         try:
#             response = requests.post(CONTACT_SEARCH_ENDPOINT, headers=headers, json=payload)
            
#             if response.status_code in [200, 201]:
#                 data = response.json()
#                 contacts = data.get('data', [])
#                 request_id = data.get('requestId', '')
#                 credits = data.get('billing', {}).get('creditsCharged', 0)
#                 total_credits += credits
                
#                 print(f"✓ Found {len(contacts)} total contacts across {len(batch)} companies")
#                 print(f"✓ Credits charged: {credits} (vs {len(batch)} if searched individually)")
#                 print(f"✓ Savings: {len(batch) - credits} credits!")
                
#                 # Group contacts by company
#                 for contact in contacts:
#                     company = contact.get('companyName', '')
#                     if company not in all_contacts_by_company:
#                         all_contacts_by_company[company] = []
#                     all_contacts_by_company[company].append({
#                         'contact': contact,
#                         'request_id': request_id
#                     })
                
#                 time.sleep(0.1)
#             else:
#                 print(f"✗ Search Error {response.status_code}")
        
#         except Exception as e:
#             print(f"✗ Exception: {str(e)}")
    
#     print(f"\n{'='*80}")
#     print(f"💳 Total search credits used: {total_credits} (vs {len(company_names)} individual searches)")
#     print(f"💰 Credit savings: {len(company_names) - total_credits}")
#     print(f"{'='*80}\n")
    
#     return all_contacts_by_company, total_credits

# # Function to enrich contacts
# def enrich_contacts_batch(request_id, contact_ids):
#     """Enrich multiple contacts at once"""
#     if not contact_ids:
#         return {}
    
#     enrich_payload = {
#         "requestId": request_id,
#         "contactIds": contact_ids
#     }
    
#     try:
#         response = requests.post(CONTACT_ENRICH_ENDPOINT, headers=headers, json=enrich_payload)
        
#         if response.status_code in [200, 201]:
#             data = response.json()
            
#             phone_map = {}
#             if data.get('contacts') and isinstance(data['contacts'], list):
#                 for contact in data['contacts']:
#                     contact_id = contact.get('id') or contact.get('contactId')
#                     phones = contact.get('data', {}).get('phoneNumbers', [])
                    
#                     if phones and len(phones) > 0:
#                         phone_map[contact_id] = phones[0].get('number', 'N/A')
#                     else:
#                         phone_map[contact_id] = 'N/A'
            
#             return phone_map
#         else:
#             print(f"  Enrich Error {response.status_code}")
#             return {}
    
#     except Exception as e:
#         print(f"  Enrich Exception: {str(e)}")
#         return {}

# # Main optimized function
# def get_company_contacts_optimized(companies_df):
#     """
#     OPTIMIZED: Get first contact for each company with minimal credit usage
#     """
#     company_names = companies_df['ACRA REGISTERED NAME'].tolist()
    
#     # Step 1: Search all companies in batches (1 credit per 40 companies instead of 1 per company!)
#     contacts_by_company, search_credits = search_all_companies_optimized(company_names, batch_size=40)
    
#     # Step 2: Enrich only the first contact per company
#     results = []
#     enrich_credits = 0
    
#     print("\n" + "="*80)
#     print("ENRICHING FIRST CONTACT PER COMPANY")
#     print("="*80)
    
#     for company_name in company_names:
#         # Find matching company (case-insensitive partial match)
#         matched_company = None
#         for key in contacts_by_company.keys():
#             if company_name.upper() in key.upper() or key.upper() in company_name.upper():
#                 matched_company = key
#                 break
        
#         if matched_company and contacts_by_company[matched_company]:
#             contact_data = contacts_by_company[matched_company][0]
#             contact = contact_data['contact']
#             request_id = contact_data['request_id']
            
#             contact_id = contact.get('contactId')
#             contact_name = contact.get('name', 'N/A')
#             contact_title = contact.get('jobTitle', 'N/A')
            
#             print(f"\n{company_name}:")
#             print(f"  → {contact_name} ({contact_title})")
            
#             # Enrich to get phone
#             if contact_id and request_id:
#                 phone_map = enrich_contacts_batch(request_id, [contact_id])
#                 phone_number = phone_map.get(contact_id, 'N/A')
#                 enrich_credits += 1
                
#                 if phone_number and phone_number != 'N/A':
#                     print(f"  ✓ Phone: {phone_number}")
#                 else:
#                     print(f"  ✗ No phone available")
#             else:
#                 phone_number = 'N/A'
            
#             results.append({
#                 'Company': company_name,
#                 'First_Contact_Name': contact_name,
#                 'First_Contact_Title': contact_title,
#                 'First_Contact_Number': phone_number
#             })
            
#             time.sleep(0.1)
#         else:
#             print(f"\n{company_name}: No contacts found")
#             results.append({
#                 'Company': company_name,
#                 'First_Contact_Name': 'N/A',
#                 'First_Contact_Title': 'N/A',
#                 'First_Contact_Number': 'N/A'
#             })
    
#     print(f"\n{'='*80}")
#     print(f"💳 TOTAL CREDITS USED:")
#     print(f"   Search: {search_credits} credits")
#     print(f"   Enrich: {enrich_credits} credits")
#     print(f"   Total: {search_credits + enrich_credits} credits")
#     print(f"\n💰 SAVINGS vs individual search: {len(company_names) - search_credits} credits!")
#     print(f"{'='*80}\n")
    
#     return pd.DataFrame(results)

# # Run optimized version
# print("="*80)
# print("OPTIMIZED COMPANY CONTACT SEARCH")
# print("="*80)

# results_df = get_company_contacts_optimized(companies_df)

# print("\nFINAL RESULTS:")
# print("="*80)
# print(results_df.to_string(index=False))

# # Add to dataframe
# companies_df['First_Contact_Name'] = results_df['First_Contact_Name']
# companies_df['First_Contact_Title'] = results_df['First_Contact_Title']
# companies_df['First_Contact_Number'] = results_df['First_Contact_Number']

# print("\n\nUPDATED DATAFRAME:")
# print("="*80)
# companies_df

In [None]:
# import requests
# import pandas as pd
# import time

# # Assuming you have companies_df loaded already
# # For demo purposes, let's load it from the Bronze parquet file
# # companies_df = pd.read_parquet("./Staging/Bronze/bronze_data_1.parquet", engine="fastparquet")
 
# # API Configuration
# API_KEY = "15304a36-d527-4f30-b250-b79cd409a464"
# BASE_URL = "https://api.lusha.com"
# CONTACT_SEARCH_ENDPOINT = f"{BASE_URL}/prospecting/contact/search"

# headers = {
#     "api_key": API_KEY,
#     "Content-Type": "application/json"
# }

# # Get the first company name from companies_df
# # Assuming companies_df is already loaded in your notebook
# company_name = companies_df['ACRA REGISTERED NAME'].iloc[0]
# print(f"Searching for contacts at: {company_name}\n")

# # Prepare the search payload
# # NOTE: Lusha API requires minimum page size of 10
# search_payload = {
#     "pages": {
#         "page": 0,
#         "size": 10  # Minimum required by Lusha API
#     },
#     "filters": {
#         "contacts": {
#             "include": {
#                 "existing_data_points": ["phone", "mobile_phone"]  # Only contacts with phone numbers
#             }
#         },
#         "companies": {
#             "include": {
#                 "names": [company_name]  # Search by company name
#             }
#         }
#     }
# }

# # Make the API request
# response = requests.post(CONTACT_SEARCH_ENDPOINT, headers=headers, json=search_payload)

# # Check response - Lusha returns 201 (Created) for successful searches
# if response.status_code in [200, 201]:
#     data = response.json()
#     total_found = len(data.get('data', []))
#     print(f"✓ Success! Found {total_found} contacts")
#     print(f"Credits charged: {data.get('billing', {}).get('creditsCharged', 'N/A')}\n")
    
#     # Extract contact information (limit to first 5)
#     contacts = []
#     for contact in data.get('data', [])[:5]:  # Take only first 5
#         contact_info = {
#             'Name': contact.get('name', 'N/A'),
#             'Title': contact.get('jobTitle', 'N/A'),
#             'Company': contact.get('companyName', company_name),
#             'Phone': 'Available' if contact.get('hasPhones') else 'N/A',
#             'Mobile': 'Available' if contact.get('hasMobilePhone') else 'N/A',
#             'Email': 'Available' if contact.get('hasEmails') else 'N/A',
#             'Contact_ID': contact.get('contactId', 'N/A')
#         }
#         contacts.append(contact_info)
    
#     # Create DataFrame
#     contacts_df = pd.DataFrame(contacts)
#     print(f"Showing first {len(contacts)} contacts:\n")
#     print(contacts_df.to_string(index=False))
    
# else:
#     print(f"✗ Error {response.status_code}: {response.text}")

# print("\n" + "="*80 + "\n")

# # Function to search contacts for multiple companies
# def search_company_contacts(company_names, max_contacts_to_show=5):
#     """
#     Search for contacts across multiple companies
    
#     Parameters:
#     - company_names: list of company names from ACRA_REGISTERED_NAME column
#     - max_contacts_to_show: number of contacts to return per company (will request 10 from API, show this many)
    
#     Returns:
#     - DataFrame with all contacts found
#     """
#     all_contacts = []
    
#     for idx, company_name in enumerate(company_names, 1):
#         print(f"[{idx}/{len(company_names)}] Searching: {company_name}")
        
#         payload = {
#             "pages": {
#                 "page": 0,
#                 "size": 10  # Minimum required by Lusha API
#             },
#             "filters": {
#                 "contacts": {
#                     "include": {
#                         "existing_data_points": ["phone", "mobile_phone"]
#                     }
#                 },
#                 "companies": {
#                     "include": {
#                         "names": [company_name]
#                     }
#                 }
#             }
#         }
        
#         try:
#             response = requests.post(CONTACT_SEARCH_ENDPOINT, headers=headers, json=payload)
            
#             # Lusha returns 201 (Created) for successful searches
#             if response.status_code in [200, 201]:
#                 data = response.json()
#                 contacts_found = data.get('data', [])
                
#                 # Take only the number requested
#                 for contact in contacts_found[:max_contacts_to_show]:
#                     contact_info = {
#                         'Company': contact.get('companyName', company_name),
#                         'Name': contact.get('name', 'N/A'),
#                         'Title': contact.get('jobTitle', 'N/A'),
#                         'Has_Phone': 'Yes' if contact.get('hasPhones') else 'No',
#                         'Has_Mobile': 'Yes' if contact.get('hasMobilePhone') else 'No',
#                         'Has_Email': 'Yes' if contact.get('hasEmails') else 'No',
#                         'Contact_ID': contact.get('contactId', 'N/A'),
#                         'Person_ID': contact.get('personId', 'N/A')
#                     }
#                     all_contacts.append(contact_info)
                
#                 credits = data.get('billing', {}).get('creditsCharged', 0)
#                 print(f"  ✓ Found {len(contacts_found)} contacts (showing {min(len(contacts_found), max_contacts_to_show)}) - Credits: {credits}")
#             else:
#                 print(f"  ✗ Error {response.status_code}: {response.text}")
        
#         except Exception as e:
#             print(f"  ✗ Exception: {str(e)}")
        
#         # Rate limiting: 25 requests per second max, so add small delay
#         time.sleep(0.05)
    
#     return pd.DataFrame(all_contacts)

# # Example: Search for contacts from first 3 companies in companies_df
# print("Searching multiple companies...")
# print("="*80)
# company_list = companies_df['ACRA REGISTERED NAME'].head(3).tolist()
# results_df = search_company_contacts(company_list, max_contacts_to_show=5)

# print("\n" + "="*80)
# print(f"\nTotal contacts found: {len(results_df)}")
# print("\nResults:")
# print(results_df.to_string(index=False))