# In [None]:  (Jupyter notebook cell marker)
import json
import logging
import os
import time

import pandas as pd
import requests

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Crunchbase user key, sent in the "x-cb-user-key" header of every request.
# It is read from the CRUNCHBASE_API_KEY environment variable so the secret
# does not have to be written into the source; when the variable is unset the
# key falls back to an empty string.
api_key = os.environ.get('CRUNCHBASE_API_KEY', '')

# Manual API request function
def manual_api_request(timeout=30):
    """Send one sample autocomplete query ("Oracle") and log the outcome.

    On success the status code and the pretty-printed JSON body are logged.
    On failure the error is logged, followed by the response body when a
    response was actually received.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so an
            unresponsive server cannot block the script indefinitely.

    Returns:
        None. Request errors are logged, not raised.
    """
    url = "https://api.crunchbase.com/api/v4/autocompletes"
    headers = {
        "accept": "application/json",
        "x-cb-user-key": api_key
    }
    params = {
        "query": "Oracle",
        "collection_ids": "organizations",
        "limit": 1
    }
    # Bound before the try so the except branch can tell "no response at all"
    # (requests.get itself raised) apart from "got an error response".
    response = None
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        logger.info("Manual API request successful")
        logger.info(f"Response status code: {response.status_code}")
        logger.info(f"Response content: {json.dumps(response.json(), indent=2)}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Manual API request failed: {str(e)}")
        # Compare with None explicitly: a Response object is falsy for error
        # status codes, which is exactly when its body is worth logging.
        if response is not None and response.text:
            logger.error(f"Response content: {response.text}")

# Run the sample API request first; its result is only logged
manual_api_request()

# Read the input list of companies
df = pd.read_csv('AllDatav2.csv')

# Stop early if the column used for the lookups below is missing
if 'Company' not in df:
    raise KeyError("The 'Company' column is not present in the CSV file")

def get_entity_id(company_name, timeout=30):
    """Return the UUID of the first organization matching ``company_name``.

    Queries the autocomplete endpoint with ``limit=1`` and reads
    ``entities[0].identifier.uuid`` from the JSON reply.

    Args:
        company_name: Text sent as the ``query`` parameter.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so an
            unresponsive server cannot block the script indefinitely.

    Returns:
        The UUID from the first returned entity, or ``None`` when the reply
        contains no entities or the request fails (both cases are logged).
    """
    url = "https://api.crunchbase.com/api/v4/autocompletes"
    headers = {
        "accept": "application/json",
        "x-cb-user-key": api_key
    }
    params = {
        "query": company_name,
        "collection_ids": "organizations",
        "limit": 1
    }
    # Bound before the try so the except branch can tell "no response at all"
    # (requests.get itself raised) apart from "got an error response".
    response = None
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        if 'entities' in data and data['entities']:
            return data['entities'][0]['identifier']['uuid']
        logger.warning(f"No entity found for company: {company_name}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Error getting entity ID for {company_name}: {str(e)}")
        # Compare with None explicitly: a Response object is falsy for error
        # status codes, which is exactly when its body is worth logging.
        if response is not None and response.text:
            logger.error(f"Response content: {response.text}")
    return None

def get_organization(entity_id, card_ids=None, timeout=30):
    """Fetch the organization entity identified by ``entity_id``.

    Args:
        entity_id: Identifier appended to the organizations entity URL.
        card_ids: Optional list of card names; when given it is JSON-encoded
            and sent as the ``card_ids`` query parameter.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so an
            unresponsive server cannot block the script indefinitely.

    Returns:
        The decoded JSON reply, or ``None`` when the request fails (the
        failure is logged).
    """
    url = f"https://api.crunchbase.com/api/v4/entities/organizations/{entity_id}"
    headers = {
        "accept": "application/json",
        "x-cb-user-key": api_key
    }
    params = {}
    if card_ids:
        params['card_ids'] = json.dumps(card_ids)
    # Bound before the try so the except branch can tell "no response at all"
    # (requests.get itself raised) apart from "got an error response".
    response = None
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error getting organization data for entity ID {entity_id}: {str(e)}")
        # Compare with None explicitly: a Response object is falsy for error
        # status codes, which is exactly when its body is worth logging.
        if response is not None and response.text:
            logger.error(f"Response content: {response.text}")
    return None

company_data = []

# Cards requested for every organization; defined once instead of being
# rebuilt on each loop iteration.
organization_card_ids = [
    "acquiree_acquisitions", "acquirer_acquisitions", "child_organizations",
    "child_ownerships", "event_appearances", "fields", "founders",
    "headquarters_address", "investors", "ipos", "jobs", "key_employee_changes",
    "layoffs", "parent_organization", "parent_ownership",
    "participated_funding_rounds", "participated_funds", "participated_investments",
    "press_references", "raised_funding_rounds", "raised_funds", "raised_investments"
]

# Process all companies
# dropna() skips rows whose 'Company' cell is missing, so a NaN value is never
# passed to the API as if it were a company name.
for name in df['Company'].dropna().tolist():
    logger.info(f"Processing company: {name}")
    entity_id = get_entity_id(name)
    if entity_id:
        logger.info(f"Found entity ID for {name}: {entity_id}")
        organization = get_organization(entity_id, card_ids=organization_card_ids)
        if organization:
            # One record per company for which organization data was returned
            company_info = {
                'Company Name': name,
                'Entity ID': entity_id,
                'Organization': organization
            }
            company_data.append(company_info)
            logger.info(f"Added data for {name}")
        else:
            logger.warning(f"No organization data found for {name} (Entity ID: {entity_id})")
    else:
        logger.warning(f"No entity ID found for company: {name}")

    # Add delay between requests; runs after every company, found or not
    time.sleep(5)  # Sleep for 5 seconds between companies

# Convert to DataFrame
# Columns are listed explicitly so that an empty result still yields a
# DataFrame, and therefore a CSV header row, with the expected columns.
df_company_data = pd.DataFrame(
    company_data,
    columns=['Company Name', 'Entity ID', 'Organization'],
)

print(df_company_data)
# The count covers only companies for which organization data was collected
print(f"Total companies processed: {len(df_company_data)}")

# NOTE(review): 'Organization' holds the decoded JSON object for each company;
# presumably it is written to the CSV in its string form. Confirm that is the
# intended format if the file needs to be parsed back later.
df_company_data.to_csv('crunchbase_company_data.csv', index=False)
