# Fetching Companies and descriptions with Crunchbase

We have a list of the top 100 highest evaluated Deep Tech startups in Europe. We can now use the Crunchbase API to get information relevant for linking.

In [31]:
import requests
import os

# Query the Crunchbase organization search endpoint for companies founded on
# or after 2020-01-01, at seed / early / late venture stage, inside one
# location, then print a one-line summary per returned entity.
API_KEY = os.getenv("CRUNCHBASE_API_KEY")
if not API_KEY:
    # Fail before any request is made; otherwise the call goes out
    # unauthenticated and the only symptom is an HTTP error from the server.
    raise RuntimeError(
        "CRUNCHBASE_API_KEY is not set; export it before running this cell"
    )

BASE_URL = "https://api.crunchbase.com/v4/data/searches/organizations"
REQUEST_TIMEOUT_SECONDS = 30  # requests waits indefinitely without a timeout
RESULT_LIMIT = 200
# NOTE(review): presumably the Crunchbase UUID of the Europe region, given the
# countries in the results -- confirm against the Crunchbase location entity.
LOCATION_UUID = "6106f5dc-823e-5da8-40d7-51612c0b2c4e"

# The key is sent as a header rather than a query parameter: requests includes
# the full request URL in HTTPError messages, so a query-string key would be
# written into the notebook output whenever raise_for_status() fails.
params = {}  # intentionally empty; kept so the name stays defined for other cells
headers = {"Content-Type": "application/json", "X-cb-user-key": API_KEY}

payload = {
    "field_ids": [
        "identifier",
        "short_description",
        "description",
        "website",
        "company_type",
        "location_identifiers",
        "funding_total",
        "founded_on",
        "funding_stage",
        "category_groups",
        "categories",
    ],
    # Optional extra filter that was tried and disabled: an "equity_funding_total"
    # predicate with operator "between" and values USD 5,000,000 - 200,000,000.
    "query": [
        {
            "type": "predicate",
            "field_id": "founded_on",
            "operator_id": "gte",
            "values": ["2020-01-01"],
        },
        {
            "type": "predicate",
            "field_id": "funding_stage",
            "operator_id": "includes",
            "values": ["seed", "early_stage_venture", "late_stage_venture"],
        },
        {
            "type": "predicate",
            "field_id": "location_identifiers",
            "operator_id": "includes",
            "values": [LOCATION_UUID],
        },
    ],
    # Results are ranked by equity_funding_total, while the summary printed
    # below shows funding_total, so the printed amounts need not be descending.
    "order": [{"field_id": "equity_funding_total", "sort": "desc"}],
    "limit": RESULT_LIMIT,
}

response = requests.post(
    BASE_URL,
    params=params,
    headers=headers,
    json=payload,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
response.raise_for_status()
data = response.json()

for item in data.get("entities", []):
    # `or {}` / `or []` also covers fields returned as an explicit null,
    # which a plain .get(key, {}) default would not.
    props = item.get("properties") or {}
    identifier = (props.get("identifier") or {}).get("value", "")
    country = next(
        (
            loc.get("value")
            for loc in (props.get("location_identifiers") or [])
            if loc.get("location_type") == "country"
        ),
        None,
    )
    founded_on = (props.get("founded_on") or {}).get("value", "")
    funding_total = (props.get("funding_total") or {}).get("value_usd", "")
    print(f"{identifier} | {country} | {founded_on} | {funding_total}")

    



Helsing | Germany | 2021-01-01 | 1524499872
Verkor | France | 2020-01-01 | 4041351633
Abound | United Kingdom | 2020-02-01 | 1979352879
Mistral AI | France | 2023-04-01 | 1188417938
Newcleo | France | 2021-01-01 | 677132243
JOKR | Germany | 2021-01-01 | 530000000
Electra | France | 2021-01-01 | 547768454
Verdiva Bio | United Kingdom | 2024-07-29 | 411000000
Razor Group | Germany | 2020-08-01 | 1147774035
Aira | Sweden | 2022-01-01 | 535425143
Zapp | United Kingdom | 2020-01-01 | 300000000
Beacon Therapeutics | United Kingdom | 2023-01-01 | 290139662
Nothing | United Kingdom | 2020-01-01 | 252882301
Shop Circle | United Kingdom | 2021-04-15 | 245000000
Olsam Group | United Kingdom | 2020-01-01 | 227225467
HysetCo | France | 2020-01-01 | 214822771
SynOx Therapeutics | Ireland | 2020-01-01 | 245905189
The Exploration Company | Germany | 2021-07-01 | 209857798
Vivid | Germany | 2020-01-01 | 204895003
Windward Bio | Switzerland | 2024-01-01 | 200000000
Kana Labs | United Kingdom | 2022-01-0

In [32]:
import json

# Persist the entity records from the search response as pretty-printed JSON
# so later steps can work from the file instead of calling the API again.
raw_json_path = "../data/company-data/crunchbase_fetching_raw.json"
with open(raw_json_path, "w") as raw_json_file:
    json.dump(data["entities"], raw_json_file, indent=2)
    


In [34]:
import csv

# Build the linking CSV (name, permalink, description) from the saved raw
# JSON, then read the CSV back to report how many data rows it holds.
RAW_JSON_PATH = "../data/company-data/crunchbase_fetching_raw.json"
LINKING_CSV_PATH = "../data/company-data/crunchbase_fetch_linking.csv"

# Loaded into `entities` rather than `data`: the file holds a plain list, and
# rebinding `data` would break a re-run of the earlier cell that reads
# data["entities"] from the API response dict.
with open(RAW_JSON_PATH, encoding="utf-8") as f:
    entities = json.load(f)

# UTF-8 is explicit so non-ASCII descriptions are written the same way on
# every platform; newline="" lets the csv module control line endings.
with open(LINKING_CSV_PATH, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["identifier.value", "identifier.permalink", "description"])
    for entity in entities:
        props = entity["properties"]
        identifier = props.get("identifier", {})
        writer.writerow([
            identifier.get("value", ""),
            identifier.get("permalink", ""),
            props.get("description", ""),
        ])

# Count rows in the CSV. Counting parsed records (not text lines) keeps the
# figure right even if a description contains embedded line breaks.
with open(LINKING_CSV_PATH, newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    next(reader)  # Skip header
    row_count = sum(1 for _ in reader)
print(f"CSV contains {row_count} rows")


CSV contains 200 rows
