In [1]:
import csv
import json
from urllib.request import urlopen
from urllib.parse import urlencode

In [2]:
google_api_key = open('.google_api_key').read()
query_url = 'https://kgsearch.googleapis.com/v1/entities:search'

In [3]:
def get_company_json(query):
  params = {
    'query': query,
    'limit': 1,
    'indent': True,
    'key': google_api_key,
    'types': 'Organization'
  }
  req_url = query_url + '?' + urlencode(params)
  resp = json.loads(urlopen(req_url).read())
  if not resp['itemListElement']:
    print('Empty query result:', query)
    return None
  return resp['itemListElement'][0]['result']

In [4]:
def parse_company_info(jso):
  if not jso:
    return '', '', '', ''
  name = jso.get('name', '')
  ind = jso.get('description', '')
  url = jso.get('url', '')
  desc = ''
  details = jso.get('detailedDescription', None)
  if details:
    desc = details.get('articleBody', '')
    if not url:
      url = jso.get('url', '')
  return name, ind, desc, url

In [5]:
def make_company_dict(booth_dict, name):
  jso = get_company_json(name)
  name2, ind, desc, url = parse_company_info(jso)
  if name != name2:
    print('Possible name mismatch', name, name2)
  return {'booth': booth_dict[name], 
          'name': name,
          'industry': ind,
          'description': desc, 
          'url': url}

In [6]:
booths = {}
with open('ExhibitorList.csv') as f:
  reader = csv.DictReader(f)
  for row in reader:
    booths[row['Company']] = row['Booth']

In [7]:
data = {'companies': []}
for idx, name in enumerate(booths):
  print('Processing {} out of 466: {}'.format(idx, name))
  d = make_company_dict(booths, name)
  if d:
    data['companies'].append(d)

Processing 0 out of 466: 10x Genomics
Processing 1 out of 466: 3M
Processing 2 out of 466: Accenture
Processing 3 out of 466: ACM / ACM-W
Possible name mismatch ACM / ACM-W Association for Computing Machinery
Processing 4 out of 466: Activision Blizzard
Processing 5 out of 466: Adobe
Processing 6 out of 466: ADP
Possible name mismatch ADP ADP, LLC
Processing 7 out of 466: Aetna/CVS Health
Possible name mismatch Aetna/CVS Health CVS Pharmacy
Processing 8 out of 466: Affirm
Processing 9 out of 466: Airbnb, Inc.
Possible name mismatch Airbnb, Inc. Airbnb
Processing 10 out of 466: AKAMAI
Possible name mismatch AKAMAI Akamai Technologies
Processing 11 out of 466: Alegeus
Empty query result: Alegeus
Possible name mismatch Alegeus 
Processing 12 out of 466: AlixPartners
Processing 13 out of 466: Allstate Insurance Company
Possible name mismatch Allstate Insurance Company Allstate
Processing 14 out of 466: Amazon
Possible name mismatch Amazon Amazon.com
Processing 15 out of 466: American Expre

Processing 103 out of 466: CrowdStrike
Processing 104 out of 466: Cru
Processing 105 out of 466: Cummins Inc.
Possible name mismatch Cummins Inc. Cummins
Processing 106 out of 466: D. E. Shaw Research
Possible name mismatch D. E. Shaw Research D. E. Shaw &amp; Co.
Processing 107 out of 466: Datadog
Processing 108 out of 466: Dataminr
Empty query result: Dataminr
Possible name mismatch Dataminr 
Processing 109 out of 466: Datto
Processing 110 out of 466: DeepMind
Possible name mismatch DeepMind DeepMind Technologies
Processing 111 out of 466: Deloitte
Processing 112 out of 466: Denso
Processing 113 out of 466: Deutsche Bank
Processing 114 out of 466: DigitalBCG
Empty query result: DigitalBCG
Possible name mismatch DigitalBCG 
Processing 115 out of 466: Discover
Possible name mismatch Discover Discover Financial Services
Processing 116 out of 466: Docker, Inc.
Processing 117 out of 466: DocuSign
Processing 118 out of 466: DoorDash
Processing 119 out of 466: DraftKings
Processing 120 out 

Possible name mismatch LinkedIn Microsoft Corporation
Processing 215 out of 466: LiveRamp
Processing 216 out of 466: Looker Data Sciences
Possible name mismatch Looker Data Sciences Looker
Processing 217 out of 466: Los Alamos National Laboratory
Processing 218 out of 466: Los Angeles Dept of Water and Power
Possible name mismatch Los Angeles Dept of Water and Power Los Angeles Department of Water and Power
Processing 219 out of 466: Lowe's Companies, Inc.
Possible name mismatch Lowe's Companies, Inc. Lowe's
Processing 220 out of 466: Lyft
Processing 221 out of 466: Macy's Tech
Possible name mismatch Macy's Tech Paribus
Processing 222 out of 466: Major League Baseball
Possible name mismatch Major League Baseball MLB
Processing 223 out of 466: Massachusetts Institute of Technology - Lincoln Laboratory
Possible name mismatch Massachusetts Institute of Technology - Lincoln Laboratory Massachusetts Institute of Technology
Processing 224 out of 466: Massachusetts Institute of Technology - S

Possible name mismatch Reddit, Inc. reddit Inc.
Processing 308 out of 466: Redfin
Processing 309 out of 466: RetailMeNot Inc.
Possible name mismatch RetailMeNot Inc. RetailMeNot
Processing 310 out of 466: Rice University
Processing 311 out of 466: Riot games
Possible name mismatch Riot games Riot Games
Processing 312 out of 466: Ripple
Possible name mismatch Ripple Ripple Labs
Processing 313 out of 466: Robinhood Markets Inc.
Possible name mismatch Robinhood Markets Inc. Robinhood
Processing 314 out of 466: Rochester Institute Of Technology
Possible name mismatch Rochester Institute Of Technology Rochester Institute of Technology
Processing 315 out of 466: Rocket Mortgage by Quicken Loans
Possible name mismatch Rocket Mortgage by Quicken Loans Quicken Loans
Processing 316 out of 466: Rockwell automation
Possible name mismatch Rockwell automation Rockwell Automation
Processing 317 out of 466: Rose-Hulman Institute of Technology
Processing 318 out of 466: Royal Caribbean Cruises Ltd.
Pos

Empty query result: University of Arizona MIS
Possible name mismatch University of Arizona MIS 
Processing 404 out of 466: University of California, Irvine
Possible name mismatch University of California, Irvine 
Processing 405 out of 466: University of California, Santa Cruz
Possible name mismatch University of California, Santa Cruz University of California Santa Cruz
Processing 406 out of 466: University of Chicago
Possible name mismatch University of Chicago 
Processing 407 out of 466: University of Denver
Processing 408 out of 466: University of Maryland Engineering
Possible name mismatch University of Maryland Engineering University of Maryland
Processing 409 out of 466: University of Maryland-Department of Computer Science
Empty query result: University of Maryland-Department of Computer Science
Possible name mismatch University of Maryland-Department of Computer Science 
Processing 410 out of 466: University of Massachusetts Amherst
Processing 411 out of 466: University of Mich

In [8]:
with open('data.txt', 'w') as outfile:
  json.dump(data, outfile, sort_keys=True, indent=4, separators=(',', ': '))