In [None]:
import os
import time

import numpy as np
import pandas as pd
import requests

* define function to load a company list and filter for **founding year**, **industry** and **size**

In [None]:
def _as_list(values):
    """Normalise a filter argument to a plain list.

    ``None`` and falsy scalars (``''``, ``0``) mean "no filter" and give an
    empty list; any other scalar or string becomes a one-element list; every
    other iterable (list, tuple, generator, NumPy array, ...) is materialised.
    """
    if values is None:
        return []
    if isinstance(values, str) or not hasattr(values, '__iter__'):
        return [values] if values else []
    return list(values)


def load_companies(path, filter_year=None, filter_industry=None, filter_size=None):
    """Load a company list from a JSON file and optionally filter it.

    Parameters
    ----------
    path : str or path-like
        JSON file handed to ``pd.read_json``. It must provide an ``id``
        column, plus ``founded`` / ``industry`` / ``size`` columns for
        whichever filters are requested.
    filter_year : iterable of int or str, optional
        Keep only rows whose ``founded`` value is one of these years. Values
        are matched both as text and numerically, so the filter works whether
        ``founded`` was loaded as strings, ints or floats.
    filter_industry : iterable of str, optional
        Keep only rows whose ``industry`` value is in this collection.
    filter_size : iterable of int or str, optional
        Size buckets to keep, given either as the bucket's upper bound
        (10, 50, 200, 500, 1000, 5000, 10000, or 10001 for ``'10001+'``) or
        directly as the bucket label (e.g. ``'11-50'``).

    An empty or ``None`` filter leaves the corresponding column unfiltered.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        The (filtered) table and an array built from its ``id`` column.

    Raises
    ------
    KeyError
        If ``filter_size`` contains a value that is neither a known bucket
        key nor a known bucket label.
    """
    size_dict = {10: '1-10',
                 50: '11-50',
                 200: '51-200',
                 500: '201-500',
                 1000: '501-1000',
                 5000: '1001-5000',
                 10000: '5001-10000',
                 10001: '10001+',
                 }

    df = pd.read_json(path)

    years = _as_list(filter_year)
    if years:
        founded = df['founded']
        wanted_text = [str(year) for year in years]
        # pd.read_json infers dtypes, so `founded` may hold numbers rather
        # than strings; a purely textual comparison would then silently match
        # nothing. Compare numerically as well.
        wanted_num = pd.to_numeric(pd.Series(wanted_text), errors='coerce').dropna().tolist()
        founded_num = pd.to_numeric(founded, errors='coerce')
        mask = founded.isin(wanted_text) | founded_num.isin(wanted_num)
        df = df.loc[mask]

    industries = _as_list(filter_industry)
    if industries:
        df = df.loc[df['industry'].isin(industries)]

    sizes = _as_list(filter_size)
    if sizes:
        known_labels = set(size_dict.values())
        labels = []
        for size in sizes:
            if size in size_dict:
                labels.append(size_dict[size])
            elif size in known_labels:
                labels.append(size)
            else:
                raise KeyError(
                    'unknown company size {!r}; expected one of {} or one of {}'.format(
                        size, sorted(size_dict), sorted(known_labels)))
        df = df.loc[df['size'].isin(labels)]

    return df, np.array(df['id'])

* define function to call the Accountstory decision-makers (DM) API and collect the results in a pandas DataFrame

In [None]:
def load_data(API_KEY, company_list, per_page, page, timeout=30):
    """Query the decision-makers endpoint once per company and stack the results.

    Parameters
    ----------
    API_KEY : str
        Key sent as the ``api_key`` query parameter.
    company_list : sequence
        Values sent one at a time as the ``company_name`` form field
        (each is converted with ``str``). Must support ``len``.
    per_page, page : int
        Passed through unchanged as the ``per_page`` and ``page`` form fields.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up on that company.

    Returns
    -------
    pandas.DataFrame
        All ``records`` returned by the API, concatenated with a fresh
        0..n-1 index. Empty if nothing was returned.

    Notes
    -----
    The download is best-effort: a company whose request fails, whose
    response is not JSON, or whose response has no ``records`` entry is
    skipped, and the number of skipped companies is reported at the end.
    ``KeyboardInterrupt`` is *not* swallowed, so the loop can be stopped.
    """
    url = 'https://accountstory.com/api/v2/decision_makers'
    headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'}

    frames = []        # one DataFrame per company that returned records
    n_rows = 0         # running total, only used for the progress line
    n_failed = 0
    last_error = None
    n = len(company_list)

    for i, company in enumerate(company_list):
        if i % 100 == 0:
            print(i, '/', n, '---', n_rows, 'data points ---', time.strftime('%H:%M'))

        try:
            # Passing dicts lets requests URL-encode the values, so names
            # containing '&', '=', spaces or non-ASCII characters survive.
            response = requests.post(
                url,
                params={'api_key': API_KEY},
                headers=headers,
                data={'company_name': str(company), 'per_page': per_page, 'page': page},
                timeout=timeout,
            )
            response.raise_for_status()
            frame = pd.DataFrame(response.json()['records'])
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            n_failed += 1
            last_error = err
            continue

        if not frame.empty:
            frames.append(frame)
            n_rows += len(frame)

    print('done')
    if n_failed:
        print(n_failed, 'of', n, 'companies skipped; last error:', repr(last_error))

    # A single concat avoids both the removed DataFrame.append API and the
    # quadratic cost of growing a frame inside the loop.
    if frames:
        return pd.concat(frames, ignore_index=True)
    return pd.DataFrame()

* load company dataframe and company list
* needs a dataset created from https://www.peopledatalabs.com/company-dataset, filtered with MySQL so that it contains **only CA companies and only tech**
* additional filtering for **founding year**, **industry** and **size** is available but not applied here

In [None]:
# Load the company table together with the array of company ids.
# Optional keyword filters, all currently switched off:
#   filter_year=[2019]
#   filter_industry=['information technology and services']
#   filter_size=[500, 1000]
company_df, company_list = load_companies(path='../data/company_list_CA.json')
company_df

* load data

In [None]:
# Read the accountstory API key from the environment instead of writing it
# into the notebook (the original `key = ` placeholder was a syntax error,
# and a pasted key would end up in version control).
key = os.environ.get('ACCOUNTSTORY_API_KEY')
if not key:
    raise RuntimeError('Set the ACCOUNTSTORY_API_KEY environment variable to your accountstory API key')

# Request per_page=50 records for each company, page 0.
df = load_data(key, company_list, 50, 0)

Export dataframe

In [None]:
# Write the collected records to a JSON file in the shared data directory.
df.to_json(path_or_buf='../data/people_list_DM.json')