# Setup

In [1]:
import pandas as pd
import numpy as numpy
import os
import json
from pathlib import Path
from tqdm.notebook import tqdm

# Own functions
from utils import query, ETL
from utils.helpers import *

## Load api key

In [2]:
keys_path = "keys/api_key.json"
# Check if existing keys
if not os.path.exists(keys_path):
    keys = {}
    while True:
        # In case it does not exist, the following are requested 
        name_key = str(input("Enter name of your API key: "))
        api_key = str(input("Enter your API key token: "))

        keys[name_key] = api_key

        answer = str(input("Other API key? y/n")).lower()

        if answer in ["y", "yes", "n", "no"]:
            Path('keys').mkdir(parents=True, exist_ok=True)
            if answer in ["n", "no"]:
                # Finally save keys
                with open(keys_path, 'w') as file:
                    json.dump(keys, file)
                break
        else:
            while answer not in ["y", "yes", "n", "no"]:
                answer = str(input("Invalid answer\n. Other API key? y/n")).lower()


    
with open(keys_path, 'r') as file:
    keys_data = json.load(file) 

printg("API keys loaded successfully")             

[92mAPI keys loaded successfully[0m


# Load data

In [3]:

data_path = "data/data.csv"
# Check if existing data
if not os.path.exists(data_path):
    Path('data').mkdir(parents=True, exist_ok=True)
    printy("Start search")
    # Extract first page
    dict_data_result = query.searchAPI(keys_data["hubspot"])
    # Convert in DataFrame
    data = [ETL.dict2dataframe(dict_data_result)]
    
    data_size = dict_data_result["total"]
    iterations = data_size // 100 if data_size % 100 == 0 else (data_size // 100) + 1
    limit = 100
    for i in tqdm(range(1,iterations), desc="Extracting"):
        if data_size - (i*100) <= 100:
            limit = data_size - (i*100)
            print(limit)
        dict_data_result = query.searchAPI(keys_data["hubspot"], limit = limit, page = i)
        # Convert in DataFrame
        data.extend([ETL.dict2dataframe(dict_data_result)])
    printg("Finish search")
    # Save
    data = pd.concat(data)
    data.to_csv(data_path, sep = "|", index = False)

else:
    print("Exist data.csv")
    printy("Start load")
    data = pd.read_csv(data_path, sep = "|")
    printg("Finish load")


print("\t Total elements", len(data))

[93mStart search[0m


Extracting:   0%|          | 0/69 [00:00<?, ?it/s]

36
[92mFinish search[0m
	 Total elements 6936


In [4]:
data.tail()

Unnamed: 0,address,country,createdate,hs_object_id,industry,lastmodifieddate,phone,raw_email,technical_test___create_date
31,"Spruce Grove, 6997",Milton Keynes,2023-05-15T02:39:02.017Z,413209,Fruit and vegetables,2023-09-16T10:58:09.029Z,0-804-316-088,Violet <violet_moran1613873168@gembat.biz> Con...,2021-02-11
32,"King William Rue, 748",Cork,2023-05-15T02:39:02.017Z,413901,Dairy products,2023-09-16T10:55:10.641Z,3-668-267-421,Vera <vera_payne1587262285@gmail.com> Contact ...,2021-01-10
33,"Udall Street, 7289",London,2023-05-15T02:39:02.017Z,421501,Milling,2023-09-16T10:53:10.027Z,4-735-376-611,Vera <vera_payne1227725435@irrepsy.com> Contac...,2022-01-01
34,"Belmont Park Walk, 5338",Dublin,2023-05-15T02:39:02.017Z,421753,Poultry and fish,2023-09-16T10:55:40.011Z,5-655-825-605,Vera <vera_janes1928590357@nanoff.biz> Contact...,2021-11-18
35,"Woodland Hill, 8430",Milton Keynes,2023-05-15T02:39:02.017Z,422853,Dairy products,2023-09-16T10:53:28.891Z,7-777-162-004,Vicky <vicky_morrison114799425@atink.com> Cont...,2021-07-16
