<b><h1>Hype & Dexter Backend Test</h1></b>

<b>Sanjay Patel \
10/12/2022</b>

-----

<h2><b>Table of Contents</b></h2>

- 1.0. Objectives
- 2.0. Creating the Program
    - 2.1. Update Company Name
    - 2.2. Create Parent Companies
    - 2.3. Build Parent/Child Association

-----

<b><h2>1.0. Objectives</h2></b>

- Identify child companies based on the internal ‘Client Parent Company ID’, or determine that a company is not a child company
- Update the Company ‘Name’ using the supplied CSV as a reference
- Create a parent company (if needed), where the ‘Client Parent Company ID’ becomes that company’s “Client Company Location ID”.
- Associate each child company to the newly created parent company
- Document and commit your code to a repository of your choice (GitHub preferred)


-----

<b><h2>2.0. Creating the Program</h2></b>

In [1]:
# import the libraries
import numpy as np
import pandas as pd
import re
import hubspot
from typing import List
from collections import Counter
from pprint import pprint
from hubspot import HubSpot
from hubspot.crm.companies import SimplePublicObjectInput


In [535]:
# read the private app token from an external file for security (rather than hard-coding it)
# the context manager closes the file as soon as the token has been read
with open('token.txt', 'r') as f:
    # strip surrounding whitespace so the trailing newline kept by readline()
    # does not become part of the token
    token = f.readline().strip()

<b><h3>2.1. Update Company Name</h3></b>

In [537]:
# create the HubSpot API client, authenticating with the private app token
client = hubspot.Client.create(access_token=token)

In [538]:
# fetch every company, requesting only the properties used below
# each property name is its own list element rather than one comma-separated string
response = client.crm.companies.get_all(
    properties=[
        "hs_object_id",
        "name",
        "right_company_name",
        "client_company_location_id",
        "client_parent_company_id",
    ]
)

In [539]:
# pretty-print the response to check that we received valid company data
pprint(response)

[{'archived': False,
 'archived_at': None,
 'associations': None,
 'created_at': datetime.datetime(2022, 12, 8, 20, 29, 11, 406000, tzinfo=tzutc()),
 'id': '10287658376',
 'properties': {'client_company_location_id': '5230',
                'client_parent_company_id': '4157',
                'createdate': '2022-12-08T20:29:11.406Z',
                'hs_lastmodifieddate': '2022-12-10T10:52:29.263Z',
                'hs_object_id': '10287658376',
                'name': 'Datacom Systems Ltd - 5230',
                'right_company_name': 'Datacom Systems Ltd - 5230'},
 'properties_with_history': None,
 'updated_at': datetime.datetime(2022, 12, 10, 10, 52, 29, 263000, tzinfo=tzutc())},
 {'archived': False,
 'archived_at': None,
 'associations': None,
 'created_at': datetime.datetime(2022, 12, 8, 20, 29, 11, 408000, tzinfo=tzutc()),
 'id': '10287658381',
 'properties': {'client_company_location_id': '7662',
                'client_parent_company_id': '3114',
                'createdate': '2

In [338]:
# store the number of companies returned (the length of the response) for later reference
num_of_companies = len(response)
print(num_of_companies)

361


In [339]:
# convert every company object in the response to a plain dict, collected in one list
company_all_info = [company.to_dict() for company in response]

In [340]:
# inspect the first entry to confirm which data type we are working with
type(company_all_info[0])

dict

In [527]:
# inspect the properties of the first company to see which keys are available
company_all_info[0]['properties']

{'client_company_location_id': '5230',
 'client_parent_company_id': '4157',
 'createdate': '2022-12-08T20:29:11.406Z',
 'hs_lastmodifieddate': '2022-12-10T03:29:09.161Z',
 'hs_object_id': '10287658376',
 'name': 'Datacom Systems Ltd',
 'right_company_name': 'Datacom Systems Ltd - 5230'}

In [342]:
# one empty list per company property that gets collected
(
    company_id_list,
    right_name_company_list,
    location_id_list,
    parent_id_list,
) = [], [], [], []

In [343]:
# pull the 'properties' dict out of every company once, then fill each list from it
company_properties = [company['properties'] for company in company_all_info]

company_id_list.extend(props['hs_object_id'] for props in company_properties)
right_name_company_list.extend(props['right_company_name'] for props in company_properties)
location_id_list.extend(props['client_company_location_id'] for props in company_properties)

# a company whose right_company_name contains "parent" (any case) is recorded
# with -1 instead of its client_parent_company_id
parent_id_list.extend(
    -1
    if re.search('parent', props['right_company_name'], re.IGNORECASE)
    else props['client_parent_company_id']
    for props in company_properties
)

In [344]:
# map each company's object id to its right company name for easy lookup
company_names_dict = dict(zip(company_id_list, right_name_company_list))

In [345]:
# use regex to clean text of the company names
# e.g Datacom Systems Ltd - 5230
# to  Datacom Systems Ltd  
def clean_text(text: str) -> str:
    """Return a company name with its numeric suffix and separator removed.

    Every "-" followed by a non-word character and every digit is replaced
    by a space; runs of whitespace are then collapsed to a single space and
    leading/trailing whitespace is stripped.

    Example: ``'Datacom Systems Ltd - 5230'`` -> ``'Datacom Systems Ltd'``.

    Args:
        text: The raw company name.

    Returns:
        The cleaned company name.
    """
    text = re.sub(r'-\W', ' ', text)
    text = re.sub(r'\d', ' ', text)
    # re.sub returns a new string, so the result has to be assigned back;
    # otherwise the whitespace runs left by the substitutions above survive
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

In [346]:
def update_company_names(companies_dict: dict) -> None:
    """Set the ``name`` property of every company in *companies_dict*.

    Args:
        companies_dict: Mapping of company object id to the name that should
            be written to that company.

    One update request is sent per entry through the module-level ``client``.
    """
    # iterate the key/value pairs directly instead of rebuilding
    # list(keys()) / list(values()) on every iteration
    for company_id, company_name in companies_dict.items():
        # object carrying only the property to be updated
        simple_public_object_input = SimplePublicObjectInput(properties={"name": company_name})

        # send the update
        client.crm.companies.basic_api.update(
            company_id=company_id,
            simple_public_object_input=simple_public_object_input,
        )

In [347]:
# update the company names
update_company_names(company_names_dict)

-----

<b><h3>2.2. Create Parent Companies</h3></b>

In [348]:
# cleaned version of every right company name, in the same order as right_name_company_list
clean_company_names_list = [clean_text(name) for name in right_name_company_list]

In [350]:
# object id -> cleaned company name, to reference later
clean_company_names_dict = dict(zip(company_id_list, clean_company_names_list))

# object id -> parent id (-1 where the company was flagged as a parent), to reference later
company_parent_id_dict = dict(zip(company_id_list, parent_id_list))

In [352]:
def get_company_name(id: str) -> str:
    """Return the cleaned name of the first company whose parent id equals *id*.

    The lookup goes through ``company_parent_id_dict`` (object id -> parent id)
    to find the object id, then through ``clean_company_names_dict`` to find
    the name. ``list.index`` raises ``ValueError`` when no company has the
    given parent id.
    """
    object_ids = list(company_parent_id_dict.keys())
    parent_ids = list(company_parent_id_dict.values())

    # keys and values share the same order, so the position of the parent id
    # is also the position of the matching object id
    position = parent_ids.index(id)

    return clean_company_names_dict.get(object_ids[position])

In [355]:
def create_parent_companies(parent_company_list: List[str]) -> None:
    """Create a parent company for every id that is not yet a location id.

    Args:
        parent_company_list: Parent company ids to create companies for.

    An id that already appears in ``location_id_list`` is skipped. For every
    other id a company is created whose ``name`` and ``right_company_name``
    are the cleaned child name plus " (Parent)" and whose
    ``client_company_location_id`` is the id itself.
    """
    # build the membership set once so each check is O(1)
    existing_location_ids = set(location_id_list)

    for parent_id in parent_company_list:
        if parent_id in existing_location_ids:
            continue

        # look the name up once and reuse it for both name properties
        parent_name = get_company_name(str(parent_id)) + " (Parent)"
        properties = {
            "name": parent_name,
            "right_company_name": parent_name,
            "client_company_location_id": parent_id,
        }

        # create the company with the above properties
        simple_public_object_input = SimplePublicObjectInput(properties=properties)
        client.crm.companies.basic_api.create(simple_public_object_input=simple_public_object_input)

In [356]:
# count how many companies reference each parent company id
count_of_company_location_id = dict(Counter(parent_id_list))

In [357]:
# keep only parent ids referenced by two or more companies, excluding -1 (the marker for an existing parent company)
parents_to_create = {x: count for x, count in count_of_company_location_id.items() if count >= 2 and x != -1}

In [359]:
# create a parent company for each selected parent id
create_parent_companies(list(parents_to_create.keys()))

-----

<b><h3>2.3. Build Parent/Child Association</h3></b>

In [360]:
# fetch all companies again so the new parent companies and their object IDs are included
# each property name is its own list element rather than one comma-separated string
response = client.crm.companies.get_all(
    properties=[
        "hs_object_id",
        "name",
        "right_company_name",
        "client_company_location_id",
        "client_parent_company_id",
    ]
)

# plain-dict version of every company in the refreshed response
updated_company_id = [company.to_dict() for company in response]

In [361]:
# check the length to confirm that the new parent companies are included
len(updated_company_id)

388

In [425]:
# pull the 'properties' dict out of every refreshed company once
updated_properties = [company['properties'] for company in updated_company_id]

updated_company_id_list = [props['hs_object_id'] for props in updated_properties]
updated_location_id_list = [props['client_company_location_id'] for props in updated_properties]

# a company whose right_company_name contains "parent" (any case) is recorded
# with -1 instead of its client_parent_company_id
updated_parent_id_list = [
    -1
    if re.search('parent', props['right_company_name'], re.IGNORECASE)
    else props['client_parent_company_id']
    for props in updated_properties
]

In [365]:
# create dictionaries keyed by object id to use later: one for location ids, one for parent ids
updated_company_location_dict = dict(zip(updated_company_id_list, updated_location_id_list))
updated_company_parent_id_dict = dict(zip(updated_company_id_list, updated_parent_id_list))

In [531]:
def get_object_id(location_id: str) -> str:
    """Return the object id of the first company with the given location id.

    Args:
        location_id: Location id to search for among the values of
            ``updated_company_location_dict`` (object id -> location id).

    Returns:
        The object id stored against the first matching location id.

    Raises:
        ValueError: If no company has that location id (from ``list.index``).
    """
    object_ids = list(updated_company_location_dict.keys())
    location_ids = list(updated_company_location_dict.values())

    # keys and values share the same order, so the position of the location id
    # is also the position of the matching object id
    object_id = object_ids[location_ids.index(location_id)]

    # return the value just looked up (the original returned the name `id`)
    return object_id

In [500]:
def get_parent_id(object_id: str) -> str:
    """Return the parent id stored for a company's object id.

    The value comes from ``updated_company_parent_id_dict``; ``None`` is
    returned when *object_id* is not a key of that dictionary.
    """
    return updated_company_parent_id_dict.get(object_id)



In [435]:
def create_association(parent_object_id, child_object_id):
    """Send one company-to-company association request to HubSpot.

    Both object ids are converted with ``int()`` before being sent, with the
    parent as ``company_id`` and the child as ``to_object_id``.
    """
    from_company = int(parent_object_id)
    to_company = int(child_object_id)

    # NOTE(review): type id 13 is presumably HubSpot's parent-to-child company
    # association; confirm against the association type reference
    spec = [
        {
            "associationCategory": "HUBSPOT_DEFINED",
            "associationTypeId": 13,
        }
    ]

    client.crm.companies.associations_api.create(
        company_id=from_company,
        to_object_type="company",
        to_object_id=to_company,
        association_spec=spec,
    )


In [None]:
# create the parent/child associations
# NOTE: no loop variable here is called `id`; at notebook scope that would
# rebind the builtin `id` for every later cell

# location id -> object id of the first company that has it;
# companies without a location id can never be a parent, so they are left out
location_to_object_id = {}
for object_id, location_id in updated_company_location_dict.items():
    if location_id is not None:
        location_to_object_id.setdefault(location_id, object_id)

# walk every company once, so companies sharing a location id are all handled
for child_object_id, child_parent_id in updated_company_parent_id_dict.items():

    # find the company whose location id equals this company's parent id;
    # -1 (existing parent) and unknown parent ids have no match
    parent_object_id = location_to_object_id.get(child_parent_id)
    if parent_object_id is None:
        continue

    # create parent-child association
    create_association(parent_object_id, child_object_id)