# In [None]:
#IMPORT PACKAGES
import pandas as pd
import random
import time
import numpy as np
import httplib2
import yaml
import json
import sys
import subprocess
from googleapiclient import discovery
from oauth2client import client

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, ElementNotInteractableException, TimeoutException
from selenium.webdriver.chrome.options import Options

# In [None]:
#DEFINE FUNCTIONS
# Progress message only; the helper functions used by the later cells are
# defined below.
print('Define functions...')

def get_config(config_set):
    """Return one named settings group from ``id_gsheet.yml``.

    The YAML file is read from the current working directory and must hold a
    top-level ``id`` mapping; the entry stored under ``id -> config_set`` is
    returned as-is.

    Args:
        config_set: Key of the settings group to return (e.g. 'spreadsheet').

    Returns:
        Whatever value the YAML file stores under ``id[config_set]``.

    Raises:
        OSError: If ``id_gsheet.yml`` cannot be opened.
        KeyError: If ``id`` or ``config_set`` is missing from the file.
    """
    cred_path = 'id_gsheet.yml'
    # Context manager so the file handle is closed even if parsing fails.
    with open(cred_path) as cred_file:
        config = yaml.safe_load(cred_file)
    return config['id'][config_set]



def get_credentials(cred_set, json_configs=''):
    """Build Google API credentials.

    When ``json_configs`` is non-empty it is handed to
    ``get_credentials_from_json`` and that result is returned. Otherwise the
    four form fields found in ``cred_set`` are joined with '&', posted to
    ``cred_set['url']`` with ``curl`` and the ``access_token`` of the JSON
    reply is wrapped in ``AccessTokenCredentials``.

    Args:
        cred_set: Mapping with the keys ``client_secret``, ``grant_type``,
            ``refresh_token``, ``client_id`` and ``url``.
            # assumes each value is already a ready-made "key=value" form
            # field, since they are joined verbatim -- TODO confirm
        json_configs: Optional JSON configuration; '' selects the token flow.

    Returns:
        The credentials object.
    """
    if json_configs != '':
        return get_credentials_from_json(json_configs)

    form_fields = [
        cred_set['client_secret'],
        cred_set['grant_type'],
        cred_set['refresh_token'],
        cred_set['client_id'],
    ]
    token_url = cred_set['url']
    payload = '&'.join(form_fields)

    # The token request is delegated to the curl command-line tool.
    raw_reply = subprocess.check_output(['curl', '--data', payload, token_url])
    access_token = json.loads(raw_reply)["access_token"]

    credentials = client.AccessTokenCredentials(access_token, 'my-user-agent/1.0')
    # Same authorize call as before; the wrapped http object is not used.
    credentials.authorize(httplib2.Http())
    return credentials

        

def wait_for_visibility(tag_name, elem_name, try_count):
    """Wait up to 10 seconds for an element to become visible.

    Uses the module-level ``driver``.

    Args:
        tag_name: Locator strategy name: "ID", "NAME", "CLASS_NAME", "XPATH",
            "CSS_SELECTOR" or "TAG_NAME".
        elem_name: Locator value matching ``tag_name``.
        try_count: Number of page reloads the caller has already done.

    Returns:
        None when the element became visible, the string "Timeout" when it
        did not. When the wait times out and ``try_count`` is above 9 the
        program is terminated with ``sys.exit()`` instead.
    """
    strategies = {
        label: getattr(By, label)
        for label in ("ID", "NAME", "CLASS_NAME", "XPATH", "CSS_SELECTOR", "TAG_NAME")
    }
    locator = (strategies.get(tag_name), elem_name)

    try:
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located(locator))
    except TimeoutException:
        timed_out = True
    else:
        timed_out = False

    if not timed_out:
        return None

    if try_count > 9:
        print("\nPage reloaded more than 10 times!")
        print("\nProgram terminated!")
        sys.exit()
    print("Error: Element '" + elem_name + "' not found!")
    return "Timeout"



def wait_for_invisibility(tag_name, elem_name, try_count):
    """Wait up to 10 seconds for an element to disappear from view.

    Uses the module-level ``driver``.

    Args:
        tag_name: Locator strategy name: "ID", "NAME", "CLASS_NAME", "XPATH",
            "CSS_SELECTOR" or "TAG_NAME".
        elem_name: Locator value matching ``tag_name``.
        try_count: Number of page reloads the caller has already done.

    Returns:
        None when the element became invisible, the string "Timeout" when it
        was still there after the wait. When the wait times out and
        ``try_count`` is above 9 the program is terminated with
        ``sys.exit()`` instead.
    """
    strategies = {
        label: getattr(By, label)
        for label in ("ID", "NAME", "CLASS_NAME", "XPATH", "CSS_SELECTOR", "TAG_NAME")
    }
    locator = (strategies.get(tag_name), elem_name)

    try:
        WebDriverWait(driver, 10).until(EC.invisibility_of_element_located(locator))
    except TimeoutException:
        still_present = True
    else:
        still_present = False

    if not still_present:
        return None

    if try_count > 9:
        print("\nPage reloaded more than 10 times!")
        print("\nProgram terminated!")
        sys.exit()
    print("Error: Element '" + elem_name + "' still present after 10 seconds!")
    return "Timeout"



def click_elem(tag_name, elem_name, try_count):
    """Wait up to 10 seconds for an element to be clickable, then click it.

    Uses the module-level ``driver``.

    Args:
        tag_name: Locator strategy name: "ID", "NAME", "CLASS_NAME", "XPATH",
            "CSS_SELECTOR" or "TAG_NAME".
        elem_name: Locator value matching ``tag_name``.
        try_count: Number of page reloads the caller has already done.

    Returns:
        None after a successful click, the string "Timeout" when the element
        never became clickable, went stale, or was not interactable. On any
        of those failures with ``try_count`` above 9 the program is
        terminated with ``sys.exit()`` instead.
    """
    strategies = {
        label: getattr(By, label)
        for label in ("ID", "NAME", "CLASS_NAME", "XPATH", "CSS_SELECTOR", "TAG_NAME")
    }
    locator = (strategies.get(tag_name), elem_name)

    # Tail of the error message for the failure that occurred, if any.
    failure = None
    try:
        target = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator))
    except TimeoutException:
        failure = "' not clickable!"
    else:
        try:
            target.click()
        except StaleElementReferenceException:
            failure = "' not attached to the page document!"
        except ElementNotInteractableException:
            failure = "' not interactable!"
        except TimeoutException:
            # A timeout raised by the click itself is reported like a
            # timeout of the wait.
            failure = "' not clickable!"

    if failure is None:
        return None

    if try_count > 9:
        print("\nPage reloaded more than 10 times!")
        print("\nProgram terminated!")
        sys.exit()
    print("Error: Element '" + elem_name + failure)
    return "Timeout"



def injection(spreadsheetId, df, rangeName):
    """Replace the contents of a Google Sheets range with a DataFrame.

    The range is cleared first, then the column names followed by every row
    of ``df`` are written with ``valueInputOption="USER_ENTERED"``. The
    caller's DataFrame is not modified.

    Args:
        spreadsheetId: Id of the target spreadsheet.
        df: DataFrame to upload; every cell is converted to ``str``.
        rangeName: A1-notation range to clear and refill,
            e.g. 'Company Overview!A1:AZ'.
    """
    # Blank out missing values BEFORE the str conversion: fillna() returns a
    # new frame (the original code discarded it), and once NaN/None have gone
    # through astype(str) they are the texts 'nan'/'None', which no longer
    # match a missing-value replacement and end up in the sheet.
    table = df.fillna('').astype(str)
    # Cells holding the literal text 'NaN' are blanked as before.
    table = table.replace(['NaN'], '')

    rows = [table.columns.values.tolist()] + table.values.tolist()

    cred_set = get_config('spreadsheet')
    credentials = get_credentials(cred_set)
    service = discovery.build('sheets', 'v4', credentials=credentials)
    resource = {
        "majorDimension": "ROWS",
        "values": rows,
    }
    #delete all the input
    service.spreadsheets().values().clear(
        spreadsheetId=spreadsheetId,
        range=rangeName
    ).execute()
    #add all input
    service.spreadsheets().values().append(
        spreadsheetId=spreadsheetId,
        range=rangeName,
        body=resource,
        valueInputOption="USER_ENTERED"
    ).execute()



def read_gsheet(file_id_properti, rangeName):
    """Read a Google Sheets range into a DataFrame.

    The first returned row supplies the column names; the remaining rows
    become the data, re-indexed from 0.

    Args:
        file_id_properti: Id of the spreadsheet to read.
        rangeName: A1-notation range, e.g. 'Company List!A1:AZ'.

    Returns:
        DataFrame of the rows below the header. Cells missing from a row are
        returned as '' so they compare equal to cells that are simply empty.

    Raises:
        KeyError: If the response carries no 'values' entry.
            # NOTE(review): presumably the case for a completely empty
            # range -- confirm against the Sheets API reference.
    """
    cred_set = get_config('spreadsheet')
    credentials = get_credentials(cred_set)
    service = discovery.build('sheets', 'v4', credentials=credentials)
    content = service.spreadsheets().values().get(spreadsheetId=file_id_properti, range=rangeName).execute()

    #data transform from gsheet to dataframe
    rows = list(content['values'])
    headers = rows[0]
    # The header row stays in the data while the frame is built so that rows
    # shorter than the header are padded out to the full width; it is
    # dropped right after.
    frame = pd.DataFrame(rows, columns=headers)
    # pandas pads short rows with None; turn those into '' so the emptiness
    # checks done by the callers (`!= ''`) see them as empty.
    frame = frame.iloc[1:].fillna('')
    return frame.reset_index(drop=True)



def login(try_count):
    """Open the login page and sign in, reloading until the form is submitted.

    Reads the module-level ``driver``, ``website``, ``random_sleep``,
    ``id_innoscripta`` and ``pass_innoscripta``. When the browser is not sent
    to a 'public/login' URL the session is treated as already logged in and
    the company list page is opened instead.

    Args:
        try_count: Number of reloads already done. It is passed on to the
            wait/click helpers, which terminate the program once it
            exceeds 9.

    Returns:
        0 after the login form was submitted, or ``try_count`` unchanged when
        a logged-in session was detected.
    """
    email_xpath = "//input[@placeholder='Email address']"
    password_xpath = "//input[@placeholder='Password']"
    submit_xpath = "//button[@type='submit']"

    def pause():
        # RANDOMLY DELAYING ACTIVITY TO MAKE IT LESS ROBOTIC
        time.sleep(random.choice(random_sleep))

    print('Go to '+website+'...')
    driver.get(website)
    current_page_url = driver.current_url
    print('Landed on '+current_page_url+'!')
    pause()

    if 'public/login' not in current_page_url:
        print('Logged in status detected, go to https://platform.globaldatabase.com/app-aggregator/prospect/companies')
        driver.get('https://platform.globaldatabase.com/app-aggregator/prospect/companies')
        pause()
        return try_count

    while True:
        print('Initiating logging in to the website...')
        if wait_for_visibility('XPATH', email_xpath, try_count) == 'Timeout':
            print('Element not found!')
            print('Refreshing page...')
            try_count += 1
            driver.get(current_page_url)
            pause()
            continue

        driver.find_element(By.XPATH, email_xpath).send_keys(id_innoscripta)
        print(driver.find_element(By.XPATH, email_xpath).get_attribute('value'))
        pause()

        # The password is typed but deliberately NOT echoed: printing it
        # would leave the credential in the notebook output / logs.
        driver.find_element(By.XPATH, password_xpath).send_keys(pass_innoscripta)
        pause()

        print(driver.find_element(By.XPATH, submit_xpath).get_attribute('type'))
        if click_elem('XPATH', '//button[@type="submit"]', try_count) == 'Timeout':
            # The form was never submitted, so do not report success; reload
            # the page and start over.
            print('Refreshing page...')
            try_count += 1
            driver.get(current_page_url)
            pause()
            continue
        pause()

        print('Login successful!')
        print('Landed on '+driver.current_url+'!')
        return 0

# In [None]:
#SETUP SELENIUM PYTHON
# Starts a Chrome session with the three command-line flags below and keeps
# it in the module-level `driver` used by every helper and scraping cell.
print('Setting up Selenium...')
chrome_options = Options()
for _flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    chrome_options.add_argument(_flag)
# chrome_options.add_experimental_option( "prefs",{'profile.managed_default_content_settings.javascript': 1})
driver = webdriver.Chrome(options=chrome_options)
print('Selenium is ready to be used!')

# In [None]:
#LOGIN TO GLOBAL DATABASE AND SET FILTER CANADA
import os
from getpass import getpass

# The credentials are no longer written into the notebook (the former
# `{put your user id here}` placeholders were not valid Python either).
# They are taken from the environment variables GLOBALDATABASE_USER and
# GLOBALDATABASE_PASSWORD; when one is missing or empty it is asked for
# interactively, the password without echo.
id_innoscripta = os.environ.get('GLOBALDATABASE_USER') or input('Global Database user id: ')
pass_innoscripta = os.environ.get('GLOBALDATABASE_PASSWORD') or getpass('Global Database password: ')
website = 'https://platform.globaldatabase.com/public/login'
current_page_url = website
# Candidate pauses in seconds; one is picked at random between page actions.
random_sleep = [2,2.5,3,3.5,4,4.5,5]
try_count = 0

try_count = login(try_count)

# In [None]:
#ONLY RUN IF NEW GSHEET IS CREATED TO STORE THE DATA
# overview_data = pd.DataFrame([], columns=['company_name', 'employees', 'age', 'country', 'address', 'phones', 'faxes', 'emails', 'website', 'incorporated',
#                                         'sic_classification', 'company_type', 'industries', 'shareholders', 'accounts', 'banking_with', 'registration_number', 
#                                         'vat_number', 'description', 'monthly_web_traffic', 'number_of_tech_used', 'domain_registered'])
# employees_data = pd.DataFrame([], columns=['cxo', 'director', 'manager', 'non-manager', 'partner', 'senior', 'vp'])
# digital_insight_data = pd.DataFrame([], columns=['monthly_web_traffic', 'website', 'last_6_month_visitor', 'organic_search', 'direct_traffic', 'referral_traffic', 'page_views', 'average_session', 'bounce_rate', 'top_traffic_by_country'])

# In [None]:
#GET RECORDS FROM GSHEET
# The spreadsheet id has to be known before the first read; it used to be
# assigned only after the three reads below, which raised NameError in a
# fresh session.
file_id = '1cgSJetgMDgn0nmo0cf2bgfn84KfS4rU6udMlLheEb-4'

print('Get existing records from gsheet...')
overview_gsheet = read_gsheet(file_id, 'Company Overview!A1:AZ')
employees_gsheet = read_gsheet(file_id, 'Company Employees!A1:AZ')
digital_insight_gsheet = read_gsheet(file_id, 'Company Digital Insights!A1:AZ')

#SCRAPE THE DETAILS OF EACH COMPANIES
# Keep the company list from an earlier run of this cell if there is one;
# only an undefined name (NameError) triggers a fresh download.
try:
    companies_gsheet
except NameError:
    companies_gsheet = read_gsheet(file_id, 'Company List!A1:AZ').values.tolist()

# Same for the working tables: results still in memory from an interrupted
# run are kept, otherwise work starts from copies of what the sheet holds.
try:
    overview_data
    employees_data
    digital_insight_data
except NameError:
    overview_data = overview_gsheet.copy()
    employees_data = employees_gsheet.copy()
    digital_insight_data = digital_insight_gsheet.copy()

print('Get Companies details...')

# Cell-local import: base class of the selenium errors raised while reading
# a page (missing element, stale reference, ...).
from selenium.common.exceptions import WebDriverException

DAILY_LIMIT_URL = 'https://platform.globaldatabase.com/app-aggregator/account/products/metrics?error_guard=limits.daily&detail_error=aggregator'
LOGIN_URL = 'https://platform.globaldatabase.com/public/login'
COMPANIES_URL = 'https://platform.globaldatabase.com/app-aggregator/prospect/companies'
DETAILS_XPATH = "//div[@class='company-details-block']"
NO_DATA_XPATH = '//span[contains(text(), "Unfortunately, no data was provided")]'

# (column, block number, line number) inside the 'company-details-block'
# elements of the overview page.
OVERVIEW_HEADLINE_FIELDS = (
    ('employees', 0, 1),
    ('age', 0, 3),
    ('monthly_web_traffic', 1, 1),
    ('number_of_tech_used', 1, 3),
    ('domain_registered', 1, 5),
)
# (column, label that opens the section, label that closes it) in the second
# 'contacts-list-wrapper' element.
OVERVIEW_CONTACT_FIELDS = (
    ('country', 'Country of origin:', 'Faxes:'),
    ('address', 'Registered address:', 'Emails:'),
    ('phones', 'Phones:', 'Country of origin:'),
    ('faxes', 'Faxes:', 'Registered address:'),
    ('emails', 'Emails:', 'Websites:'),
    ('website', 'Websites:', 'Social:'),
)
# Same layout for the first 'contacts-list-wrapper' element.
# NOTE(review): no entry fills the 'accounts' column -- 'Accounts:' is only
# used as the end marker of the shareholders section, as before.
OVERVIEW_PROFILE_FIELDS = (
    ('incorporated', 'Incorporated:', 'SIC classification:'),
    ('sic_classification', 'SIC classification:', 'Company type:'),
    ('company_type', 'Company type:', 'Industries:'),
    ('industries', 'Industries:', 'Shareholder:'),
    ('shareholders', 'Shareholder:', 'Accounts:'),
    ('banking_with', 'Banking with:', 'Registration Number:'),
    ('registration_number', 'Registration Number:', 'VAT Number:'),
    ('vat_number', 'VAT Number:', 'Description:'),
)
# (column, line number) in the text of 'employees-counter-wrapper'.
EMPLOYEE_FIELDS = (
    ('cxo', 0), ('director', 2), ('manager', 4), ('non-manager', 6),
    ('partner', 8), ('senior', 10), ('vp', 12),
)
# (column, line number) in the flattened 'company-details-block' text of the
# digital insights page.
INSIGHT_DETAIL_FIELDS = (
    ('monthly_web_traffic', 1), ('website', 3), ('page_views', 5),
    ('average_session', 7), ('bounce_rate', 9),
)
# (column, line number) in the flattened 'company-advises-block' text.
INSIGHT_SOURCE_FIELDS = (
    ('organic_search', 1), ('direct_traffic', 4), ('referral_traffic', 7),
)
# Columns set to NaN when the page reports that no data was provided.
INSIGHT_NO_DATA_COLUMNS = (
    'monthly_web_traffic', 'website', 'page_views', 'average_session',
    'bounce_rate', 'organic_search', 'direct_traffic', 'referral_traffic',
    'top_traffic_by_country',
)


def _save_all():
    """Write the three working tables back to their gsheet tabs."""
    injection(file_id, overview_data, 'Company Overview!A1:AZ')
    injection(file_id, employees_data, 'Company Employees!A1:AZ')
    injection(file_id, digital_insight_data, 'Company Digital Insights!A1:AZ')


def _already_scraped(row):
    """Return True when row ``row`` has a non-empty first field in all three tables."""
    try:
        return (overview_data.company_name[row] != ''
                and employees_data.cxo[row] != ''
                and digital_insight_data.monthly_web_traffic[row] != '')
    except (KeyError, IndexError, AttributeError):
        # The row (or column) does not exist yet, so nothing was scraped.
        return False


def _ensure_row(table, row):
    """Append blank rows to ``table`` until positional row ``row`` exists."""
    # Appending only what is missing keeps a partly filled row from an
    # interrupted run in place instead of adding a stray blank row after it.
    for _ in range(row + 1 - len(table)):
        table.loc[len(table)] = [''] * table.shape[1]


def _set(table, row, column, value):
    """Store ``value`` in ``column`` of the ``row``-th row of ``table``."""
    table.loc[table.index[row], column] = value


def _item(seq, *path):
    """Return ``seq[path[0]][path[1]]...``, or NaN when an index is missing."""
    try:
        for key in path:
            seq = seq[key]
    except (IndexError, KeyError, TypeError):
        return np.nan
    return seq


def _joined(lines, start_label, end_label=None, sep='|', offset=1, skip_blank=False):
    """Join the lines that lie between two label lines.

    Collection starts ``offset`` lines after ``start_label`` and stops before
    ``end_label``; without an end label it stops before the last line.
    Returns NaN when a label is not present in ``lines``, so one missing
    section no longer aborts the whole run.
    """
    try:
        start = lines.index(start_label) + offset
        stop = len(lines) - 1 if end_label is None else lines.index(end_label)
    except ValueError:
        return np.nan
    chunk = lines[start:stop]
    if skip_blank:
        chunk = [line for line in chunk if line != '']
    return sep.join(chunk)


def _texts(xpath):
    """Return the text lines of every element matching ``xpath`` as one flat list."""
    return [line
            for elem in driver.find_elements(By.XPATH, xpath)
            for line in elem.text.split('\n')]


def _visit(url):
    """Open ``url``; return False when the daily-limit page was served instead."""
    global current_page_url
    driver.get(url)
    current_page_url = driver.current_url
    print('Landed on '+current_page_url+'!')
    if driver.current_url == DAILY_LIMIT_URL:
        print('Daily limit has been reached!')
        return False
    return True


def _open_until_visible(url, tag_name, elem_name, give_up_after=None):
    """Reload ``url`` until the given element is visible.

    After a timeout the page is reopened: directly after a fresh login when
    the browser was sent to the login page, otherwise via the company list
    page and a random pause. With ``give_up_after`` set, False is returned
    once more than that many reloads failed; without it a new login is done
    after more than 2 failed reloads and counting restarts.

    Returns:
        True once the element is visible, False when giving up.
    """
    attempts = 0
    while wait_for_visibility(tag_name, elem_name, attempts) == 'Timeout':
        if driver.current_url == LOGIN_URL:
            login(attempts)
            driver.get(url)
            continue
        driver.get(COMPANIES_URL)
        time.sleep(random.choice(random_sleep)) # RANDOMLY DELAYING ACTIVITY TO MAKE IT LESS ROBOTIC
        driver.get(url)
        attempts += 1
        if give_up_after is not None:
            if attempts > give_up_after:
                return False
        elif attempts > 2:
            login(attempts)
            attempts = 0
    return True


def _scrape_overview(row, url):
    """Fill row ``row`` of ``overview_data`` from the company overview page."""
    _open_until_visible(url, 'XPATH', DETAILS_XPATH)

    blocks = [elem.text.split('\n')
              for elem in driver.find_elements(By.XPATH, DETAILS_XPATH)]
    try:
        company_name = driver.find_element(By.XPATH, "//span[@class='title-company-name']").text
    except WebDriverException:
        company_name = np.nan
    _set(overview_data, row, 'company_name', company_name)
    for column, block, line in OVERVIEW_HEADLINE_FIELDS:
        _set(overview_data, row, column, _item(blocks, block, line))

    # First wrapper holds the profile, second one the contact details.
    wrappers = driver.find_elements(By.XPATH, "//div[@class='contacts-list-wrapper']")
    profile = wrappers[0].text.split('\n') if len(wrappers) > 0 else []
    contact = wrappers[1].text.split('\n') if len(wrappers) > 1 else []

    #CONTACT
    for column, start_label, end_label in OVERVIEW_CONTACT_FIELDS:
        _set(overview_data, row, column, _joined(contact, start_label, end_label))

    #PROFILE
    for column, start_label, end_label in OVERVIEW_PROFILE_FIELDS:
        _set(overview_data, row, column, _joined(profile, start_label, end_label))
    # The description runs to the end of the profile; blank lines are
    # dropped and the rest is joined with spaces.
    _set(overview_data, row, 'description',
         _joined(profile, 'Description:', sep=' ', skip_blank=True))


def _scrape_employees(row, url):
    """Fill row ``row`` of ``employees_data`` from the employees tab."""
    _open_until_visible(url, 'CLASS_NAME', 'employees-counter-wrapper')
    counters = driver.find_element(By.CLASS_NAME, 'employees-counter-wrapper').text.split('\n')
    for column, line in EMPLOYEE_FIELDS:
        _set(employees_data, row, column, _item(counters, line))


def _fill_digital_insights(row):
    """Copy the figures of the loaded digital insights page into row ``row``."""
    details = _texts(DETAILS_XPATH)
    for column, line in INSIGHT_DETAIL_FIELDS:
        _set(digital_insight_data, row, column, _item(details, line))

    traffic = _texts("//div[@class='company-advises-block digital-insights-traffic']")
    _set(digital_insight_data, row, 'last_6_month_visitor', _item(traffic, 2))

    sources = _texts("//div[@class='company-advises-block']")
    for column, line in INSIGHT_SOURCE_FIELDS:
        _set(digital_insight_data, row, column, _item(sources, line))

    # The country list starts three lines below its heading and ends before
    # the last line.
    countries = _texts("//div[@class='ant-spin-container']")
    _set(digital_insight_data, row, 'top_traffic_by_country',
         _joined(countries, 'Top Traffic by Country', offset=3))


def _scrape_digital_insights(row, url):
    """Fill row ``row`` of ``digital_insight_data`` from the digital insights tab.

    When the page does not load after two reloads, or reading it fails, the
    page is checked for its "no data was provided" message and, if shown,
    the columns in INSIGHT_NO_DATA_COLUMNS are set to NaN.
    """
    try:
        if _open_until_visible(url, 'XPATH', DETAILS_XPATH, give_up_after=1):
            _fill_digital_insights(row)
            return
    except WebDriverException as error:
        print('Error: could not read digital insights: ' + repr(error))

    if wait_for_visibility('XPATH', NO_DATA_XPATH, 0) != 'Timeout':
        print('Error: '+driver.find_element(By.XPATH, NO_DATA_XPATH).text)
        for column in INSIGHT_NO_DATA_COLUMNS:
            _set(digital_insight_data, row, column, np.nan)


def _scrape_company(row, company_url):
    """Scrape overview, employees and digital insights of one company.

    Returns:
        False when the site answered with its daily-limit page, True when
        all three tabs were processed.
    """
    #################################################   OVERVIEW   #################################################
    if not _visit(company_url):
        return False
    _scrape_overview(row, company_url)

    #################################################   EMPLOYEES   #################################################
    tab_employees = driver.find_element(By.XPATH, "//a[contains(text(), 'Employees')]").get_attribute('href')
    if not _visit(tab_employees):
        return False
    _scrape_employees(row, tab_employees)

    #################################################   DIGITAL INSIGHTS   #################################################
    wait_for_visibility('XPATH', "//a[contains(text(), 'Digital Insights')]", 0)
    tab_digital_insight = driver.find_element(By.XPATH, "//a[contains(text(), 'Digital Insights')]").get_attribute('href')
    if not _visit(tab_digital_insight):
        return False
    _scrape_digital_insights(row, tab_digital_insight)
    return True


# Set once a row has been touched (or an error occurred) so that a run which
# skips every company does not rewrite the sheet.
needs_save = False
try:
    # Row `itter` of the three tables belongs to the `itter`-th company.
    for itter, company in enumerate(companies_gsheet):
        if _already_scraped(itter):
            continue

        print('Itteration: '+str(itter)+'...')
        print('Get details of: '+company[0]+'...')
        for table in (overview_data, employees_data, digital_insight_data):
            _ensure_row(table, itter)
        needs_save = True

        if not _scrape_company(itter, company[1]):
            # Daily limit reached: stop here, results are saved below.
            break
except BaseException as error:
    #IN CASE THE SCRAPER GET AN ERROR IN DURING EXECUTION, SAVE THE RESULT TO GSHEET
    # BaseException on purpose: the helpers end the program with sys.exit()
    # and a manual interrupt must not lose the scraped rows either (the
    # former bare `except:` caught the same set).
    print('Error found during program execution, saving scrapped data to gsheet...')
    print('Cause: ' + repr(error))
    needs_save = True
finally:
    # One save for every outcome: daily limit, error, and normal completion
    # (which previously left the results unsaved).
    if needs_save:
        _save_all()

print('Finished!')