# D-U-N-S Extractor

In [1]:
# pip install -U selenium

# Use this line of code to install selenium if needed

In [2]:
import selenium
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

In [3]:
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [4]:
# Lookup table: full state/territory name -> two-letter abbreviation.
# Insertion order matches the original listing (alphabetical by name, with
# 'Virgin Islands' placed before 'Virginia').
us_state_abbrev = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE", "District Of Columbia": "DC", "Florida": "FL",
    "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID", "Illinois": "IL",
    "Indiana": "IN", "Iowa": "IA", "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH",
    "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH",
    "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA",
    "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX", "Utah": "UT", "Vermont": "VT",
    "Virgin Islands": "VI", "Virginia": "VA", "Washington": "WA",
    "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
}
# Reverse lookup: two-letter abbreviation -> full name.
us_state = dict(zip(us_state_abbrev.values(), us_state_abbrev.keys()))

# Robot

In [20]:
from selenium.webdriver.chrome.options import Options
import fake_useragent
from fake_useragent import UserAgent

In [21]:
def _fill_input(driver, xpath, text):
    """Clear the input located by `xpath` and type `text` into it."""
    field = driver.find_element(By.XPATH, xpath)
    field.clear()
    field.send_keys(text)


def _request_duns_email(driver, name, state, email, first_name, last_name):
    """Run one lookup on dnb.com; return False if the e-mail link could not be clicked."""
    driver.get("https://www.dnb.com/duns-number/lookup.html")
    # Fixed pause so the lookup page can finish loading before it is queried.
    time.sleep(7)
    # First dropdown
    reason = Select(driver.find_element(By.NAME, 'primary-reason-dropdown-select-component'))
    reason.select_by_visible_text('Other company')
    # Fill in the company name
    print(name)
    name_box = driver.find_element(By.CLASS_NAME, 'search-form__input__name__input')
    name_box.clear()
    name_box.send_keys(name)
    # Select the state and click search
    state_select = Select(driver.find_element(
        By.XPATH, '//*[@id="content"]/div[5]/form/div/div[2]/div[4]/div[2]/select'))
    state_select.select_by_visible_text(state)
    time.sleep(2)
    driver.find_element(By.XPATH, '//*[@id="submit-search"]').click()
    # Give the search results time to render before looking for the e-mail link.
    time.sleep(5)
    try:
        driver.find_element(
            By.XPATH,
            '//*[@id="content"]/div[5]/div/div[2]/div[2]/div[1]/div[1]/div[3]/div/div/div[1]/a',
        ).click()
    except Exception:
        # Deliberately broad (as in the original): any failure to reach the
        # e-mail link is treated as "company not found".
        return False
    # Fill in and submit the contact form
    _fill_input(driver, '//*[@id="content"]/div[5]/div[1]/div/div/form/div/input[1]', first_name)
    _fill_input(driver, '//*[@id="content"]/div[5]/div[1]/div/div/form/div/input[2]', last_name)
    _fill_input(driver, '//*[@id="content"]/div[5]/div[1]/div/div/form/div/input[3]', email)
    driver.find_element(By.XPATH, '//*[@id="content"]/div[5]/div[1]/div/div/form/button').click()
    # Leave the browser open briefly so the submission can go through.
    time.sleep(4)
    return True


def send_email(Cname, Cstate, email, first_name='haas', last_name='chou'):
    """Ask dnb.com to e-mail the D-U-N-S number of each company.

    For every (name, state) pair a fresh headless Chrome session with a
    random user agent opens the dnb.com D-U-N-S lookup page, searches for the
    company and, when an e-mail link is available on the result, submits the
    contact form with the given e-mail address.

    Args:
        Cname: sequence of company names.
        Cstate: sequence of full state names (selected by visible text in the
            state dropdown), one per entry of ``Cname``.
        email: address that dnb.com should send the D-U-N-S number to.
        first_name: first name typed into the contact form.
        last_name: last name typed into the contact form.

    Returns:
        List of company names for which the e-mail link could not be clicked,
        i.e. companies without a D-U-N-S result on dnb.com.

    Raises:
        AssertionError: if ``Cname`` and ``Cstate`` differ in length.
    """
    if len(Cname) != len(Cstate):
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError('Number of company names do not match with number of States')
    unfounded = []
    if len(Cname) == 0:
        return unfounded
    # Build the user-agent source once; a new random agent is drawn per company.
    ua = UserAgent()
    for name, state in zip(Cname, Cstate):
        options = webdriver.ChromeOptions()
        options.add_argument(f'user-agent={ua.random}')
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)
        try:
            if not _request_duns_email(driver, name, state, email, first_name, last_name):
                unfounded.append(name)
        finally:
            # Always close the browser, even when a page interaction raises,
            # so failed lookups do not leak Chrome processes.
            driver.quit()
    return unfounded

### Please import your dataset with the following code

In [6]:
# Load the input dataset; the cells below read its 'company_name' and 'state' columns.
ns = pd.read_csv('name_state.csv')

### This section may need to be modified based on the imported dataset. All the algorithm needs is the name of each company and the abbreviation of its corresponding state (if the dataset lists states by full name, you won't need to run replace_abbr)

In [7]:
# Keep only the two columns the robot needs: company name and state.
inputds = ns[['company_name', 'state']] # Run this code to get a DF with only company name and state

In [8]:
# Drop rows whose state code is 'MP' or 'AS'; neither code has an entry in
# us_state_abbrev.
inputds = inputds[~inputds['state'].isin(['MP', 'AS'])]

In [9]:
def replace_abbr(abbr):
    """Return a list with the full state name for each abbreviation in `abbr`.

    Each item is looked up in the module-level `us_state` mapping, so an
    abbreviation missing from that mapping raises KeyError.
    """
    return [us_state[code] for code in abbr]

In [10]:
# One-column frame holding every known two-letter state abbreviation.
states = pd.DataFrame({'state': list(us_state_abbrev.values())})

In [11]:
# Keep only rows whose state code is a known abbreviation. An inner join is
# used because a right join would also emit a row with a missing company_name
# for every abbreviation that has no company in the dataset.
inputds = pd.merge(inputds, states, on='state', how='inner')

In [12]:
# Convert the two-letter codes in 'state' to full state names (see replace_abbr).
inputds['state'] = replace_abbr(inputds['state'])

In [13]:
# keep=False removes every row whose company_name occurs more than once,
# so no copy of a duplicated name is retained.
inputds = inputds.drop_duplicates(subset="company_name", keep=False)

### Run the robot with the following code, and you will receive a lot of emails from dnb.com

In [None]:
# `unfound` collects the company names that send_email reports as not found.
unfound = send_email(inputds['company_name'].to_list(), inputds['state'].to_list(), 'your email address')

# You can also adjust the size of inputds to fit your needs. The code above simply passes the entire dataset to
# the send_email function.

# Sign in to Gmail

In [66]:
# Gmail credentials read by gmail_login() as module-level globals.
# NOTE(review): fill these in locally and avoid saving/committing a real
# password with the notebook.
email_address = 'your email address'
password = 'your email password'

In [67]:
def gmail_login(driver=None):
    """Open a new browser tab and sign in to Gmail.

    The account is taken from the module-level ``email_address`` and
    ``password`` variables.

    Args:
        driver: Selenium WebDriver to use. When omitted, a module-level
            ``driver`` is used if one exists (what earlier revisions of this
            notebook relied on); otherwise a new Chrome session is started.

    Returns:
        The WebDriver whose current window is the Gmail tab, so the caller
        can keep working with it and eventually call ``quit()``.
    """
    if driver is None:
        # Backward compatibility with a notebook-global `driver`.
        driver = globals().get('driver')
    if driver is None:
        driver = webdriver.Chrome()

    # Open a blank tab and switch to it; the new handle is found by comparing
    # against the handles that existed before, rather than assuming index 1.
    known_handles = set(driver.window_handles)
    driver.execute_script("window.open('');")
    new_handles = [h for h in driver.window_handles if h not in known_handles]
    driver.switch_to.window(new_handles[0] if new_handles else driver.window_handles[-1])

    driver.get('https://accounts.google.com/signin/v2/identifier?continue=https%3A%2F%2Fmail.google.com%2Fmail%2F&service=mail&sacu=1&rip=1&flowName=GlifWebSignIn&flowEntry=ServiceLogin')
    driver.find_element(By.XPATH, '//*[@id="identifierId"]').send_keys(email_address)
    driver.find_element(By.XPATH, '//*[@id="identifierNext"]/span/span').click()
    # Wait for the password step to become interactive instead of sleeping a
    # fixed two seconds.
    password_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="password"]/div[1]/div/div[1]/input'))
    )
    password_input.send_keys(password)
    driver.find_element(By.XPATH, '//*[@id="passwordNext"]/span').click()
    return driver

In [70]:
# Open a new browser tab and sign in to Gmail with the credentials defined above.
gmail_login()

# Extract Data From Email (yet to finish!)