In [None]:
# Importing relevant libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
import pandas as pd
import yfinance as yf

In [None]:
# Reading in the base input dataset.
# The path is a raw string so the backslash of the Windows path is taken
# literally instead of being parsed as an (invalid) escape sequence.
INPUT_PATH = r'D:\SDG2000-shareable-list_28.04.2022_initial_data.xlsx'
df = pd.read_excel(INPUT_PATH, sheet_name='2. SDG2000')

In [None]:
# Adding new columns to record the scraped MSCI and Refinitiv ratings and the
# corresponding company names. (The column names keep their original spelling.)
# MSCI_Rating starts as a float because the scraper stores fractional scores
# (e.g. 7.145) in it; an integer column would have to be upcast on first write.
df['MSCI_Rating'] = 0.0
df['Refinitive_Rating'] = 0  # Refinitiv scores are written as whole numbers
# Company names exactly as shown on each provider's page; the scraper writes
# 'NotFound' when a search times out.
df['MSCI_Company_name'] = ''
df['Refinitive_Company_name'] = ''

In [None]:
# Main loop which runs the scraper

# Raw string so the backslash in the Windows path is taken literally instead of
# being parsed as an (invalid) escape sequence.
CHROMEDRIVER_PATH = r'C:\chromedriver.exe'
MSCI_URL = 'https://www.msci.com/research-and-insights/esg-ratings-corporate-search-tool'
REFINITIV_URL = 'https://www.refinitiv.com/en/sustainable-finance/esg-scores'
WAIT_SECONDS = 5  # Timeout applied to every explicit wait below

# Numerical equivalent recorded for each MSCI letter rating.
MSCI_RATING_SCORES = {
    'ccc': 7.145,
    'b': 21.435,
    'bb': 35.725,
    'bbb': 50.015,
    'a': 64.305,
    'aa': 78.595,
    'aaa': 92.885,
}

# Heading on the Refinitiv result page that carries both company name and score.
REFINITIV_SCORE_XPATH = '//*[@id="esg-data-body"]/div[2]/div/div/div/div/div/div[1]/div/div/div[1]/h3'


def _launch_driver(options):
    """Start a Chrome webdriver with `options` and apply the navigator.webdriver override."""
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
    try:
        # NOTE(review): this runs on the start page, before any driver.get();
        # confirm the override is still in effect after navigation.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    except Exception:
        driver.quit()  # Do not leave a browser behind if the setup step fails
        raise
    return driver


def _msci_score_from_class(css_class):
    """Map the rating icon's class attribute to its numeric score (0 if the letter is not recognised)."""
    suffix = (css_class or '').split("esg-rating-circle-")[-1].lower()
    tokens = suffix.split()  # Ignore any further class names after the rating letter
    letter = tokens[0] if tokens else ''
    # Fall back to 0 (the same value used for "not found") instead of reusing
    # whatever rating the previous company happened to have.
    return MSCI_RATING_SCORES.get(letter, 0)


# MSCI scores are fractional, so make sure the column can hold them.
df['MSCI_Rating'] = df['MSCI_Rating'].astype(float)

# Resolve the output columns by name so the writes below land in the right
# place regardless of how many columns the input sheet has.
msci_rating_col = df.columns.get_loc('MSCI_Rating')
ref_rating_col = df.columns.get_loc('Refinitive_Rating')
msci_name_col = df.columns.get_loc('MSCI_Company_name')
ref_name_col = df.columns.get_loc('Refinitive_Company_name')

ua = UserAgent()  # Source of random user agents; built once, sampled per company

for (index, Name) in enumerate(df['Name'][0:2000]):

    company_name = Name  # The company searched for in this iteration

    # Fresh Chrome options per company, with a random user agent and the
    # switches used to make the automated browser harder to detect.
    options = Options()
    options.add_argument(f'user-agent={ua.random}')
    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_argument('--disable-blink-features=AutomationControlled')

    # MSCI
    driver = _launch_driver(options)
    try:
        driver.get(MSCI_URL)

        wait = WebDriverWait(driver, WAIT_SECONDS)
        action = ActionChains(driver)

        elem = driver.find_element(By.ID, "_esgratingsprofile_keywords")  # The search bar
        try:
            elem.clear()  # Clearing any default value typed into the search bar
            elem.send_keys(company_name)

            # Wait for the first suggested company to show up, then click it.
            searchTextbox = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="ui-id-1"]/li')))
            action.move_to_element(searchTextbox).click().perform()

            # Wait until the profile header of the selected company has loaded.
            wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="_esgratingsprofile_esg-ratings-profile-header"]/div/div[1]/div[2]/div')))

            # MSCI COMPANY NAME
            company_name_msci = driver.find_element(By.XPATH, '//*[@id="_esgratingsprofile_esg-ratings-profile-container"]/div[1]/h1').text
            df.iat[index, msci_name_col] = company_name_msci

            # MSCI COMPANY RATING
            # The letter rating is encoded in the class attribute of the rating icon.
            rating_icon = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "ratingdata-company-rating")))
            company_rating = _msci_score_from_class(rating_icon.get_attribute("class"))
            df.iat[index, msci_rating_col] = company_rating

        except TimeoutException:
            # The company was not found in time: record NotFound and a rating of 0.
            df.iat[index, msci_name_col] = 'NotFound'
            df.iat[index, msci_rating_col] = 0
    finally:
        # quit() (rather than close()) also ends the chromedriver process, and
        # running it in finally releases the browser on every exit path.
        driver.quit()

    # REFINITIV

    # Same process as MSCI, except that the Refinitiv website does not allow typing
    # in the search bar before the cookie banner is answered, so cookies are rejected first.
    driver = _launch_driver(options)
    try:
        wait = WebDriverWait(driver, WAIT_SECONDS)
        driver.get(REFINITIV_URL)
        cookies = wait.until(EC.presence_of_element_located((By.ID, 'onetrust-reject-all-handler')))
        cookies.click()

        searchbar = driver.find_element(By.ID, 'searchInput-1')
        searchbar.send_keys(company_name)

        # Wait for the first suggestion, then select it.
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="searchInput-1-typeaheadItem-0"]/button')))
        button = driver.find_element(By.XPATH, '//*[@id="searchInput-1-typeaheadItem-0"]')
        button.click()

        # REFINITIV COMPANY RATING
        # The score is taken as the text after the last ": " in the heading.
        heading = wait.until(EC.presence_of_element_located((By.XPATH, REFINITIV_SCORE_XPATH)))
        score = heading.text
        ref_score = score.split(": ")[-1]
        df.iat[index, ref_rating_col] = int(ref_score)

        # REFINITIV COMPANY NAME
        # The name is everything in the heading before the first "ESG".
        company_name_refinitive = score.split("ESG")[0]
        df.iat[index, ref_name_col] = company_name_refinitive

    except TimeoutException:
        # The company was not found in time: record NotFound and a rating of 0.
        df.iat[index, ref_rating_col] = 0
        df.iat[index, ref_name_col] = 'NotFound'
    finally:
        driver.quit()

In [None]:
# Exporting the scraped dataset as an Excel workbook in the working directory.
OUTPUT_PATH = 'SDG2000_MSCI_Refinitive.xlsx'
df.to_excel(OUTPUT_PATH)