In [51]:
import pandas as pd  # For handling data in DataFrame
from selenium import webdriver  # For controlling the web browser and interacting with HTML content
from selenium.webdriver.chrome.service import Service  # For setting up ChromeDriver as a service
from selenium.webdriver.common.by import By  # To locate HTML elements by XPath
from selenium.webdriver.chrome.options import Options  # For handling Chrome options
from webdriver_manager.chrome import ChromeDriverManager  # For automatic ChromeDriver installation
import time  # For handling wait times
import random  # For randomizing scroll and wait times

In [53]:
chrome_options = Options()
# chrome_options.add_argument('--headless')  # Optional: uncomment if you want to run it without opening browser
service = Service(ChromeDriverManager().install())
browser = webdriver.Chrome(service=service, options=chrome_options)

url = 'https://www.sustainalytics.com/esg-ratings'
browser.get(url)
browser.maximize_window()
time.sleep(random.uniform(3, 7))

In [55]:
def scrape_esg(browser):
    Company = []
    Ticker = []
    ESGscore = []
    

    rows = browser.find_elements(By.XPATH,'//section[@id="company_ratings"]/div[contains(@class, "company-row")]')  

    for row in rows:
        try:
            company_element = row.find_element(By.XPATH, './/div[contains(@class, "w-50")][1]/a')
            company = company_element.text.strip()
        except:
            company = "N/A"
        Company.append(company)

        try:
            ticker_element = row.find_element(By.XPATH, './/div[contains(@class, "w-50")][1]/small')
            ticker = ticker_element.text.strip()
        except:
            ticker = "N/A"
        Ticker.append(ticker)

        try:
            esgscore_element = row.find_element(By.XPATH, './/div[contains(@class, "company-score")]/div[contains(@class, "row")]')
            esgscore = esgscore_element.text.strip()
        except:
            esgscore = "N/A"
        ESGscore.append(esgscore)

    return Company, Ticker, ESGscore

In [57]:
Company_Name = []
Ticker_Name = []
ESG_score = [] 

#looping through all the pages to get the data 
for page_number in range(1, 1400): 
    print(f"Scaping page {page_number}...")

    time.sleep(random.uniform(2,4))

    Company, Ticker, ESGscore = scrape_esg(browser)
    Company_Name.extend(Company)
    Ticker_Name.extend(Ticker)
    ESG_score.extend(ESGscore)

    #making sure each pages saves so if my computer crashes the data is there 
    df = pd.DataFrame({
        'Company': Company_Name,
        'Ticker': Ticker_Name,
        'ESG Score': ESG_score
    })
    df.to_csv('esg_data.csv', index=False)

    try:
        next_page = browser.find_element(By.XPATH, f'//a[text()="{page_number + 1}"]')
        browser.execute_script("arguments[0].click();", next_page)
    except Exception as e:
        print(f"Could not go to page {page_number + 1}: {e}")
        break

Scaping page 1...
Scaping page 2...
Scaping page 3...
Scaping page 4...
Scaping page 5...
Scaping page 6...
Scaping page 7...
Scaping page 8...
Scaping page 9...
Scaping page 10...
Scaping page 11...
Scaping page 12...
Scaping page 13...
Scaping page 14...
Scaping page 15...
Scaping page 16...
Scaping page 17...
Scaping page 18...
Scaping page 19...
Scaping page 20...
Scaping page 21...
Scaping page 22...
Scaping page 23...
Scaping page 24...
Scaping page 25...
Scaping page 26...
Scaping page 27...
Scaping page 28...
Scaping page 29...
Scaping page 30...
Scaping page 31...
Scaping page 32...
Scaping page 33...
Scaping page 34...
Scaping page 35...
Scaping page 36...
Scaping page 37...
Scaping page 38...
Scaping page 39...
Scaping page 40...
Scaping page 41...
Scaping page 42...
Scaping page 43...
Scaping page 44...
Scaping page 45...
Scaping page 46...
Scaping page 47...
Scaping page 48...
Scaping page 49...
Scaping page 50...
Scaping page 51...
Scaping page 52...
Scaping page 53...
Sc

In [95]:
esg_df = pd.read_csv('esg_data.csv')
esg_df

Unnamed: 0,Company,Ticker,ESG Score
0,"1-800-FLOWERS.COM, Inc.",NAS:FLWS,28.9\nMedium ESG Risk
1,1&1 AG,ETR:1U1,27.7\nMedium ESG Risk
2,"10X Genomics, Inc.",NAS:TXG,22.5\nMedium ESG Risk
3,11 Bit Studios SA,WAR:11B,16.3\nLow ESG Risk
4,1st Source Corp.,NAS:SRCE,35.0\nHigh ESG Risk
...,...,...,...
13979,Zydus Wellness Ltd.,BOM:531335,27.9\nMedium ESG Risk
13980,"Zylox-Tonbridge Medical Technology Co., Ltd.",HKG:2190,25.6\nMedium ESG Risk
13981,"Zymeworks, Inc.",NAS:ZYME,29.0\nMedium ESG Risk
13982,"Zynex, Inc.",NAS:ZYXI,32.0\nHigh ESG Risk


In [97]:
#splitting the ticker column into the number and the stock index 
esg_df[['Ticker_1', 'Ticker_2']] = esg_df['Ticker'].str.split(':', expand=True)

esg_df.head() 

Unnamed: 0,Company,Ticker,ESG Score,Ticker_1,Ticker_2
0,"1-800-FLOWERS.COM, Inc.",NAS:FLWS,28.9\nMedium ESG Risk,NAS,FLWS
1,1&1 AG,ETR:1U1,27.7\nMedium ESG Risk,ETR,1U1
2,"10X Genomics, Inc.",NAS:TXG,22.5\nMedium ESG Risk,NAS,TXG
3,11 Bit Studios SA,WAR:11B,16.3\nLow ESG Risk,WAR,11B
4,1st Source Corp.,NAS:SRCE,35.0\nHigh ESG Risk,NAS,SRCE


In [99]:
# drop the original ticker column 
esg_df = esg_df.drop(columns=['Ticker'])

In [101]:
# split the esg score and the risk 
esg_df[['ESG score', 'ESG risk']] = esg_df['ESG Score'].str.split('\n', expand=True)
esg_df

Unnamed: 0,Company,ESG Score,Ticker_1,Ticker_2,ESG score,ESG risk
0,"1-800-FLOWERS.COM, Inc.",28.9\nMedium ESG Risk,NAS,FLWS,28.9,Medium ESG Risk
1,1&1 AG,27.7\nMedium ESG Risk,ETR,1U1,27.7,Medium ESG Risk
2,"10X Genomics, Inc.",22.5\nMedium ESG Risk,NAS,TXG,22.5,Medium ESG Risk
3,11 Bit Studios SA,16.3\nLow ESG Risk,WAR,11B,16.3,Low ESG Risk
4,1st Source Corp.,35.0\nHigh ESG Risk,NAS,SRCE,35.0,High ESG Risk
...,...,...,...,...,...,...
13979,Zydus Wellness Ltd.,27.9\nMedium ESG Risk,BOM,531335,27.9,Medium ESG Risk
13980,"Zylox-Tonbridge Medical Technology Co., Ltd.",25.6\nMedium ESG Risk,HKG,2190,25.6,Medium ESG Risk
13981,"Zymeworks, Inc.",29.0\nMedium ESG Risk,NAS,ZYME,29.0,Medium ESG Risk
13982,"Zynex, Inc.",32.0\nHigh ESG Risk,NAS,ZYXI,32.0,High ESG Risk


In [103]:
# drop the original esg score column 
esg_df = esg_df.drop(columns=['ESG Score'])

In [105]:
# cleaning up esg risk 
esg_df['ESG risk'] = esg_df['ESG risk'].str.replace(' ESG Risk', '', regex=False)

In [107]:
# looking at the data frame 
esg_df

Unnamed: 0,Company,Ticker_1,Ticker_2,ESG score,ESG risk
0,"1-800-FLOWERS.COM, Inc.",NAS,FLWS,28.9,Medium
1,1&1 AG,ETR,1U1,27.7,Medium
2,"10X Genomics, Inc.",NAS,TXG,22.5,Medium
3,11 Bit Studios SA,WAR,11B,16.3,Low
4,1st Source Corp.,NAS,SRCE,35.0,High
...,...,...,...,...,...
13979,Zydus Wellness Ltd.,BOM,531335,27.9,Medium
13980,"Zylox-Tonbridge Medical Technology Co., Ltd.",HKG,2190,25.6,Medium
13981,"Zymeworks, Inc.",NAS,ZYME,29.0,Medium
13982,"Zynex, Inc.",NAS,ZYXI,32.0,High


In [109]:
esg_df.to_csv('esg_data_full.csv')