In [1]:
from io import StringIO

import ipywidgets as widgets
import pandas as pd
import requests
from bs4 import BeautifulSoup
from ipywidgets import interact

In [2]:
# Request the Yahoo! Finance sustainability page for AAPL and keep the HTML as text.
try:
    response = requests.get('https://finance.yahoo.com/quote/AAPL/sustainability', timeout=30)
    # A 4xx/5xx reply is not a usable page, so treat it as a failure as well
    response.raise_for_status()
    web_data = response.text
    print('Success')
except requests.RequestException:
    print('Could not get URL')
    # Re-raise: the following cells need web_data, so continuing would only
    # fail later with a less helpful NameError
    raise

Success


In [3]:
# Parse the downloaded HTML with the parser from Python's standard library
soup = BeautifulSoup(markup=web_data, features="html.parser")

In [4]:
# Locate the <div> that holds the overall ESG score via its exact class string
esg_score = soup.find(
    name='div',
    attrs={'class': 'Fz(36px) Fw(600) D(ib) Mend(5px)'},
)
print(esg_score)

<div class="Fz(36px) Fw(600) D(ib) Mend(5px)" data-reactid="20">17</div>


In [5]:
# Keep only the text content of the ESG score element
data_point = esg_score.get_text()
print(data_point)

17


In [6]:
# Collect the text of every component-score <div>, in the order they appear on the page
scores = soup.find_all(
    name='div',
    attrs={'class': 'D(ib) Fz(23px) smartphone_Fz(22px) Fw(600)'},
)
elements = [tag.text for tag in scores]

In [7]:
# Show the component-score strings collected in the previous cell
print(elements)

['0.1', '7.7', '8.9']


In [8]:
# Locate the controversy level <div> and keep its text content
controversy_score = soup.find(
    name='div',
    attrs={'class': 'D(ib) Fz(36px) Fw(500)'},
)
ctr_data = controversy_score.get_text()
print(ctr_data)

3


In [9]:
# Assemble the scraped AAPL values into a one-row dataframe indexed by ticker
aapl_scores = {
    'Total ESG Score': data_point,
    'Environment': elements[0],
    'Social': elements[1],
    'Governance': elements[2],
    'Controversy Score': ctr_data,
}
df = pd.DataFrame(data=aapl_scores, index=['AAPL'])
df

Unnamed: 0,Total ESG Score,Environment,Social,Governance,Controversy Score
AAPL,17,0.1,7.7,8.9,3


In [10]:
# Gather the tickers of all S&P 500 constituents from the Wikipedia list
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', timeout=30)
# Fail here with a clear HTTP error instead of a confusing "no tables found" from read_html
wiki_response.raise_for_status()
wiki_page = wiki_response.text
# Passing literal HTML to read_html is deprecated; a file-like wrapper works on old and new pandas
sp_500 = pd.read_html(StringIO(wiki_page))
# The first table parsed from the page is used as the constituents list
ticker_df = sp_500[0]
tickers = ticker_df['Symbol']

In [11]:
# Check what kind of object selecting the 'Symbol' column produced
type(tickers)

pandas.core.series.Series

In [12]:
# Preview the first 20 ticker symbols
tickers.head(20)

0      MMM
1      ABT
2     ABBV
3     ABMD
4      ACN
5     ATVI
6     ADBE
7      AMD
8      AAP
9      AES
10     AFL
11       A
12     APD
13    AKAM
14     ALK
15     ALB
16     ARE
17    ALXN
18    ALGN
19    ALLE
Name: Symbol, dtype: object

In [13]:
# Gather all ESG scores of S&P 500 firms
def yahoo_finance_esg(ticker):
    '''
    Scrape the ESG scores for one company from Yahoo! Finance.

    Builds the Yahoo! Finance Sustainability URL for ``ticker``, downloads the
    page, parses it with BeautifulSoup and collects the total ESG score, the
    component scores and the controversy level.

    Parameters
    ----------
    ticker : str
        Ticker symbol of the firm, e.g. ``'AAPL'``. Symbols written with a dot
        (``'BRK.B'``) are requested with a hyphen (``'BRK-B'``), the form
        Yahoo! Finance uses for share classes.

    Returns
    -------
    pandas.DataFrame or None
        A one-row dataframe indexed by ``ticker`` (as passed in) with the
        columns 'Total ESG Score', 'Environment', 'Social', 'Governance' and
        'Controversy Score'. Values are the scraped strings; a field that is
        missing from the page is None. None is returned when the page could
        not be downloaded or contains no total ESG score.
    '''
    # Yahoo! Finance separates share classes with '-' while the ticker list uses '.'
    symbol = ticker.replace('.', '-')
    url = 'https://finance.yahoo.com/quote/' + symbol + '/sustainability?p=' + symbol
    try:
        # The timeout keeps one stalled connection from hanging a long scraping run
        web_data = requests.get(url, timeout=30).text
    except requests.RequestException:
        return None
    soup = BeautifulSoup(web_data, "html.parser")

    esg_score = soup.find('div', {'class':'Fz(36px) Fw(600) D(ib) Mend(5px)'})
    if esg_score is None:
        # No total score on the page: report "no data" to the caller
        return None
    data_point = esg_score.text

    # The controversy block may be absent even when a total score exists
    controversy_score = soup.find('div', {'class':'D(ib) Fz(36px) Fw(500)'})
    ctr_data = controversy_score.text if controversy_score is not None else None

    scores = soup.find_all('div', {'class':'D(ib) Fz(23px) smartphone_Fz(22px) Fw(600)'})
    elements = [score.text for score in scores]
    # Pad to three entries so a page with fewer component scores yields None
    # values instead of an IndexError
    elements += [None] * (3 - len(elements))

    # Component scores are assigned in page order: Environment, Social, Governance
    df_esg = pd.DataFrame({'Total ESG Score': data_point,
                           'Environment': elements[0],
                           'Social': elements[1],
                           'Governance': elements[2],
                           'Controversy Score': ctr_data},
                          index=[ticker])
    return df_esg

In [14]:
# Run yahoo_finance_esg() for each company in the tickers series and consolidate
# the per-company rows into a single dataframe. A progress line is printed for
# every ticker, saying whether data was retrieved.
esg_records = []
for ticker in tickers:
    individual_record = yahoo_finance_esg(ticker)
    if individual_record is not None:
        print(ticker + " retrieved") # Progress checker
        esg_records.append(individual_record)
    else:
        print("No data for " + ticker) # Progress checker

if esg_records:
    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and re-copied the whole frame on every iteration
    full_data = pd.concat(esg_records)
else:
    # Nothing retrieved: keep an empty frame with the expected columns
    full_data = pd.DataFrame({'Total ESG Score': [],
                              'Environment': [],
                              'Social': [],
                              'Governance': [],
                              'Controversy Score': []})
print(full_data)

MMM retrieved
ABT retrieved
ABBV retrieved
No data for ABMD
ACN retrieved
ATVI retrieved
ADBE retrieved
No data for AMD
AAP retrieved
AES retrieved
AFL retrieved
A retrieved
APD retrieved
AKAM retrieved
No data for ALK
ALB retrieved
ARE retrieved
ALXN retrieved
No data for ALGN
ALLE retrieved
LNT retrieved
ALL retrieved
GOOGL retrieved
No data for GOOG
MO retrieved
AMZN retrieved
No data for AMCR
AEE retrieved
AAL retrieved
AEP retrieved
AXP retrieved
AIG retrieved
AMT retrieved
AWK retrieved
AMP retrieved
ABC retrieved
AME retrieved
AMGN retrieved
APH retrieved
ADI retrieved
ANSS retrieved
ANTM retrieved
No data for AON
AOS retrieved
APA retrieved
AAPL retrieved
AMAT retrieved
APTV retrieved
ADM retrieved
ANET retrieved
AJG retrieved
AIZ retrieved
T retrieved
ATO retrieved
ADSK retrieved
ADP retrieved
AZO retrieved
AVB retrieved
AVY retrieved
No data for BKR
BLL retrieved
BAC retrieved
BK retrieved
BAX retrieved
BDX retrieved
No data for BRK.B
BBY retrieved
BIO retrieved
BIIB retrieve

In [15]:
# Write the consolidated ESG dataframe to a CSV file in the working directory
full_data.to_csv(path_or_buf='esg_score.csv')