## Have the following installed before running

- html5lib
- requests
- lxml
- beautifulsoup4 (imported as `bs4`)
- pandas

In [1]:
import requests
import html5lib
import lxml
import pandas as pd
from bs4 import BeautifulSoup

### Some websites have systems in place that block automated HTTP requests. If that happens, pass the following dictionary as the `headers` argument of the `requests.get()` function

- Use the following if the request status is 400 or 403

In [2]:
# Extra HTTP headers handed to requests.get() when a site rejects the default client.
# NOTE(review): the Access-Control-* entries are normally CORS *response* headers;
# presumably only the browser-like User-Agent influences the server -- confirm before trimming.
headers = dict([
    ('Access-Control-Allow-Origin', '*'),
    ('Access-Control-Allow-Methods', 'GET'),
    ('Access-Control-Allow-Headers', 'Content-Type'),
    ('Access-Control-Max-Age', '3600'),
    ('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'),
])

# Retrieval of ESG data from Investors.com

Investors.com compiled its top 100 desired companies and tabulated all the relevant scores in a table.

In [3]:
# Download the Investors.com article that contains the ESG ranking table.
url = 'https://www.investors.com/news/esg-companies-list-best-esg-stocks-environmental-social-governance-values'
# The timeout (seconds) stops the cell from hanging forever if the server never responds.
req = requests.get(url, headers=headers, timeout=30)
# Display the response object so the HTTP status code is visible in the cell output.
req

<Response [200]>

In [4]:
# Parse the downloaded page into a BeautifulSoup tree.
# Note: retrieving data from multiple pages requires more than one HTTP request.
html = BeautifulSoup(req.content, 'html.parser')

# Show only the first 2000 characters: the full prettified page is very large
# and would flood the cell output without adding information.
print(html.prettify()[:2000])

<!DOCTYPE doctype html>
<html class="no-js" lang="en-US">
 <head>
  <meta charset="utf-8"/>
  <meta content="ie=edge" http-equiv="x-ua-compatible"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <script>
   var envURL = 'investors.com';
        window.envURL = envURL;
        thempletpath = 'https://www.investors.com';
        if ('scrollRestoration' in history) {
            history.scrollRestoration = 'manual';
        }
  </script>
  <meta content="All News And Stock Ideas|ESG Investing|Special Reports" name="tag"/>
  <script src="https://services.investors.com/services/userprofile.aspx">
  </script>
  <script>
   var digitalData = {"pageName":"100 Best ESG Companies: Top Stocks For Environmental, Social And Governance Values","articleName":"100 Best ESG Companies: Top Stocks For Environmental, Social And Governance Values","articlePosition":"Position 1","channel":"News & Analysis","contentType":"Articles","subSection1":"News","subSection2":"","subSection

In [5]:
# Retrieve the table containing the ESG scores
esgTable = html.find('table', class_='tableizer-table')
if esgTable is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("Could not find the 'tableizer-table' ESG table in the downloaded page")

# Column titles come from the <th> cells of the table head.
# They get their own name so the `headers` dict used for HTTP requests is not
# overwritten (re-running the request cell would otherwise fail).
column_names = [th.text.strip() for th in esgTable.find('thead').find_all('th')]

# One list of stripped cell texts per <tr> in the table body.
data = [
    [cell.text.strip() for cell in row.find_all('td')]
    for row in esgTable.find('tbody').find_all('tr')
]

ESG_df = pd.DataFrame(data, columns=column_names)
ESG_df

Unnamed: 0,Rank,Company,Symbol,Industry,ESG Score,Comp Rtg,RS Rtg,EPS Rtg,SMR Rtg,3-Yr EPS Growth Rate,ROE,Last Qtr Sales % Chg,Last Qtr EPS % Chg,Div Yld
0,1,Microsoft,MSFT,Computer Software-Desktop,76.30,99,84,94,A,25%,47%,21%,49%,0.7
1,2,Linde,LIN,Chemicals-Specialty,76.00,94,66,85,B,16,9,19,42,1.3
2,3,Accenture,ACN,Computer-Tech Services,75.95,97,84,81,A,9,33,21,26,1.0
3,4,J.B. Hunt,JBHT,Transportation-Trucking,74.14,89,68,81,B,6,21,36,41,0.7
4,5,Xylem,XYL,Machinery-Tools & Resources,73.89,87,87,79,B,-8,13,16,65,0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,Tractor Supply,TSCO,Retail/Wholesale Building Products,62.21,89,78,87,A,27,46,13,10,1.1
96,97,Chipotle,CMG,Retail-Restaurants,62.12,99,89,98,B,21,16,39,1765,n.a.
97,98,Zoetis,ZTS,Medical-Ethical Drugs,62.06,95,80,91,A,14,57,26,34,0.5
98,99,Gentherm,THRM,Auto/Truck Original Equipment,61.95,92,88,77,A,12,14,96,383,n.a.


# Retrieval of ESG data from MSCI

User can submit a request for which company they want to analyse/review using
   - Company's name
   - Stock ticker
  
### Retrieving data from MSCI and getting the ESG rating is trickier, but it's doable! It requires an ID at the end of the URL, which is uniquely generated for each counter. Is there a way to store it?

![Screenshot%202022-02-09%20at%203.05.35%20PM.png](attachment:Screenshot%202022-02-09%20at%203.05.35%20PM.png)

Notice from the div tags that the ESG rating attained by the company is color-coded in "white", while the rest are in grey. Hence, we just need to find the "grey" text and parse out the final grade (at the end of the class name for each ratingdata-cell).

## Find a way to store all of the ids for each ticker

document.querySelector("#_esgratingsprofile_esg-ratings-profile-header > div > div.ratingdata-container > div.ratingdata-outercircle.esgratings-profile-header-yellow")

In [6]:
# Lookup from the company slug used in the MSCI URL to the issuer ID appended after it.
ticker = {'apple-inc': 'IID000000002157615'}

## Let us retrieve the ESG data for Apple Inc from MSCI 

In [7]:
# Build the issuer page URL: <search tool base>/<company slug>/<issuer ID>.
main_url = 'https://www.msci.com/our-solutions/esg-investing/esg-ratings/esg-ratings-corporate-search-tool/issuer/'
company = 'apple-inc'
fullUrl = f"{main_url}{company}/{ticker[company]}"

# The timeout (seconds) keeps the cell from hanging if the server never answers.
req = requests.get(fullUrl, timeout=30)

# Report a failed request here, so an error page is not mistaken
# for the real ratings page in the parsing cells that follow.
if not req.ok:
    print(f"Request to {fullUrl} failed with HTTP status {req.status_code}")

In [8]:
# Parse the MSCI response into a BeautifulSoup tree.
html = BeautifulSoup(req.content, 'html.parser')

# Show only the first 2000 characters: enough to see the page title
# without flooding the cell output with the whole document.
print(html.prettify()[:2000])

<!DOCTYPE html>
<html class="ltr" dir="ltr" itemscope="" itemtype="http://schema.org/WebSite" lang="en-US">
 <head>
  <title>
   404 Page Not Found - MSCI
  </title>
  <meta content="initial-scale=1.0, width=device-width" name="viewport"/>
  <link href="https://www.msci.com/o/msci/images/favicon.ico" rel="icon" sizes="16x16" type="image/png"/>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <script data-senna-track="permanent" src="/o/frontend-js-lodash-web/lodash/lodash.js" type="text/javascript">
  </script>
  <script data-senna-track="permanent" src="/o/frontend-js-lodash-web/lodash/util.js" type="text/javascript">
  </script>
  <link href="https://www.msci.com/o/msci/images/favicon.ico" rel="icon"/>
  <link data-senna-track="temporary" href="https://www.msci.com" rel="canonical"/>
  <link data-senna-track="temporary" href="https://www.msci.com/zh/" hreflang="zh-CN" rel="alternate"/>
  <link data-senna-track="temporary" href="https://www.msci.com" hreflang="

In [9]:
# Look for the MSCI rating container in the parsed page.
esgTable = html.find('div', class_='ratingdata-table')

if esgTable is None:
    # find() returns None when nothing matches; say so explicitly instead of printing a bare "None".
    print("No <div class='ratingdata-table'> found in the downloaded page - check that the request returned the ratings page")
else:
    print(esgTable)

# TODO: once the div is retrieved, parse the rating out of it. The thead/tbody
# parsing used for the Investors.com table was previously kept here as
# commented-out code; it targets a <table>, so it needs adapting for this <div>.

None
