In [None]:
"""
ESG Risk:

A Real-Time ESG Risk Scoring Framework for Company Filings 

Authored in 2025 by Aaron Walker


Change log:
- 04/09/2025: added text_to_scored_ESG_() and esg_risk_score()

"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pdfplumber
import re
import urllib.request
from pdf_parser import url_to_pdf
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline # for using the models


#Use ESG-BERT (Bidirectional Encoder Representations from Transformers) for natural language processing of company SEC filings
#Input: database of SEC filings by company.
#Output: ESG risk score and classification (Low, Med, High)

#To-do list:
# 1) ETL (Extract) pipeline using either responsibilityreports.com (voluntary ESG reports, likely affected by greenwashing) or SEC filings (best)
# 2) Transform & load: PDF sentence classification into E,S,G
# 3) Import ESG models, return ESG classifications per sentence
# 4) Import ESG sentence classifications, analyse and return sentiment
# 5) Calculate risk score and classify company
# 6+) Apply MLOps and productionise: containerise, orchestrate, monitor -> fine-tune ESG Hugging Face model -> visualisations in Streamlit server
# Write up paper and compare with ESG literature

In [82]:
# 1) Extract pdfs given url to report

#Extract from:
#https://www.responsibilityreports.com/ and
#https://www.sec.gov/edgar/search/

# Report PDFs to fetch, one URL per report.
company_url_list = [
"https://www.responsibilityreports.com/HostedData/ResponsibilityReports/PDF/NASDAQ_AMD_2024.pdf",
"https://www.responsibilityreports.com/HostedData/ResponsibilityReports/PDF/NYSE_MCD_2023.pdf",
"https://www.responsibilityreports.com/HostedData/ResponsibilityReports/PDF/LSE_BT_2023.pdf"
]

# url_to_pdf comes from the project-local pdf_parser module; its behaviour is not visible in this notebook.
# NOTE(review): presumably it downloads each PDF into data/ (a later cell reads data/NYSE_MCD_2023.pdf) - confirm.
url_to_pdf(company_url_list)

In [None]:
# 2) Transform & load: Convert from PDF to list of clean sentences

# Path of the report to parse, relative to the notebook's working directory.
pdf_location = r'data/NYSE_MCD_2023.pdf'
def pdfparser(pdf_location):
    """
    Reads a PDF and returns its text as a list of cleaned sentences.

    Args:
        pdf_location: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        list[str]: Non-empty, stripped sentences in document order. Sentences
        are split on whitespace that follows '.', '!' or '?'. An empty list is
        returned when no page yields any text.
    """
    def pdf_to_text(pdf_url):
        """
        Converts PDF into string (page texts joined by a single space)
        """
        with pdfplumber.open(pdf_url) as pdf:
            page_texts = (page.extract_text() for page in pdf.pages)
            # Pages for which extract_text() returns nothing (None / "") are skipped.
            return " ".join(text for text in page_texts if text).strip()

    def get_clean_text(text):
        """
        Cleans input text
        """
        # Remove the symbols first: stripping "•"/"*" AFTER collapsing whitespace
        # would turn "a • b" into "a  b" and leave a double space in the sentence.
        text = re.sub(r'[•*]', '', text)  # removes symbols • and *
        text = re.sub(r'\s+', ' ', text)  # collapses any whitespace run (newlines included) into one space
        return text.strip()

    def get_clean_text_to_list(words):
        """
        Splits clean text from words into sentences, then returns as list
        """
        sentences = re.split(r'(?<=[.!?])\s+', get_clean_text(words))
        return [s.strip() for s in sentences if s.strip()]

    return get_clean_text_to_list(pdf_to_text(pdf_location))

# Parse the selected report into a list of cleaned sentences.
clean_text = pdfparser(pdf_location)

In [None]:
# Sanity check: number of parsed sentences and the first three of them.
print(f"Length of company filing in sentences: {len(clean_text)}")
print(f"Sample sentences: {clean_text[:3]}")

Length of company filing in sentences: 1596
Sample sentences: ['2023–2024 Our Purpose & Impact Report McDonald’s Corporation Impact Report McDonald’s Corporation Purpose & Impact Report 2023–2024 Introduction Our Planet Food Quality & Sourcing Jobs, Inclusion & Empowerment Community Connection Appendix 2 Our purpose is to feed and foster communities.', 'As a leading global foodservice retailer, we believe it’s our responsibility to make a positive impact on the world.', 'We’re driving that impact by living our purpose.']


In [None]:
# 3) Import ESG Models, return esg classifications per sentence

### Load the models (takes ca. 1 min)
# One text-classification pipeline per ESG pillar, loaded in the order
# environmental -> social -> governance. Each id is a HuggingFace download path.
_esg_classifiers = []
for name in (
    "ESGBERT/EnvironmentalBERT-environmental",
    "ESGBERT/SocialBERT-social",
    "ESGBERT/GovernanceBERT-governance",
):
    # The tokenizer prepares the text for the model; the model classifies the text.
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSequenceClassification.from_pretrained(name)
    # The pipeline combines tokenizer and model into one callable.
    _esg_classifiers.append(pipeline("text-classification", model=model, tokenizer=tokenizer))

pipe_env, pipe_soc, pipe_gov = _esg_classifiers

# Sentiment model, loaded directly by its model id.
pipe_senti = pipeline("sentiment-analysis", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", framework="pt")

Device set to use cpu
Device set to use cpu
Device set to use cpu
Device set to use cpu


In [None]:
# example outputs
# Run every pipeline (environmental, social, governance, sentiment) on the
# first ten sentences and print the raw results, one pipeline per line.
test, test1, test2, test3 = (
    classifier(clean_text[:10])
    for classifier in (pipe_env, pipe_soc, pipe_gov, pipe_senti)
)
print(test, test1, test2, test3, sep="\n")

[{'label': 'environmental', 'score': 0.9169052243232727}, {'label': 'none', 'score': 0.9282312989234924}, {'label': 'none', 'score': 0.9234648942947388}, {'label': 'environmental', 'score': 0.9907447695732117}, {'label': 'environmental', 'score': 0.9916232228279114}, {'label': 'environmental', 'score': 0.8489450812339783}, {'label': 'environmental', 'score': 0.7868526577949524}, {'label': 'environmental', 'score': 0.9837659001350403}, {'label': 'none', 'score': 0.9931159615516663}, {'label': 'none', 'score': 0.9375211000442505}]
[{'label': 'social', 'score': 0.9994576573371887}, {'label': 'social', 'score': 0.8108693957328796}, {'label': 'social', 'score': 0.9995493292808533}, {'label': 'social', 'score': 0.9996368885040283}, {'label': 'social', 'score': 0.9993778467178345}, {'label': 'social', 'score': 0.9995601773262024}, {'label': 'none', 'score': 0.9356610178947449}, {'label': 'social', 'score': 0.999762237071991}, {'label': 'social', 'score': 0.9994425177574158}, {'label': 'social

In [None]:
# Sentences to score; text_to_scored_ESG_ reads this module-level name.
sample_text = clean_text
# Pipelines applied to every sentence: environmental, social, governance, then sentiment.
ESG_features = [pipe_env, pipe_soc, pipe_gov, pipe_senti]
def text_to_scored_ESG_(ESG_features, sentences=None):
    """
    Applies each ESG/sentiment pipeline to the sentences and returns a
    DataFrame of per-sentence labels and scores.

    Args:
        ESG_features: Iterable of pipelines. Each is called as
            ``pipe(sentences, padding=True, truncation=True)`` and must return
            one dict with "label" and "score" keys per sentence.
        sentences: Optional list of sentences to score. When omitted, the
            module-level ``sample_text`` is used, so existing calls that pass
            only ``ESG_features`` behave as before.

    Returns:
        pandas.DataFrame with a "sentence" column plus one label column and one
        score column per pipeline: Environment/Env_score for ``pipe_env``,
        Social/Social_score for ``pipe_soc``, Government/Gov_score for
        ``pipe_gov`` and Sentiment/Senti_score for any other pipeline.
    """
    if sentences is None:
        sentences = sample_text  # backward-compatible default (module-level name)
    sentences = list(sentences)

    # Output columns for the known module-level pipelines. "Government" holds the
    # governance label; the name is kept because esg_risk_score reads that column.
    known_columns = (
        (pipe_env, ("Environment", "Env_score")),
        (pipe_soc, ("Social", "Social_score")),
        (pipe_gov, ("Government", "Gov_score")),
    )
    # Any pipeline that is not one of the three above is treated as the sentiment pipeline.
    fallback_columns = ("Sentiment", "Senti_score")

    report_df = pd.DataFrame({"sentence": sentences})

    for pipe in ESG_features:
        results = pipe(sentences, padding=True, truncation=True)

        label_col, score_col = next(
            (columns for known, columns in known_columns if pipe == known),
            fallback_columns,
        )
        report_df[label_col] = [result["label"] for result in results]
        report_df[score_col] = [result["score"] for result in results]

    return report_df
        
# Score every sentence of the report with all pipelines in ESG_features.
df = text_to_scored_ESG_(ESG_features)
# Preview the first ten scored sentences.
df.head(10)

Unnamed: 0,sentence,Environment,Env_score,Social,Social_score,Government,Gov_score,Sentiment,Senti_score
0,2023–2024 Our Purpose & Impact Report McDonald...,environmental,0.916905,social,0.999458,none,0.990539,POSITIVE,0.996353
1,"As a leading global foodservice retailer, we b...",none,0.928231,social,0.810869,none,0.9933,POSITIVE,0.999735
2,We’re driving that impact by living our purpose.,none,0.923465,social,0.999549,none,0.997535,POSITIVE,0.99969
3,We believe the actions we continue to take tod...,environmental,0.990745,social,0.999637,none,0.994189,POSITIVE,0.999631
4,One of these actions is reporting on our envir...,environmental,0.991623,social,0.999378,none,0.990624,POSITIVE,0.737077
5,McDonald’s Corporation Purpose & Impact Report...,environmental,0.848945,social,0.99956,none,0.98625,POSITIVE,0.999402
6,Our System works as one to feed and foster nat...,environmental,0.786853,none,0.935661,none,0.994176,POSITIVE,0.999484
7,Whether we are helping deliver more solutions ...,environmental,0.983766,social,0.999762,none,0.985948,POSITIVE,0.987858
8,making balanced meals more people and the comm...,none,0.993116,social,0.999443,none,0.997466,POSITIVE,0.998435
9,together to advance collective impact.,none,0.937521,social,0.998949,none,0.995995,POSITIVE,0.999727


In [None]:
#ESG Risk scoring:

def esg_risk_score(row):
    """
    Computes the ESG risk contribution of one scored sentence.

    For each pillar (Environment, Social, Government) whose label is not
    "none", and only when the sentence's Sentiment is "NEGATIVE", the product
    of that pillar's score and Senti_score is added to the total.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) holding the label and
            score columns for the three pillars and for sentiment.

    Returns:
        The summed risk; 0 when no pillar qualifies.
    """
    pillar_columns = (
        ("Environment", "Env_score"),
        ("Social", "Social_score"),
        ("Government", "Gov_score"),
    )
    total = 0
    for label_col, score_col in pillar_columns:
        # Same check order as before: pillar label first, then sentiment.
        if row[label_col] != "none" and row["Sentiment"] == "NEGATIVE":
            total += row[score_col] * row["Senti_score"]
    return total

# Apply to dataframe: adds a per-sentence ESG_risk_score column to df (in place).
df["ESG_risk_score"] = df.apply(esg_risk_score, axis=1)

# Optional: total ESG risk for the whole document/report.
# This is a plain sum of the per-sentence scores; it is not normalised by the number of sentences.
total_esg_risk = df["ESG_risk_score"].sum()

print(df[["sentence","ESG_risk_score"]])
print("Total ESG risk:", total_esg_risk)

                                               sentence  ESG_risk_score
0     2023–2024 Our Purpose & Impact Report McDonald...        0.000000
1     As a leading global foodservice retailer, we b...        0.000000
2      We’re driving that impact by living our purpose.        0.000000
3     We believe the actions we continue to take tod...        0.000000
4     One of these actions is reporting on our envir...        0.000000
...                                                 ...             ...
1591                      Given the Russia and Vietnam.        0.000000
1592  McDonald’s requires all wood fiber sourced fro...        0.934615
1593  Exclusions: Primary regions, with the exceptio...        0.000000
1594  Exclusions: liners, straws and limited locally...        0.000000
1595  Soy used as an ingredient in McDonald’s produc...        0.000000

[1596 rows x 2 columns]
Total ESG risk: 191.00323072870276


In [80]:
# Rank sentences from highest to lowest ESG risk score (df itself is left unsorted).
df_sorted = df.sort_values(by="ESG_risk_score", ascending=False)
df_sorted

Unnamed: 0,sentence,Environment,Env_score,Social,Social_score,Government,Gov_score,Sentiment,Senti_score,ESG_risk_score
299,Perfluorinated compounds are known to be histo...,environmental,0.990497,social,0.998896,none,0.991520,NEGATIVE,0.994213,1.977881
216,For the purposes 8 Underrepresented Groups Pay...,environmental,0.970604,social,0.998744,none,0.995859,NEGATIVE,0.999469,1.968302
97,"In these cases, we have relied In This Report ...",environmental,0.989429,social,0.999451,none,0.993774,NEGATIVE,0.985005,1.959056
515,Other similar material items to be launched to...,environmental,0.971610,social,0.999118,none,0.988126,NEGATIVE,0.993948,1.958801
1417,McDonald’s Corporation Purpose & Impact Report...,environmental,0.942567,social,0.994735,none,0.959237,NEGATIVE,0.984913,1.908073
...,...,...,...,...,...,...,...,...,...,...
592,We’re committed to reducing food production im...,environmental,0.983775,none,0.996080,none,0.995648,POSITIVE,0.975839,0.000000
591,We continue to monitor industry standards McDo...,environmental,0.832295,social,0.999659,none,0.988536,POSITIVE,0.997631,0.000000
589,Includes all suppliers of primary- acreage tha...,none,0.997056,none,0.999945,none,0.998028,NEGATIVE,0.895884,0.000000
588,McDonald’s commits to not intentionally adding...,none,0.776882,none,0.999904,none,0.987899,POSITIVE,0.851106,0.000000


In [None]:
# 5) Calculate risk score and classify company: save to a DataFrame or database (Postgres) and add a FastAPI endpoint to look up a company and return its risk score and classification
#to do:
#def store_ESG_Risk_Scores():

In [None]:
#def Run_ESG_Risk_analysis(list_of_companies):
    

In [None]:
#def return_ESG_Risk_Score():
#use FastAPI to query and return scores from ESG_Risk_database
    

In [None]:
# 6+) Apply MLOps and productionise: containerise, orchestrate, monitor -> fine-tune ESG Hugging Face model -> visualisations in Streamlit server