In [1]:
 import pandas as pd

In [19]:
# Read the cleaned report data and keep only the three columns this
# notebook works with: the raw sentence, its cleaned text, and the year.
cleaned_reports = pd.read_csv('data/cleaned_reports.csv')[
    ['Sentence', 'cleaned_text', 'Year']
]
cleaned_reports

Unnamed: 0,Sentence,cleaned_text,Year
0,Foreword Enabling sustainability for our compa...,foreword enabling sustainability company custo...,2022
1,"Extreme weather caused devastating droughts, w...",extreme weather caused devastating droughts wi...,2022
2,We felt the effects of climate change like nev...,felt effects climate change like never planet ...,2022
3,The most recent report from the Intergovernmen...,recent report intergovernmental panel climate ...,2022
4,Meaningful climate action requires an enduring...,meaningful climate action requires enduring co...,2022
...,...,...,...
9614,Microsofts diversity education programs provid...,microsofts diversity education programs provid...,2003
9615,Businesses Microsoft sponsors a variety of pro...,businesses sponsors variety programs designed ...,2003
9616,"Since its inception, the national awardwinning...",since inception national awardwinning build bu...,2003
9617,"At Microsoft, our commitment to diversity goes...",commitment diversity goes beyond recruiting em...,2003


In [20]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Make sure the VADER lexicon is available before building the analyzer
nltk.download('vader_lexicon')

# One shared analyzer instance is reused for every sentence
sia = SentimentIntensityAnalyzer()


def _compound_score(sentence):
    """Return the 'compound' entry of VADER's polarity scores for one sentence."""
    return sia.polarity_scores(sentence)['compound']


# Score the original `Sentence` text (not `cleaned_text`) and store the result
cleaned_reports['vader_score'] = cleaned_reports['Sentence'].apply(_compound_score)

cleaned_reports

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/macbookair/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0,Sentence,cleaned_text,Year,vader_score
0,Foreword Enabling sustainability for our compa...,foreword enabling sustainability company custo...,2022,0.0000
1,"Extreme weather caused devastating droughts, w...",extreme weather caused devastating droughts wi...,2022,-0.7003
2,We felt the effects of climate change like nev...,felt effects climate change like never planet ...,2022,0.2500
3,The most recent report from the Intergovernmen...,recent report intergovernmental panel climate ...,2022,-0.5106
4,Meaningful climate action requires an enduring...,meaningful climate action requires enduring co...,2022,0.8519
...,...,...,...,...
9614,Microsofts diversity education programs provid...,microsofts diversity education programs provid...,2003,0.5574
9615,Businesses Microsoft sponsors a variety of pro...,businesses sponsors variety programs designed ...,2003,0.7096
9616,"Since its inception, the national awardwinning...",since inception national awardwinning build bu...,2003,0.6908
9617,"At Microsoft, our commitment to diversity goes...",commitment diversity goes beyond recruiting em...,2003,0.3818


In [21]:
# Drop rows whose `cleaned_text` is missing before that column is vectorized.
# Rebinding to a fresh copy (instead of `inplace=True`) leaves any frame that
# was already displayed untouched and keeps later column assignments on
# `cleaned_reports` free of chained-assignment warnings.
cleaned_reports = cleaned_reports.dropna(subset=['cleaned_text']).copy()

In [22]:
# Sanity check: report whether any missing values remain in `cleaned_text`.
has_nulls = cleaned_reports['cleaned_text'].isna().any()
print("Are there any null values in 'cleaned_text'?", has_nulls)


Are there any null values in 'cleaned_text'? False


In [23]:
from joblib import dump, load
# Restore the persisted classifier from disk
loaded_model = load('models/voting_classifier_model.joblib')
print("Model loaded")

# Restore the persisted vectorizer from disk
loaded_vectorizer = load('models/tfidf_vectorizer.joblib')
print("Vecotrizer loaded")

# Convert the cleaned text column into the feature representation
# produced by the loaded vectorizer
transformed_texts = loaded_vectorizer.transform(cleaned_reports.loc[:, 'cleaned_text'])

Model loaded
Vecotrizer loaded


In [24]:
# Get predictions: run the loaded model on the vectorized `cleaned_text`.
# The result is stored as the `predicted_class` column in a later cell.
predictions = loaded_model.predict(transformed_texts)

In [25]:
# Store the model's output as a new `predicted_class` column, then display the frame
cleaned_reports.loc[:, 'predicted_class'] = predictions
cleaned_reports

Unnamed: 0,Sentence,cleaned_text,Year,vader_score,predicted_class
0,Foreword Enabling sustainability for our compa...,foreword enabling sustainability company custo...,2022,0.0000,Social
1,"Extreme weather caused devastating droughts, w...",extreme weather caused devastating droughts wi...,2022,-0.7003,Environmental
2,We felt the effects of climate change like nev...,felt effects climate change like never planet ...,2022,0.2500,Environmental
3,The most recent report from the Intergovernmen...,recent report intergovernmental panel climate ...,2022,-0.5106,Environmental
4,Meaningful climate action requires an enduring...,meaningful climate action requires enduring co...,2022,0.8519,Environmental
...,...,...,...,...,...
9614,Microsofts diversity education programs provid...,microsofts diversity education programs provid...,2003,0.5574,Social
9615,Businesses Microsoft sponsors a variety of pro...,businesses sponsors variety programs designed ...,2003,0.7096,Social
9616,"Since its inception, the national awardwinning...",since inception national awardwinning build bu...,2003,0.6908,Social
9617,"At Microsoft, our commitment to diversity goes...",commitment diversity goes beyond recruiting em...,2003,0.3818,Social


In [26]:
# Keep only the rows whose predicted class is something other than 'General'
cleaned_reports = cleaned_reports.loc[cleaned_reports['predicted_class'].ne('General')]
cleaned_reports

Unnamed: 0,Sentence,cleaned_text,Year,vader_score,predicted_class
0,Foreword Enabling sustainability for our compa...,foreword enabling sustainability company custo...,2022,0.0000,Social
1,"Extreme weather caused devastating droughts, w...",extreme weather caused devastating droughts wi...,2022,-0.7003,Environmental
2,We felt the effects of climate change like nev...,felt effects climate change like never planet ...,2022,0.2500,Environmental
3,The most recent report from the Intergovernmen...,recent report intergovernmental panel climate ...,2022,-0.5106,Environmental
4,Meaningful climate action requires an enduring...,meaningful climate action requires enduring co...,2022,0.8519,Environmental
...,...,...,...,...,...
9614,Microsofts diversity education programs provid...,microsofts diversity education programs provid...,2003,0.5574,Social
9615,Businesses Microsoft sponsors a variety of pro...,businesses sponsors variety programs designed ...,2003,0.7096,Social
9616,"Since its inception, the national awardwinning...",since inception national awardwinning build bu...,2003,0.6908,Social
9617,"At Microsoft, our commitment to diversity goes...",commitment diversity goes beyond recruiting em...,2003,0.3818,Social


In [27]:

# Mean VADER compound score for every (Year, predicted_class) pair.
# `reset_index()` turns the two group keys back into ordinary columns so the
# result is a flat table: Year, predicted_class, vader_score.
yearly_category_avg_score = (
    cleaned_reports
    .groupby(['Year', 'predicted_class'])['vader_score']
    .mean()
    .reset_index()
)

# The aggregation is per year and per predicted class (not per day), so the
# label says so.
print("Average yearly sentiment by predicted class:")
yearly_category_avg_score

Average Daily Sentiment:


Unnamed: 0,Year,predicted_class,vader_score
0,2003,Environmental,-0.026744
1,2003,Governance,0.253578
2,2003,Social,0.44503
3,2005,Environmental,0.27945
4,2005,Governance,0.519736
5,2005,Social,0.482264
6,2006,Environmental,0.43868
7,2006,Governance,0.263533
8,2006,Social,0.500849
9,2007,Environmental,0.35928


In [28]:
# Persist the per-year, per-class average scores as CSV.
# NOTE(review): with pandas' default `index=True` the row index is written as
# an unnamed first column; pass `index=False` if downstream readers do not
# rely on it.
yearly_category_avg_score.to_csv(path_or_buf='data/ESG_reports_scores.csv')