In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import plotly.graph_objects as go
from plotly.subplots import make_subplots


from helper_functions import filter_data, calculate_daily_data

# Base font size shared by the figure layouts below
# (the Fig. 3 legends use textfont_size - 3)
textfont_size = 20

In [2]:
# --- Load the article dataset (JSON Lines: one record per line) ---
data = pd.read_json('data/full_data/nano_esg.json', lines=True)

# --- Analysis window ---
# Passed to calculate_daily_data and used as the x-extent of the
# average-sentiment reference lines.
start_date = '2023-01-01'
end_date = '2024-09-16'

# --- Lookups ---
companies = list(data['company'].unique())
aspect_filters = ['environmental', 'social', 'governance']

# Numeric encoding of the sentiment labels
sent_dict = {'positive': 1, 'negative': -1, 'neutral': 0}

# Colour of each aspect in the plots
aspect_colors = {'environmental': 'forestgreen', 'social': 'cornflowerblue', 'governance': 'darkmagenta'}

# --- Derived column ---
# A label missing from sent_dict raises KeyError here.
data['sentiment_int'] = data['sentiment'].apply(lambda label: sent_dict[label])

In [3]:
# Per-aspect views over the whole dataset.
# NOTE(review): the second argument of filter_data is None here — presumably
# "no company restriction"; confirm in helper_functions.
data_all_asp = {}
daily_data_asp = {}
for aspect_filter in aspect_filters:
    all_data = filter_data(data, None, aspect_filter)
    data_all_asp[aspect_filter] = all_data

    # A copy is handed over so the helper cannot modify the frame stored above.
    daily_data = calculate_daily_data(
        all_data.copy(),
        start_date,
        end_date,
        fill_edges=False,
    )
    daily_data_asp[aspect_filter] = daily_data

# Full dataset indexed by article date (needed for the weekly resampling later)
data_all_graph = data.set_index('date')

## Section 4

### Fig. 2: Amount of Articles per Company

In [4]:
# Articles per company, counted once per aspect: a single value_counts() pass
# replaces a boolean mask over the whole frame for every (company, aspect) pair.
company_counts_by_aspect = {
    aspect_filter: data_all_asp[aspect_filter]['company'].value_counts()
    for aspect_filter in aspect_filters
}

# asp_count_per_comp[company][aspect] -> number of articles (plain int)
asp_count_per_comp = {}
for company in companies:
    asp_count_per_comp[company] = {
        # A company absent from an aspect's frame counts as 0.
        aspect_filter: int(company_counts_by_aspect[aspect_filter].get(company, 0))
        for aspect_filter in aspect_filters
    }

# asp_ratio_per_comp[company][aspect] -> share of that aspect within the
# company's articles; the per-company total is computed once, not per aspect.
asp_ratio_per_comp = {}
for company in companies:
    aspect_counts = asp_count_per_comp[company]
    company_total = np.sum(list(aspect_counts.values()))
    asp_ratio_per_comp[company] = {
        aspect: count / company_total for aspect, count in aspect_counts.items()
    }

In [5]:
# Rank companies by their total article count across all aspects, largest first
ranked_companies = sorted(
    asp_count_per_comp,
    key=lambda name: sum(asp_count_per_comp[name].values()),
    reverse=True,
)
asp_count_per_comp = {name: asp_count_per_comp[name] for name in ranked_companies}

# Two rows sharing the company x-axis:
# absolute counts on top, aspect ratios underneath
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    row_heights=[0.67, 0.33],  # relative heights of the two rows
    vertical_spacing=0.02,     # gap between the two rows
)

# One bar trace per aspect in each row
for aspect_filter in aspect_filters:
    trace_style = dict(
        name=aspect_filter.title(),
        marker_color=aspect_colors[aspect_filter],
    )

    count_bar = go.Bar(
        x=list(ranked_companies),
        y=[asp_count_per_comp[name][aspect_filter] for name in ranked_companies],
        **trace_style,
    )
    fig.add_trace(count_bar, row=1, col=1)

    # Same name and colour as the count bar, so it is left out of the legend
    ratio_bar = go.Bar(
        x=list(ranked_companies),
        y=[asp_ratio_per_comp[name][aspect_filter] for name in ranked_companies],
        showlegend=False,
        **trace_style,
    )
    fig.add_trace(ratio_bar, row=2, col=1)

# Legend box: its bottom-right corner sits at (0.98, 0.8)
legend_box = dict(
    x=0.98,
    y=0.8,
    xanchor='right',
    yanchor='bottom',
    bgcolor='rgba(255, 255, 255, 0.8)',  # semi-transparent white background
    bordercolor='black',
    borderwidth=1,
    font=dict(size=textfont_size),
)

fig.update_layout(
    barmode='relative',  # aspect bars of one company are stacked
    title='Amount of Articles per Company',
    bargap=0.2,
    height=1000,
    width=1500,
    font=dict(size=textfont_size),
    xaxis2=dict(tickangle=45),                  # rotate the company labels of the bottom row
    yaxis1=dict(title='Number of Articles'),    # top row
    yaxis2=dict(title='Ratio of Aspects'),      # bottom row
    legend=legend_box,
)

# Render the figure
fig.show()

### Table 1: Number & Ratio of Articles

In [6]:
# Sentiment labels, in the order they appear as keys of both result dicts
sentiment_labels = ('positive', 'negative', 'neutral')

# sent_count_per_comp[company][label] -> number of articles
# sent_ratio_per_comp[company][label] -> share of that label within the company
sent_count_per_comp = {}
sent_ratio_per_comp = {}
for company in companies:
    # Select the company's rows once, then count each label inside that subset
    company_rows = data_all_graph[data_all_graph['company'] == company]
    label_counts = {
        label: len(company_rows[company_rows['sentiment'] == label])
        for label in sentiment_labels
    }
    sent_count_per_comp[company] = label_counts

    company_total = np.sum(list(label_counts.values()))
    sent_ratio_per_comp[company] = {
        label: label_counts[label] / company_total for label in sentiment_labels
    }

In [7]:
# Total number of articles per ESG sentiment, summed across all companies
# (rows: sentiment labels, columns: companies)
sentiment_count_table = pd.DataFrame(sent_count_per_comp)
sentiment_count_table.sum(axis='columns')

positive    27304
negative    17668
neutral      6115
dtype: int64

In [8]:
# Unweighted mean over companies of each sentiment's share of articles
sentiment_ratio_table = pd.DataFrame(sent_ratio_per_comp)
sentiment_ratio_table.mean(axis='columns')

positive    0.543615
negative    0.327408
neutral     0.128978
dtype: float64

In [9]:
# Total number of articles per ESG aspect, summed across all companies
# (rows: aspects, columns: companies)
aspect_count_table = pd.DataFrame(asp_count_per_comp)
aspect_count_table.sum(axis='columns')

environmental    24546
social           15086
governance       11455
dtype: int64

In [10]:
# Unweighted mean over companies of each aspect's share of articles
aspect_ratio_table = pd.DataFrame(asp_ratio_per_comp)
aspect_ratio_table.mean(axis='columns')

environmental    0.357421
social           0.343043
governance       0.299536
dtype: float64

## Section 6.1

### Table 2: Distribution of relevance scores

In [11]:
# Number of articles per relevance score, listed in ascending score order
relevance_score_counts = data['relevance_score'].value_counts()
relevance_score_counts.sort_index()

relevance_score
2         2
3       122
4       983
5      1380
6     10465
7     18219
8     17113
9      2780
10       23
Name: count, dtype: int64

### Fig. 3 - left: Weekly Count of Articles per Aspect and in Total

In [12]:
# Weekly article counts: one row per week, one column per aspect.
# Weeks in which an aspect has no entry are filled with 0.
weekly_sizes = data_all_graph.groupby('aspect').resample('W').size()
counts = weekly_sizes.unstack(fill_value=0).T

fig = go.Figure()

# One line per aspect, drawn in that aspect's colour
for aspect in counts.columns:
    aspect_line = go.Scatter(
        x=counts.index,
        y=counts[aspect],
        mode='lines+markers',
        name=aspect.title(),
        line=dict(color=aspect_colors[aspect]),
    )
    fig.add_trace(aspect_line)

# Sum over all aspects per week
total_line = go.Scatter(
    x=counts.index,
    y=counts.sum(axis=1),
    mode='lines+markers',
    name='Total',
)
fig.add_trace(total_line)

# Black, mirrored axis line used for both axes
axis_frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)

# Legend box: its bottom-right corner sits at (0.98, 0.78)
legend_box = dict(
    x=0.98,
    y=0.78,
    xanchor='right',
    yanchor='bottom',
    bgcolor='rgba(255, 255, 255, 0.8)',  # semi-transparent white background
    bordercolor='black',
    borderwidth=1,
    font=dict(size=textfont_size-3),
)

fig.update_layout(
    title='Weekly Number of Articles',
    xaxis_title='Week',
    yaxis_title='Count',
    xaxis_tickformat='%Y-%m-%d',
    width=800,
    height=800,
    font=dict(size=textfont_size),
    xaxis=dict(axis_frame),
    yaxis=dict(axis_frame),
    legend=legend_box,
)

# Render the figure
fig.show()

### Fig. 3 - right: Moving Average of the Daily Sentiment

In [13]:
### Sentiment over Time per Aspect
fig = go.Figure()

for aspect_filter in aspect_filters:
    # Skip empty/None entries
    if not aspect_filter:
        continue

    aspect_color = aspect_colors[aspect_filter]
    aspect_daily = daily_data_asp[aspect_filter]

    # Rolling mean of the daily sentiment over a 30-row window
    # (a value is produced once at least 2 observations are available)
    smoothed_sentiment = aspect_daily['sentiment_int'].rolling(30, min_periods=2).mean()
    fig.add_trace(go.Scatter(
        x=aspect_daily.index,
        y=smoothed_sentiment,
        mode='lines',
        name=aspect_filter.title(),
        line=dict(color=aspect_color),
    ))

    # Mean sentiment over all articles of this aspect
    avg_score = data_all_asp[aspect_filter]['sentiment_int'].mean()

    # Dashed horizontal reference line at the mean sentiment,
    # spanning the whole analysis window
    fig.add_trace(go.Scatter(
        x=[start_date, end_date],
        y=[avg_score, avg_score],
        mode='lines',
        line=dict(color=aspect_color, dash='dash'),
        showlegend=False,
    ))

    # Print the mean (2 decimals) just right of the line's end,
    # in the colour of the aspect
    fig.add_annotation(
        x=end_date,
        y=avg_score,
        text=f'<b>{avg_score:.2f}</b>',
        showarrow=False,
        xanchor='left',
        yanchor='middle',
        font=dict(color=aspect_color, size=16),
    )

# Black, mirrored axis line used for both axes
axis_frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)

# Legend box: its bottom-right corner sits at (0.285, 0.01)
legend_box = dict(
    x=0.285,
    y=0.01,
    xanchor='right',
    yanchor='bottom',
    bgcolor='rgba(255, 255, 255, 0.8)',  # semi-transparent white background
    bordercolor='black',
    borderwidth=1,
    font=dict(size=textfont_size-3),
)

fig.update_layout(
    title='Moving Average of Sentiment',
    width=800,
    height=800,
    xaxis_tickformat='%Y-%m-%d',
    xaxis_title='Date',
    yaxis_title='Sentiment',
    font=dict(size=textfont_size),
    xaxis=dict(axis_frame),
    yaxis=dict(axis_frame),
    legend=legend_box,
)


# Render the figure
fig.show()