# Notebook to reproduce paper results

Simply execute the cells from top to bottom. You can experiment with different parameters if you wish.

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter
from itertools import accumulate
from dtaidistance import dtw_visualisation as dtwvis
from company_info import company_info_list

from tqdm.auto import tqdm
from esg_dtw import filter_data, get_daily_estimator, get_price_data, apply_dtw

In [2]:
# --- Analysis period ---
# Other windows that were tried: start '2024-01-01'; end '2024-02-29' or '2023-12-31'.
start_date, end_date = '2023-01-01', '2024-09-17'

# --- Hyperparameters ---
# Keep articles whose relevance score passes this cutoff. Not used in the paper -
# instead the relevance score is used as a weight (see weighted_relevance).
relevance_cutoff = 0
# Weight each article by its relevance score (identity mapping for scores 1..10)
weighted_relevance = True
weighted_relevance_dict = {score: score for score in range(1, 11)}
dtw_window = 5

# All aspect-combinations. None does not filter by aspect, meaning it includes all aspects
aspect_filters = [None] + ['environmental', 'social', 'governance', 'env_soc', 'env_gov', 'soc_gov']

# Sentiment label -> signed integer
sent_dict = dict(positive=1, negative=-1, neutral=0)

# --- Load data (JSON lines) and derive helper columns ---
data = pd.read_json('nano_esg_2024_09_17.json', lines=True)
data['sentiment_int'] = data['sentiment'].apply(sent_dict.__getitem__)
# Keep an untouched copy of the release timestamp
data['original_date'] = data['date'].copy()
# Shift the date one day forward for late releases (hour strictly greater than the cutoff)
def add_day_if_late(timestamp, cutoff_hour=17):
    """Return `timestamp` moved one day ahead when it was released late in the day.

    A timestamp counts as late when `timestamp.hour > cutoff_hour`; with the
    default cutoff this means 18:00 or later, while anything up to 17:59 is
    returned unchanged.
    """
    is_late = timestamp.hour > cutoff_hour
    return timestamp + pd.Timedelta(days=1) if is_late else timestamp
# Apply the late-release shift to every article; the unshifted value stays in data['original_date']
data['date'] = data['date'].apply(add_day_if_late)

In [None]:
# If desired, only perform calculations for a single company (in the paper, we use 'daimler' as an example)
selected_company = None

# Get the price data and ESG data per aspect for each company
company_aspect_daily_data_dict = {}   # company -> aspect filter -> frame returned by filter_data (NaNs set to 0)
company_aspect_esg_sent_dict = {}     # company -> aspect filter -> accumulated sentiment on dates that have a price
company_price_dict = {}               # company -> list of prices
company_date_dt_dict = {}             # company -> list of index values of the price frame
for company in tqdm(company_info_list.keys(), desc='Gathering data...'):
    if selected_company and company != selected_company:
        continue
    company_aspect_daily_data_dict[company] = {}
    company_aspect_esg_sent_dict[company] = {}

    #Get stock data
    df = get_price_data(company, start_date = start_date, end_date = end_date)
    company_price_dict[company] = df['Price'].to_list()
    company_date_dt_dict[company] = df.index.to_list()
    # Build the date lookup once per company: a set gives constant-time membership,
    # instead of rebuilding and scanning a list for every row of every aspect.
    price_dates = set(company_date_dt_dict[company])
    for aspect_filter in aspect_filters:
        daily_data = filter_data(data, start_date, end_date, company, aspect_filter, relevance_cutoff, weighted_relevance)
        daily_data.fillna(0, inplace=True)

        #get daily sentiment - only for dates that also have a price
        sentiment_weekdays = [
            row['sentiment_int']
            for index, row in daily_data.iterrows()
            if index in price_dates
        ]
        # Running total of the daily sentiment
        esg_sentiment = list(accumulate(sentiment_weekdays))

        company_aspect_daily_data_dict[company][aspect_filter] = daily_data
        company_aspect_esg_sent_dict[company][aspect_filter] = esg_sentiment


In [None]:
# Here, we iterate over the time series and perform DTW at each point to determine the best aspect to use for the estimator without using future information
# At the same time, we keep track of multiple variables which we will investigate in the plots below
#
# Outputs, all keyed by company and aligned index-for-index with company_date_dt_dict[company]
# (the first two entries are placeholders because the day loop starts at index 2):
#   company_estimator_dict, company_shifted_estimator_dict, company_best_aspect_dict,
#   distance_dict, dtw_dict_per_comp

mode=''              #General or ''. If General, we only consider the None aspect filter (all aspects combined)
use_lag_to_invest = True
data_window = 70         #days - set to None to simply take everything before this point

company_estimator_dict = {}         #here, we always just append the currently decided estimator (0 on no-trade days)
company_shifted_estimator_dict = {} #value of the chosen aspect's daily series; on no-trade days best_aspect is None, so the None (all-aspects) series is used
company_best_aspect_dict = {}       #chosen aspect per day, None when no aspect is traded
distance_dict = {}                  #unmanipulated DTW distance of the aspect with the smallest (manipulated) distance
dtw_dict_per_comp = {}  #for paper
for company in tqdm(company_info_list.keys(), desc='Calculating daily return estimators...'):
    if selected_company and company != selected_company:
        continue
    company_estimator_dict[company] = [0, 0]
    company_shifted_estimator_dict[company] = [0, 0]
    company_best_aspect_dict[company] = [None, None]
    distance_dict[company] = [0, 0]
    dtw_dict_per_comp[company] = [{}, {}]
    #Iterate over days, determine best aspect and set the return estimator
    for i in range(2, len(company_date_dt_dict[company])):
        aspect_distance_dict_True = {}  #Keeps track of the actual distances, not manipulated by lag
        aspect_distance_dict = {}       #Keeps track of the distances, manipulated by lag (set to 1000 if the final lag is <= 0)
        aspect_lag_dict = {}            #Keeps track of the lag of the aspects
        no_trade_dict = {}              #Keeps track of whether we are allowed to trade this aspect or not
        dtw_dict = {}
        for aspect_filter in aspect_filters:
            dtw_dict[aspect_filter] = {}    #temp
            no_trade_dict[aspect_filter] = False
            #if mode is general, retrieve only the None aspect filter
            if mode == 'General' and aspect_filter is not None:
                continue
            #retrieve data from dicts - including that day
            if data_window:
                #last data_window entries ending at (and including) day i
                price = company_price_dict[company][np.max([0, i-data_window+1]):i+1]
                esg_sentiment = company_aspect_esg_sent_dict[company][aspect_filter][np.max([0, i-data_window+1]):i+1]
            else:
                price = company_price_dict[company][:i+1]
                esg_sentiment = company_aspect_esg_sent_dict[company][aspect_filter][:i+1]
            #Normalize s1, s2, get sentiment gradient and calculate best path
            #The window passed to apply_dtw is capped at the length of the series
            s1, s2, d, best_path = apply_dtw(price, esg_sentiment, int(np.min([len(esg_sentiment), dtw_window])))

            dtw_dict[aspect_filter]['d'] = d
            dtw_dict[aspect_filter]['best_path'] = best_path
            dtw_dict[aspect_filter]['price'] = s1
            dtw_dict[aspect_filter]['sentiment'] = s2

            aspect_distance_dict_True[aspect_filter] = d
            #Calculate lag (first minus second index of each path step) - set distance to 1000 if the final lag is zero or negative
            lag = [l[0] - l[1] for l in best_path]
            if lag:
                if lag[-1] <= 0 and use_lag_to_invest:
                    #price is leading esg-sentiment -> we cannot use this to predict future returns
                    no_trade_dict[aspect_filter] = True     #this is not enough because there could still be an aspect which has pos lag but slightly
                                                            #worse distance which would not be picked without manipulating the distance
                    d = 1000
            aspect_distance_dict[aspect_filter] = d
            #-10 is stored as the lag when the warping path is empty
            aspect_lag_dict[aspect_filter] = lag[-1] if lag else -10

        dtw_dict_per_comp[company].append(dtw_dict)

        #Get aspect with smallest distance
        best_aspect = min(aspect_distance_dict, key=aspect_distance_dict.get)

        distance_dict[company].append(aspect_distance_dict_True[best_aspect])


        #If the smallest distance is still too big, set the estimator to be neutral
        #Note: `and` binds tighter than `or`, so this reads as: distance >= 1000 or (no_trade and mode/lag condition)
        if aspect_distance_dict[best_aspect] >= 1000 or no_trade_dict[best_aspect] and (mode != 'General' and use_lag_to_invest is not False):
            company_estimator_dict[company].append(0)
            best_aspect = None
        else:
            #first value of the chosen aspect's daily row for this date
            company_estimator_dict[company].append(company_aspect_daily_data_dict[company][best_aspect].loc[company_date_dt_dict[company][i]].values[0])

        #Gather variables
        company_best_aspect_dict[company].append(best_aspect)
        #best_aspect may have been reset to None above; None is also the key of the all-aspects series
        company_shifted_estimator_dict[company].append(company_aspect_daily_data_dict[company][best_aspect].loc[company_date_dt_dict[company][i]].values[0])

#### Create Combined ESG-Sentiment Graph

We create this graph by always taking the sentiment of the aspect combination determined above. Then, we need to re-normalize the resulting graph using apply_dtw(), so that the price and the sentiment are on the same scale.

In [None]:
# Per company: normalized price and accumulated composed ESG sentiment, plus their DTW distance and path
composed_company_dict = {}
for comp in tqdm(company_info_list.keys(), desc='Retrieving Composed ESG Sentiment Series...'):
    # Honour the single-company switch used by the earlier cells; without this guard
    # the lookups below raise KeyError for every company that was skipped there.
    if selected_company and comp != selected_company:
        continue

    price = company_price_dict[comp]
    composed_esg = company_shifted_estimator_dict[comp]
    # Accumulate the daily values into a cumulative series before comparing with the price
    composed_esg_acc = list(accumulate(composed_esg))
    s1, s2, d, best_path = apply_dtw(price, composed_esg_acc, dtw_window)
    composed_company_dict[comp] = {'price': s1, 'esg': s2, 'distance': d, 'best_path': best_path}

#### HP Optimization - This creates .csv files with the estimators for every HP combination

We provide all the estimator .csv files for the HP optimization presented in the paper in the folder _hp_opt_. If you want to reproduce them, be aware that the different source of price data (yfinance) can lead to slightly different estimators; however, they should be very similar.

We do not provide a way to do portfolio optimization in this notebook, but if you have a pipeline set up you can use the files and test it on your own. Be sure to adjust the file_name according to the path you want to save the .csv files at.

Note: executing this for a large number of combinations can take a long time

In [None]:
# We can use this code to create a set of estimator files for optimizing the hyperparameters
#
# The loop variables and the per-combination result use cell-local names (hp_*), so that
# running this cell does not overwrite the notebook-level dtw_window, data_window and
# company_estimator_dict that other cells read.

dtw_window_list = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15]
data_window_list = [20, 30, 40, 50, 60, 70, 80, 90, 100]

use_lag_to_invest = True
# Shared inputs handed to get_daily_estimator for every hyperparameter combination
daily_est_kwargs = {
    'company_date_dt_dict': company_date_dt_dict,
    'company_price_dict': company_price_dict,
    'company_aspect_esg_sent_dict': company_aspect_esg_sent_dict,
    'company_aspect_daily_data_dict': company_aspect_daily_data_dict,
    'aspect_filters': aspect_filters,
    'use_lag_to_invest': use_lag_to_invest,
}

for hp_dtw_window in dtw_window_list:
    for hp_data_window in data_window_list:
        print(f'\rCalculating for {hp_dtw_window} DTW window and {hp_data_window} data window...', end='')
        hp_estimator_dict = get_daily_estimator(hp_dtw_window, hp_data_window, daily_est_kwargs)

        #Create dataframe: one column per company, outer-joined on the date index
        company_ret_est_df = pd.DataFrame()
        for company in company_info_list.keys():
            company_df = pd.DataFrame(hp_estimator_dict[company])
            company_df.columns = [company_info_list[company]['db_name']]
            company_df.index = company_date_dt_dict[company]
            company_df.index.names = ['Date']
            if len(company_ret_est_df) == 0:
                company_ret_est_df = company_df
            else:
                company_ret_est_df = company_ret_est_df.merge(company_df, on='Date', how='outer')

        # One file per hyperparameter combination
        file_name = f'estimator_files/hp_opt/esg_sentiments_{hp_data_window}DW_{hp_dtw_window}dtw.csv'
        company_ret_est_df.to_csv(file_name)
        

## Results from Paper

### Regime Ratios - Table 3 in the paper

In [6]:
# Per company: number of days each aspect was selected (falsy entries such as None are ignored)
regime_lengths_comp = {}
# Per company: total number of days on which any aspect was selected
regime_lengths_sum = {}
for comp, best_aspects in company_best_aspect_dict.items():
    named_counts = {
        aspect: days
        for aspect, days in Counter(best_aspects).items()
        if aspect
    }
    regime_lengths_comp[comp] = named_counts
    regime_lengths_sum[comp] = sum(named_counts.values())

In [7]:
# Industry groupings (company keys) used for the rows of the table
auto_industry = ['daimler', 'bmw', 'vw', 'porsche_ag']
finance_industry = ['deutsche_bank', 'commerzbank', 'deutsche_boerse']
insurance_industry = ['allianz', 'munich_re', 'hannover_re']
chemical_industry = ['basf', 'bayer', 'covestro', 'brenntag']
consumer_industry = ['beiersdorf', 'henkel', 'adidas', 'zalando']
infrastructure_industry = ['rwe', 'siemens_energy', 'eon']

# Days per aspect summed over every company that has a best-aspect series
all_comps = list(company_best_aspect_dict)
regime_lengths_all = {
    aspect: sum(regime_lengths_comp[name].get(aspect, 0) for name in all_comps)
    for aspect in aspect_filters
    if aspect
}
# Share of each aspect among all selected days
total_days_all = np.sum(list(regime_lengths_all.values()))
all_companies = {aspect: days / total_days_all for aspect, days in regime_lengths_all.items()}

# Same computation for a custom set of companies / industries.
# Vary the custom industry list to reproduce the different rows in the Table
custom_industry = auto_industry

regime_lengths = {
    aspect: sum(regime_lengths_comp[name].get(aspect, 0) for name in custom_industry)
    for aspect in aspect_filters
    if aspect
}
total_days_industry = np.sum(list(regime_lengths.values()))
industry_companies = {aspect: days / total_days_industry for aspect, days in regime_lengths.items()}

Note that because of the different data source for the prices (yfinance), the results are slightly different from the ones in the paper.

However, this does not take away from the derived conclusions presented there.

In [None]:

# Difference between the industry share and the overall share per aspect, in percentage points
for asp, overall_share in all_companies.items():
    print(asp, (industry_companies[asp] - overall_share) * 100)

In [None]:
# Fixed slice order for the pie chart; sizes are looked up in the same order as the labels
label_order = ['environmental', 'env_soc', 'social', 'soc_gov', 'governance', 'env_gov']

labels = label_order
sizes = [regime_lengths_all[aspect] for aspect in label_order]

# Pie chart of the days per aspect over all companies
fig_pie, ax_pie = plt.subplots(figsize=(6, 6))
ax_pie.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)

# Equal aspect ratio ensures the pie is drawn as a circle
ax_pie.axis('equal')

plt.show()

### Investigate a single company - in the paper, we use 'daimler'

In [8]:
# Company key used by all single-company cells below
comp = 'daimler'

#### First, clean the regimes so that fluctuations of a few days don't disrupt them

In [10]:
# Group the daily best-aspect series of `comp` into regimes: lists of consecutive dates that share one aspect.
# Results: `regimes` (list of date lists) and `regimes_aspects` (aspect label of each regime, same order).

# Aspect label -> numeric level (used for plotting the aspect series)
datamap = {'All': 2, 'environmental': 3, 'env_soc': 4, 'social': 5, 'soc_gov': 6, 'governance': 7, 'env_gov': 8}
# Relabel None as 'All'. This mutates company_best_aspect_dict[comp] in place.
for i in range(0 ,len(company_best_aspect_dict[comp])):
    if company_best_aspect_dict[comp][i] is None:
        company_best_aspect_dict[comp][i] = 'All'
dataplot = list(map(lambda x: datamap[x], company_best_aspect_dict[comp]))

#Often, regimes fluctuate a bit. For display purposes, we disregard regime fluctuations that are shorter than a certain amount of days
regime_cutoff = 5 #a regime is only recorded if it contains more than this many dates
fluctuation_cutoff = 2  #once this many dates of a different aspect have been collected, a new regime starts

# Labels that never start or continue a regime.
# NOTE(review): None was relabelled to 'All' above, so the None entry cannot match an element of the series any more.
false_regimes = ['no_correlation', None]

new_regime_status = False   #True while a candidate for a new regime is being collected
new_regime = []             #dates of the candidate regime
regimes = []
regimes_aspects = []
aspect_regime = []          #dates of the current regime
curr_reg_aspect = 0         #aspect of the current regime (0 until the first one is seen)
new_reg_aspect = 0          #aspect of the candidate regime
for i in range(1, len(company_best_aspect_dict[comp])):
    #If we detect a change in aspect, we start a new regime
    if company_best_aspect_dict[comp][i] != company_best_aspect_dict[comp][i-1]:
        new_reg_aspect = company_best_aspect_dict[comp][i]
        new_regime_status = True
        if new_reg_aspect == curr_reg_aspect:
            #If the new regime was a short fluctuation, we continue with the old (current) regime
            #and fold the dates collected during the fluctuation into it
            new_regime_status = False
            aspect_regime = aspect_regime + new_regime
            aspect_regime.append(company_date_dt_dict[comp][i])
        else:
            #A different aspect: restart the candidate
            new_regime = []
    elif company_best_aspect_dict[comp][i] not in false_regimes and not new_regime_status:
        #Same aspect as the day before and no candidate pending: extend the current regime
        curr_reg_aspect = company_best_aspect_dict[comp][i]
        aspect_regime.append(company_date_dt_dict[comp][i])
    if len(new_regime) >= fluctuation_cutoff and new_regime_status:
        #If the new regime is not a fluctuation, we add the old regime to the list
        if len(aspect_regime) > regime_cutoff and curr_reg_aspect != 'no_correlation':
            regimes.append(aspect_regime)
            regimes_aspects.append(curr_reg_aspect)
        new_regime_status = False
        if new_reg_aspect not in false_regimes:
            #If the new regime is not a no_correlation regime, we continue with it as the current regime
            aspect_regime = new_regime.copy()
            curr_reg_aspect = company_best_aspect_dict[comp][i]
            new_regime = []
        else:
            #otherwise we stop with the new regime
            curr_reg_aspect = 'no_correlation'
            new_regime = []
    elif new_regime_status:
        #Candidate still too short: keep collecting its dates
        new_regime.append(company_date_dt_dict[comp][i])
#if we reach the end of data while in a regime, we add it to the list
if curr_reg_aspect not in false_regimes and len(aspect_regime) > regime_cutoff:
    regimes.append(aspect_regime)
    regimes_aspects.append(curr_reg_aspect)

In [None]:
# Number of entries of history prepended to each regime before running DTW on it
time_before = int(data_window)

# Articles of the selected company that pass the relevance cutoff
data_comp = data[data['company'] == comp]
data_comp = data_comp[data_comp['relevance_score'] >= relevance_cutoff]
regime_info = {}   # regime number -> indices, aspect, articles, return and DTW result
distances = []     # DTW distance divided by the warping-path length, one entry per regime
for r in range(0, len(regimes)):
    start_idx = company_date_dt_dict[comp].index(regimes[r][0])
    end_idx = company_date_dt_dict[comp].index(regimes[r][-1])
    # Select the articles that belong to the regime's aspect combination.
    # The regime-cleaning cell relabels None as 'All', so both spellings mean "no aspect filter";
    # without the 'All' check such regimes fall through to the exact-match branch and get no articles.
    if regimes_aspects[r] is None or regimes_aspects[r] == 'All':
        data_comp_asp = data_comp.copy()
    elif regimes_aspects[r] == 'env_soc':
        data_comp_asp = data_comp[data_comp['aspect'] != 'governance']
    elif regimes_aspects[r] == 'env_gov':
        data_comp_asp = data_comp[data_comp['aspect'] != 'social']
    elif regimes_aspects[r] == 'soc_gov':
        data_comp_asp = data_comp[data_comp['aspect'] != 'environmental']
    else:
        data_comp_asp = data_comp[data_comp['aspect'] == regimes_aspects[r]]
    in_regime = (data_comp_asp['date'] >= pd.Timestamp(regimes[r][0])) & (data_comp_asp['date'] <= pd.Timestamp(regimes[r][-1]))
    # Explicit copy: a column is added below, which must not target a slice of `data`
    data_r = data_comp_asp[in_regime].copy()
    if weighted_relevance:
        data_r['weighted_relevance'] = data_r['relevance_score'].apply(lambda x: weighted_relevance_dict[x])

    # Regime window extended backwards by time_before entries (clamped at the series start)
    window_start = np.max([0, start_idx-time_before])
    regime_price = company_price_dict[comp][window_start:end_idx+1]
    regime_esg = company_shifted_estimator_dict[comp][window_start:end_idx+1]
    regime_esg_acc = list(accumulate(regime_esg))
    s1, s2, d, best_path = apply_dtw(regime_price, regime_esg_acc, dtw_window)
    distances.append(d/len(best_path))

    start_price = company_price_dict[comp][start_idx]
    end_price = company_price_dict[comp][end_idx]

    regime_info[r] = {'start_idx': start_idx,
                      'end_idx': end_idx,
                      'regime_aspect': regimes_aspects[r],
                      'articles': data_r,
                      'returns': (end_price - start_price) / start_price,   # simple return over the regime
                      'price': s1,
                      'esg': s2,
                      'distance': d,
                      'best_path': best_path}

In [12]:
# Clean regime array: one slot per entry of the best-aspect series, None outside detected regimes
clean_regimes = [None] * len(company_best_aspect_dict[comp])
for info in regime_info.values():
    first, last = info['start_idx'], info['end_idx']
    # Fill the regime's index range (inclusive) with its aspect label
    clean_regimes[first:last+1] = [info['regime_aspect']] * (last - first + 1)

In [13]:
# Slots outside any regime are labelled 'All' (list is updated in place)
clean_regimes[:] = ['All' if aspect is None else aspect for aspect in clean_regimes]

### Figure 2 of paper in the three cells below:

In [None]:
# Overview of the 'cleaned' regimes together with price and composed ESG sentiment
# (this plot is not included in the paper)

datamap = {'All': 2, 'environmental': 3, 'env_soc': 4, 'social': 5, 'soc_gov': 6, 'governance': 7, 'env_gov': 8}
dataplot = [datamap[aspect] for aspect in clean_regimes]

fig_esg, ax1 = plt.subplots()
ax2 = ax1.twinx()

plot_dates = company_date_dt_dict[comp]
composed = composed_company_dict[comp]
# Normalized series on the right axis, aspect level on the left axis
for series_name in ('price', 'esg'):
    ax2.plot(plot_dates, composed[series_name], label=series_name)
ax1.plot(plot_dates, dataplot, label='best aspect', color='red')

# Regime to highlight; start_idx / end_idx are reused by the cells below
r = 12
start_idx = np.max([0, regime_info[r]['start_idx']])
end_idx = regime_info[r]['end_idx']
ax2.axvspan(plot_dates[start_idx], plot_dates[end_idx], color='green', alpha=0.1)

ax1.set_yticks(list(datamap.values()), list(datamap.keys()))
ax2.legend()
ax1.set_title(comp)
plt.show()

In [None]:
# After a regime has been highlighted above, we can investigate its warping path below

idx = r
selected_regime = regime_info[idx]
print(selected_regime['regime_aspect'])

# Positions of the regime inside the DTW window, which starts time_before entries earlier (clamped at 0)
window_start = np.max([0, selected_regime['start_idx']-time_before])
reg_start_idx = np.max([0, selected_regime['start_idx'] - window_start])
reg_end_idx = np.max([0, selected_regime['end_idx'] - window_start])
fig_wp, axes = dtwvis.plot_warping(selected_regime['price'], selected_regime['esg'], selected_regime['best_path'])

axes[0].set_ylabel('Price')
axes[1].set_ylabel('ESG')
axes[1].set_xlabel('Time')
# Shade the regime itself in both panels
for panel in (axes[1], axes[0]):
    panel.axvspan(reg_start_idx, reg_end_idx, color='green', alpha=0.3)

# Uniform font size for titles, axis labels and tick labels of both panels
for panel in (axes[0], axes[1]):
    text_items = [panel.title, panel.xaxis.label, panel.yaxis.label]
    text_items += panel.get_xticklabels() + panel.get_yticklabels()
    for item in text_items:
        item.set_fontsize(15)

# Thicken every line drawn in the panels
for ax in axes:
    for line in ax.get_lines():
        line.set_linewidth(3)

print(selected_regime['distance'])
final_step = selected_regime['best_path'][-1]
print(f'Final Lag: {final_step[0] - final_step[1]}')

In [None]:
# Finally, display the original (uncleaned) aspect series with the selected regime highlighted
datamap = {'All': 2, 'environmental': 3, 'env_soc': 4, 'social': 5, 'soc_gov': 6, 'governance': 7, 'env_gov': 8}
# Relabel None as 'All' in place, then translate the labels to plot levels
best_aspects = company_best_aspect_dict[comp]
best_aspects[:] = ['All' if aspect is None else aspect for aspect in best_aspects]
dataplot = [datamap[aspect] for aspect in best_aspects]

fig_esg, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.set_title('Daimler - Aspect-Regimes')
ax1.set_ylabel('Aspect')
ax1.set_xlabel('Date')
ax2.set_ylabel('Normalized Price/Sentiment')

# Uniform font size for titles, axis labels and tick labels of both axes
for axis in (ax1, ax2):
    text_items = [axis.title, axis.xaxis.label, axis.yaxis.label]
    text_items += axis.get_xticklabels() + axis.get_yticklabels()
    for item in text_items:
        item.set_fontsize(10)

plot_dates = company_date_dt_dict[comp]
ax1.plot(plot_dates, dataplot, label='best aspect', color='red', alpha=0.7)
ax2.plot(plot_dates, composed_company_dict[comp]['price'], label='price', linewidth=2)
ax2.plot(plot_dates, composed_company_dict[comp]['esg'], label='esg', linewidth=2)

# start_idx / end_idx come from the regime chosen in the cleaned-regime plot cell above
ax2.axvspan(plot_dates[start_idx], plot_dates[end_idx], color='green', alpha=0.3)

ax1.set_yticks(list(datamap.values()), list(datamap.keys()))
ax1.tick_params(labelrotation=45)
ax2.legend()
plt.show()

### Reproduce the plots used in Figure 1 of the paper:

In [17]:
# Exclusive end position of the excerpt shown in the Figure 1 plots
end_indx = 391
# Prices and dates of `comp` up to (not including) position end_indx
total_price = company_price_dict[comp][:end_indx]
time = company_date_dt_dict[comp][:end_indx]


In [None]:
# Price excerpt with the last 70 entries shaded; tick labels are hidden for the schematic
fig_price, ax = plt.subplots()
ax.plot(time, total_price, linewidth=3)
ax.set_title(f'Company: {comp}')
ax.set_xlabel('Time')
plt.xticks(rotation=45, ha='right')
ax.set_ylabel('Price')

text_items = [ax.title, ax.xaxis.label, ax.yaxis.label]
text_items += ax.get_xticklabels() + ax.get_yticklabels()
for item in text_items:
    item.set_fontsize(15)

ax.axvspan(time[len(time)-70], time[-1], color='green', alpha=0.3)
for clear_labels in (ax.set_yticklabels, ax.set_xticklabels):
    clear_labels([])
plt.show()

In [None]:
# Vary the aspect to reproduce the 6 plots (3 esg sentiments & 3 warping paths) of the schema!

plt.style.use(['default'])
aspect_filter = 'governance'
# Accumulated sentiment of the chosen aspect, cut to the same excerpt as the price plot
total_aspect_sentiment = company_aspect_esg_sent_dict[comp][aspect_filter][:end_indx]

# Axis label per aspect filter, so the y label always matches the aspect selected above
# (it used to be hard-coded to 'Soc-Gov' regardless of aspect_filter)
aspect_axis_labels = {None: 'All', 'environmental': 'Environmental', 'social': 'Social',
                      'governance': 'Governance', 'env_soc': 'Env-Soc', 'env_gov': 'Env-Gov',
                      'soc_gov': 'Soc-Gov'}

fig_asp, ax = plt.subplots()
plt.plot(time, total_aspect_sentiment, linewidth=3)
plt.title(f'Company: {comp}')
plt.xlabel('Time')
plt.xticks(rotation=45, ha='right')
plt.ylabel(f'{aspect_axis_labels.get(aspect_filter, aspect_filter)} Sentiment')
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(15)
# Shade the last 70 entries of the excerpt
ax.axvspan(time[len(time)-70], time[-1], color='green', alpha=0.3)
# Tick labels are hidden for the schematic
ax.set_yticklabels([])
ax.set_xticklabels([])
plt.show()

In [21]:
# Pick the stored DTW result of one aspect at one position of the series
aspect_filter = 'environmental'
# NOTE(review): the plots above slice [:end_indx], i.e. stop one entry before this position - confirm the offset is intended
dtw_idx = end_indx

selected_dtw = dtw_dict_per_comp[comp][dtw_idx][aspect_filter]
d = selected_dtw['d']
best_path = selected_dtw['best_path']
# Lag per warping-path step: first index minus second index
lag = [step[0] - step[1] for step in best_path]
price = selected_dtw['price']
sentiment = selected_dtw['sentiment']

In [None]:
# Distance and final lag of every aspect filter at position dtw_idx
for aspect_name in aspect_filters:
    aspect_dtw = dtw_dict_per_comp[comp][dtw_idx][aspect_name]
    print(aspect_name, aspect_dtw['d'])
    last_step = aspect_dtw['best_path'][-1]
    print(aspect_name, last_step[0] - last_step[1])

In [None]:
# Warping path between the selected price and sentiment windows
fig_dtw, axes = dtwvis.plot_warping(price, sentiment, best_path)

axes[0].set_ylabel('Price')
axes[1].set_xlabel('Days')
axes[1].set_ylabel('Sentiment')

# Uniform font size for titles, axis labels and tick labels of both panels
for panel in (axes[0], axes[1]):
    text_items = [panel.title, panel.xaxis.label, panel.yaxis.label]
    text_items += panel.get_xticklabels() + panel.get_yticklabels()
    for item in text_items:
        item.set_fontsize(15)

# Tick labels are hidden for the schematic
for panel in (axes[0], axes[1]):
    panel.set_yticklabels([])
    panel.set_xticklabels([])

# Thicken every line drawn in the panels
for ax in axes:
    for line in ax.get_lines():
        line.set_linewidth(3)

print(d)
final_step = best_path[-1]
print(f'Final Lag: {final_step[0] - final_step[1]}')