# Total dynamical O-information

Add bootstrapping, gaussian sampling and all possible triplets (not only 35)

# TO-DO: run the bootstrap on all the triplets; in the Gaussian sampling also preserve the covariance, and do it for the matrix as well

## Index

1. [Functions](#functions)
1. [Load and binarize data](#load_binar_data)
    1. [Gaussian randomization](#load_binar_data_gaussian)
1. [Full length](#full_length)
    1. [Thomas' work](#thomas_work_full)
    1. [3 vs 1-2](#3_vs_1-2_full)
        1. [J-S divergence](#J-S_divergence_full)
    1. [3 vs 1-1-1](#3_vs_1-1-1_full)
1. [Half length](#half_length)
    1. [Thomas' work](#thomas_work_half)
    1. [3 vs 1-2](#3_vs_1-2_half)
        1. [J-S divergence](#J-S_divergence_half)
    1. [3 vs 1-1-1](#3_vs_1-1-1_half)
1. [Comparison different lengths](#comparison_lengths)
    1. [Comparison full-half](#comparison_f_h)
    1. [Creation dataset](#creation_dataset)
        1. [Comparison Industrials](#comparison_industrials)
        1. [Comparison Basic Materials](#comparison_basic_materials)
        1. [Comparison Financials](#comparison_financials)
        1. [Comparison Consumer Services](#comparison_consumer_services)
        1. [Comparison Health Care](#comparison_health_care)
        1. [Comparison Utilities](#comparison_utilities)
        1. [Comparison Oil & Gas](#comparison_oil_gas)
        1. [Comparison Consumer Goods](#comparison_consumer_goods)
1. [Heatmaps](#heatmaps)
    1. [Matrix](#matrix)
    1. [Divided by year](#divided_year)
        1. [Same sector](#year_same_sector)
    1. [Divided by two years](#divided_two_years)
    1. [Divided by three years](#divided_three_years)
1. [O-information](#o_info)
    1. [3 in Sector A](#o_info_3)
        1. [Matrix](#o_info_3_matrix)
        1. [One year](#o_info_3_one_year)
        1. [Two years](#o_info_3_two_years)
        1. [Three years](#o_info_3_three_years)
    1. [2 in Sector A and 1 in Sector B](#o_info_2_1)
        1. [Matrix](#o_info_2_1_matrix)
        1. [One year](#o_info_2_1_one_year)
        1. [Two years](#o_info_2_1_two_years)
        1. [Three years](#o_info_2_1_three_years)
    1. [1 in Sector A, 1 in Sector B and 1 in Sector C](#o_info_1_1_1)
        1. [Matrix](#o_info_1_1_1_matrix)
            1. [Bootstrapping](#o_info_1_1_1_matrix_bootstrapping)
            1. [Probability density function](#o_info_1_1_1_matrix_pdf)
            1. [Gaussian distribution](#o_info_1_1_1_matrix_gaussian)
            1. [All number of triplets](#o_info_1_1_1_matrix_all_triplets)
        1. [One year](#o_info_1_1_1_one_year)
            1. [Gaussian distribution](#o_info_1_1_1_one_year_gaussian)
            1. [All number of triplets](#o_info_1_1_1_one_year_all_triplets)
        1. [Two years](#o_info_1_1_1_two_years)
        1. [Three years](#o_info_1_1_1_three_years)

<a name="functions"></a>
## Functions

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import combinations, product
from tqdm.auto import tqdm
import matplotlib.patches as mpatches
import pickle
from scipy.special import kl_div
from scipy.stats import entropy

from ho_info_metrics.metrics import *
from ho_info_metrics.utils import *

In [None]:
def binarize_df(df, window_size):
    """Binarize every series of ``df`` against its own trailing moving average.

    For each column, the value at row ``i`` becomes 1 when it is strictly
    greater than the mean of the last ``window_size`` values ending at ``i``
    (the window is shorter at the start of the series), and 0 otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one numeric column per series. A ``"Date"`` column, if
        present, is dropped and not binarized.
    window_size : int
        Number of trailing samples (current one included) used for the mean.

    Returns
    -------
    pandas.DataFrame
        New frame of 0/1 values; the frame passed in is not modified.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1 (the window would be empty).
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # drop() returns a new frame, so the caller's data is never modified;
    # errors="ignore" lets frames without a "Date" column be binarized too.
    df = df.drop(columns=["Date"], errors="ignore")
    for col in df.columns:
        col_values = df[col].values
        df[col] = [
            1 if value > col_values[max(0, i - window_size + 1) : i + 1].mean() else 0
            for i, value in enumerate(col_values)
        ]
    return df

In [None]:
def get_do_infos(target_sector, binarized_financial_ts):
    """Compute the total dynamical O-information of same-sector and mixed triplets.

    Every 3-combination of the stocks of ``target_sector`` is evaluated,
    together with an equal number of random triplets made of one stock of the
    target sector and two *distinct* stocks taken from the other sectors.

    Parameters
    ----------
    target_sector : str
        Sector label, looked up in the module-level ``stock_names`` table.
    binarized_financial_ts : pandas.DataFrame
        Table indexed by stock name (one column per stock).

    Returns
    -------
    tuple of list
        ``(doinfo_same_sector, doinfo_different_sector)``; each list holds one
        ``o_information_lagged_all`` result per triplet.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when fewer than two stocks exist
        outside the target sector (two distinct ones cannot be drawn).
    """
    print(">>>", target_sector)
    # All the stock names whose sector is the target one
    target_stocks = list(
        stock_names[stock_names["Sector"] == target_sector]["Stock name"]
    )
    print(len(target_stocks))
    other_stocks = list(
        stock_names[stock_names["Sector"] != target_sector]["Stock name"]
    )
    print(len(other_stocks))

    same_sector_triplets = list(combinations(target_stocks, 3))
    print(len(same_sector_triplets))

    # As many mixed triplets as same-sector ones. The two outside stocks are
    # drawn without replacement: drawing them independently could put the
    # same series twice in a triplet, which is not a genuine triplet.
    different_sector_triplets = []
    for _ in range(len(same_sector_triplets)):
        inside = np.random.choice(target_stocks)
        outside = np.random.choice(other_stocks, size=2, replace=False)
        different_sector_triplets.append([inside, outside[0], outside[1]])
    print(len(different_sector_triplets))

    doinfo_same_sector = []
    with tqdm(total=len(same_sector_triplets)) as pbar:
        pbar.set_description("Same sector triplets")
        for triplet in same_sector_triplets:
            X = np.vstack([binarized_financial_ts[name].values for name in triplet])
            # Total dynamical O-information of the triplet
            doinfo_same_sector.append(
                o_information_lagged_all(X, estimator="cat_ent")
            )
            pbar.update(1)

    doinfo_different_sector = []
    with tqdm(total=len(different_sector_triplets)) as pbar:
        pbar.set_description("Different sector triplets")
        for triplet in different_sector_triplets:
            X = np.vstack([binarized_financial_ts[name].values for name in triplet])
            doinfo_different_sector.append(
                o_information_lagged_all(X, estimator="cat_ent")
            )
            pbar.update(1)

    # Each list holds one total dynamical O-information result per triplet
    return (
        doinfo_same_sector,
        doinfo_different_sector,
    )

In [None]:
def get_do_infos_same(target_sector, binarized_ts):
    """Total dynamical O-information of same-sector triplets.

    Only the first 35 three-stock combinations of ``target_sector`` (in the
    order produced by ``itertools.combinations``) are evaluated; the sector
    membership is read from the module-level ``stock_names`` table. Returns a
    list with one ``o_information_lagged_all`` result per triplet.
    """
    print(">>>", target_sector)

    # Names of all the stocks that belong to the target sector
    in_target = stock_names["Sector"] == target_sector
    target_stocks = list(stock_names[in_target]["Stock name"])
    print("Length target_stocks:", len(target_stocks))

    # Having removed the Technology sector, each sector can now offer 35 triplets
    every_triplet = list(combinations(target_stocks, 3))
    same_sector_triplets = every_triplet[:35]
    print("Length triplets same sector:", len(same_sector_triplets))

    doinfo_one_sector = []
    with tqdm(total=len(same_sector_triplets)) as pbar:
        pbar.set_description("Same sector")
        for first, second, third in same_sector_triplets:
            # One row per stock of the triplet
            X = np.vstack(
                (
                    binarized_ts[first].values,
                    binarized_ts[second].values,
                    binarized_ts[third].values,
                )
            )
            value = o_information_lagged_all(X, estimator="cat_ent")
            doinfo_one_sector.append(value)
            pbar.update(1)

    return doinfo_one_sector

In [None]:
def get_o_infos_same(target_sector, binarized_ts, total=False):
    """O-information of same-sector triplets.

    With ``total=True`` every three-stock combination of ``target_sector`` is
    evaluated, otherwise only the first 35. Sector membership is read from the
    module-level ``stock_names`` table. Returns a list with one
    ``o_information_boot`` result per triplet.
    """
    print(">>>", target_sector)

    # Names of all the stocks that belong to the target sector
    in_target = stock_names["Sector"] == target_sector
    target_stocks = list(stock_names[in_target]["Stock name"])
    print("Length target_stocks:", len(target_stocks))

    same_sector_triplets = list(combinations(target_stocks, 3))
    if not total:
        # Having removed the Technology sector, each sector can now offer 35 triplets
        same_sector_triplets = same_sector_triplets[:35]
    print("Length triplets same sector:", len(same_sector_triplets))

    oinfo_one_sector = []
    with tqdm(total=len(same_sector_triplets)) as pbar:
        pbar.set_description("Same sector")
        for first, second, third in same_sector_triplets:
            # One row per stock of the triplet
            X = np.vstack(
                (
                    binarized_ts[first].values,
                    binarized_ts[second].values,
                    binarized_ts[third].values,
                )
            )
            value = o_information_boot(X, estimator="cat_ent")
            oinfo_one_sector.append(value)
            pbar.update(1)

    return oinfo_one_sector

In [None]:
def get_do_infos_two_different(target_sector, another_sector, binarized_ts, total=False):
    """Total dynamical O-information of "1 + 2" triplets.

    Each triplet is made of one stock of ``target_sector`` and two distinct
    stocks of ``another_sector`` (sector membership is read from the
    module-level ``stock_names`` table).

    Parameters
    ----------
    target_sector, another_sector : str
        Sector labels.
    binarized_ts : pandas.DataFrame
        Table indexed by stock name (one column per stock).
    total : bool, default False
        If True, evaluate every target stock with every unordered pair of
        stocks of ``another_sector``; otherwise evaluate 35 random triplets.

    Returns
    -------
    list
        One ``o_information_lagged_all`` result per triplet.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` in the random branch when
        ``another_sector`` has fewer than two stocks.
    """
    print(">>>", target_sector, "-", another_sector)

    target_stocks = list(
        stock_names[stock_names["Sector"] == target_sector]["Stock name"]
    )
    another_stocks = list(
        stock_names[stock_names["Sector"] == another_sector]["Stock name"]
    )
    print("Length another_stocks:", len(another_stocks))

    if total:
        another_sector_triplets = [
            (target, first, second)
            for target in target_stocks
            for first, second in combinations(another_stocks, 2)
        ]
    else:
        # Having removed the Technology sector, each sector can now offer 35 triplets
        num_triplets = 35
        another_sector_triplets = []
        for _ in range(num_triplets):
            target = np.random.choice(target_stocks)
            # Without replacement, so that the pair never contains the same
            # stock twice (consistent with the pairs of the `total` branch).
            pair = np.random.choice(another_stocks, size=2, replace=False)
            another_sector_triplets.append([target, pair[0], pair[1]])
    print("Length triplets two sectors:", len(another_sector_triplets))

    doinfo_two_sectors = []
    with tqdm(total=len(another_sector_triplets)) as pbar:
        pbar.set_description("Two sectors")
        for triplet in another_sector_triplets:
            # One row per stock of the triplet
            X = np.vstack([binarized_ts[name].values for name in triplet])
            doinfo_two_sectors.append(o_information_lagged_all(X, estimator="cat_ent"))
            pbar.update(1)

    return doinfo_two_sectors

In [None]:
def get_o_infos_two_different(target_sector, another_sector, binarized_ts, total=False):
    """O-information of "1 + 2" triplets.

    Each triplet is made of one stock of ``target_sector`` and two distinct
    stocks of ``another_sector`` (sector membership is read from the
    module-level ``stock_names`` table).

    Parameters
    ----------
    target_sector, another_sector : str
        Sector labels.
    binarized_ts : pandas.DataFrame
        Table indexed by stock name (one column per stock).
    total : bool, default False
        If True, evaluate every target stock with every unordered pair of
        stocks of ``another_sector``; otherwise evaluate 35 random triplets.

    Returns
    -------
    list
        One ``o_information_boot`` result per triplet.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` in the random branch when
        ``another_sector`` has fewer than two stocks.
    """
    print(">>>", target_sector, "-", another_sector)

    target_stocks = list(
        stock_names[stock_names["Sector"] == target_sector]["Stock name"]
    )
    another_stocks = list(
        stock_names[stock_names["Sector"] == another_sector]["Stock name"]
    )
    print("Length another_stocks:", len(another_stocks))

    if total:
        another_sector_triplets = [
            (target, first, second)
            for target in target_stocks
            for first, second in combinations(another_stocks, 2)
        ]
    else:
        # Having removed the Technology sector, each sector can now offer 35 triplets
        another_sector_triplets = []
        for _ in range(35):
            target = np.random.choice(target_stocks)
            # Without replacement, so that the pair never contains the same
            # stock twice (consistent with the pairs of the `total` branch).
            pair = np.random.choice(another_stocks, size=2, replace=False)
            another_sector_triplets.append([target, pair[0], pair[1]])
    print("Length triplets two sectors:", len(another_sector_triplets))

    oinfo_two_sectors = []
    with tqdm(total=len(another_sector_triplets)) as pbar:
        pbar.set_description("Two sectors")
        for triplet in another_sector_triplets:
            # One row per stock of the triplet
            X = np.vstack([binarized_ts[name].values for name in triplet])
            oinfo_two_sectors.append(o_information_boot(X, estimator="cat_ent"))
            pbar.update(1)

    return oinfo_two_sectors

In [None]:
def get_do_infos_three_different(
    target_sector, sector1, sector2, binarized_ts
):
    """Total dynamical O-information of "1 + 1 + 1" triplets.

    35 random triplets are built, each with one stock of ``target_sector``,
    one of ``sector1`` and one of ``sector2`` (membership is read from the
    module-level ``stock_names`` table). Returns a list with one
    ``o_information_lagged_all`` result per triplet.
    """
    print(">>>", target_sector, "-", sector1, "-", sector2)

    in_target = stock_names["Sector"] == target_sector
    target_stocks = list(stock_names[in_target]["Stock name"])
    stocks1 = list(stock_names[stock_names["Sector"] == sector1]["Stock name"])
    print("Length sector1:", len(stocks1))
    stocks2 = list(stock_names[stock_names["Sector"] == sector2]["Stock name"])
    print("Length sector2:", len(stocks2))

    # Having removed the Technology sector, each sector can now offer 35 triplets
    three_sectors_triplets = [
        [
            np.random.choice(target_stocks),
            np.random.choice(stocks1),
            np.random.choice(stocks2),
        ]
        for _ in range(35)
    ]
    print("Length triplets three sectors:", len(three_sectors_triplets))

    doinfo_three_sectors = []
    with tqdm(total=len(three_sectors_triplets)) as pbar:
        pbar.set_description("Three sectors")
        for first, second, third in three_sectors_triplets:
            # One row per stock of the triplet
            X = np.vstack(
                (
                    binarized_ts[first].values,
                    binarized_ts[second].values,
                    binarized_ts[third].values,
                )
            )
            value = o_information_lagged_all(X, estimator="cat_ent")
            doinfo_three_sectors.append(value)
            pbar.update(1)

    return doinfo_three_sectors

In [None]:
def get_o_infos_three_different(
    target_sector, sector1, sector2, binarized_ts, total=False
):
    """O-information of "1 + 1 + 1" triplets.

    Each triplet has one stock of ``target_sector``, one of ``sector1`` and
    one of ``sector2`` (membership is read from the module-level
    ``stock_names`` table). With ``total=True`` the full Cartesian product of
    the three stock lists is evaluated, otherwise 35 random triplets. Returns
    a list with one ``o_information_boot`` result per triplet.
    """
    print(">>>", target_sector, "-", sector1, "-", sector2)

    in_target = stock_names["Sector"] == target_sector
    target_stocks = list(stock_names[in_target]["Stock name"])
    stocks1 = list(stock_names[stock_names["Sector"] == sector1]["Stock name"])
    print("Length sector1:", len(stocks1))
    stocks2 = list(stock_names[stock_names["Sector"] == sector2]["Stock name"])
    print("Length sector2:", len(stocks2))

    if total:
        three_sectors_triplets = list(product(target_stocks, stocks1, stocks2))
    else:
        # Having removed the Technology sector, each sector can now offer 35 triplets
        three_sectors_triplets = [
            [
                np.random.choice(target_stocks),
                np.random.choice(stocks1),
                np.random.choice(stocks2),
            ]
            for _ in range(35)
        ]
    print("Length triplets three sectors:", len(three_sectors_triplets))

    oinfo_three_sectors = []
    with tqdm(total=len(three_sectors_triplets)) as pbar:
        pbar.set_description("Three sectors")
        for first, second, third in three_sectors_triplets:
            # One row per stock of the triplet
            X = np.vstack(
                (
                    binarized_ts[first].values,
                    binarized_ts[second].values,
                    binarized_ts[third].values,
                )
            )
            value = o_information_boot(X, estimator="cat_ent")
            oinfo_three_sectors.append(value)
            pbar.update(1)

    return oinfo_three_sectors

In [None]:
def max_bins(array1, array2, array3):
    """Find how many bins the three arrays can take before a bin goes empty.

    Bin counts 2, 3, 4, ... are tried in turn; each array is histogrammed on
    its own range. The search stops at the first count for which at least one
    of the three histograms has an empty bin, and that count minus one is
    returned.
    """
    samples = (array1, array2, array3)
    bins = 2
    while True:
        # All three histograms are computed before any of them is inspected
        counts = [np.histogram(sample, bins=bins)[0] for sample in samples]
        if any((hist == 0).any() for hist in counts):
            return bins - 1
        bins += 1

In [None]:
def JS_div(array1, array2, bins="auto"):
    """Jensen-Shannon divergence between the distributions of two samples.

    Both samples are histogrammed on the *same* bin edges (computed from the
    pooled data), the counts are turned into probabilities P and Q, and the
    divergence is ``(KL(P || M) + KL(Q || M)) / 2`` with ``M = (P + Q) / 2``.

    Sharing the bin edges is what makes the comparison meaningful:
    histogramming each sample on its own range discards its location and
    scale, so two well separated distributions would look identical.

    Parameters
    ----------
    array1, array2 : array_like
        The two samples; they may have different lengths.
    bins : int, sequence or str, default "auto"
        Bin specification forwarded to ``np.histogram`` for the pooled data.

    Returns
    -------
    float
        The divergence in nats, between 0 (identical histograms) and
        ``log(2)`` (disjoint supports).

    Raises
    ------
    ValueError
        If either sample is empty.
    """
    sample1 = np.asarray(array1, dtype=float).ravel()
    sample2 = np.asarray(array2, dtype=float).ravel()
    if sample1.size == 0 or sample2.size == 0:
        raise ValueError("JS_div requires two non-empty arrays")

    # Common support: bin edges are derived from the two samples together
    _, edges = np.histogram(np.concatenate((sample1, sample2)), bins=bins)
    hist1, _ = np.histogram(sample1, bins=edges)
    hist2, _ = np.histogram(sample2, bins=edges)

    prob1 = hist1 / hist1.sum()
    prob2 = hist2 / hist2.sum()
    # The mixture is positive wherever prob1 or prob2 is, so both
    # Kullback-Leibler terms below are finite.
    mixture = (prob1 + prob2) / 2

    return (entropy(prob1, mixture) + entropy(prob2, mixture)) / 2

In [None]:
def gaussian_random_numbers(dataframe, column):
    """Draw Gaussian surrogate values for one column of a dataframe.

    The values are sampled from a normal distribution that has the mean and
    the variance of ``dataframe[column]``; one value is drawn per row of the
    dataframe. The draws are returned as a plain list, or ``None`` is returned
    when the dataframe has no such column.
    """
    if column in dataframe.columns:
        series = dataframe[column]
        mu = series.mean()
        sigma = np.sqrt(series.var())
        draws = np.random.normal(loc=mu, scale=sigma, size=len(dataframe))
        return draws.tolist()

    return None

<a name="load_binar_data"></a>
## Load and binarize data

In [None]:
# Table of the stock time series
financial_ts = pd.read_csv(
    "data/Economic_data/NYSE_119stocks_2000Jan_2021June_withdates.csv"
)
# Lookup table giving the sector of every stock. The separator ", " is longer
# than one character, which only the python parsing engine supports: select
# it explicitly instead of relying on the fallback (and its ParserWarning).
stock_names = pd.read_csv(
    "data/Economic_data/list_stocknames.txt",
    sep=", ",
    names=["Index", "Stock name", "Sector"],
    engine="python",
)

In [None]:
# Display the table loaded from the CSV file
financial_ts

In [None]:
# Display the stock name / sector lookup table
stock_names

In [None]:
# Time window used for the binarization (it was originally 2)
tw = 7
binarized_financial_ts = binarize_df(financial_ts, window_size=tw)

In [None]:
# Display the binarized table
binarized_financial_ts

In [None]:
# Distinct sector labels, in order of first appearance in the lookup table
sectors = stock_names["Sector"].unique().tolist()
sectors

<a name="load_binar_data_gaussian"></a>
### Gaussian randomization

In [None]:
# Column means and covariance matrix of every column but the first one
stock_columns = financial_ts.iloc[:, 1:]
media_colonne = stock_columns.mean()
covarianza = stock_columns.cov()

In [None]:
# Draw as many multivariate Gaussian rows as there are rows in the data
num_campioni = len(financial_ts)
valori_generati = np.random.multivariate_normal(
    mean=media_colonne, cov=covarianza, size=num_campioni
)

In [None]:
# Wrap the generated values in a frame laid out like financial_ts: same
# column names, with the original first column put back in front.
first_column = financial_ts.columns[0]
financial_ts_gauss = pd.DataFrame(valori_generati, columns=financial_ts.columns[1:])
financial_ts_gauss.insert(0, first_column, financial_ts[first_column])

In [None]:
# Display the Gaussian surrogate table
financial_ts_gauss

In [None]:
# Time window used for the binarization (it was originally 2)
tw = 7
binarized_financial_ts_gauss = binarize_df(financial_ts_gauss, window_size=tw)

In [None]:
# Display the binarized Gaussian surrogate table
binarized_financial_ts_gauss

<a name="full_length"></a>
## Full length

We consider the full length of the time series.

<a name="thomas_work_full"></a>
### Thomas' work

Each sector contains the maximum number of triplets, so the number of triplets is not constant.

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every sector, compute both families of triplets and store each result
# list in its own .npy file.
for sector_name in sectors:
    doinfo_same_sector, doinfo_different_sector = get_do_infos(
        sector_name, binarized_financial_ts
    )

    same_path = f"./results/economic_results/full/do_info/doinfo_same_sector_{sector_name}_tw_{tw}.npy"
    np.save(same_path, doinfo_same_sector)

    different_path = f"./results/economic_results/full/do_info/doinfo_different_sector_{sector_name}_tw_{tw}.npy"
    np.save(different_path, doinfo_different_sector)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE VIOLIN PLOT

# One panel per sector, with the two saved distributions overlaid as violins
fig, axs = plt.subplots(3, 3, figsize=(40, 40))
for i, sector_name in enumerate(sectors):
    # Panels are filled row by row
    ax = axs.flat[i]
    doinfo_same_sector = np.load(
        f"./results/economic_results/full/do_info/doinfo_same_sector_{sector_name}_tw_{tw}.npy"
    )
    doinfo_different_sector = np.load(
        f"./results/economic_results/full/do_info/doinfo_different_sector_{sector_name}_tw_{tw}.npy"
    )
    for values, colour in (
        (doinfo_same_sector, "tab:blue"),
        (doinfo_different_sector, "tab:orange"),
    ):
        parts = ax.violinplot(values, showextrema=False)
        for pc in parts["bodies"]:
            pc.set_facecolor(colour)
            pc.set_edgecolor("black")
            pc.set_alpha(0.5)
    ax.set_title(sector_name, size=35)
    ax.set_ylabel(r"$d\Omega_3^{tot.}$", size=37)
    ax.tick_params(axis="both", labelsize=18)
    same_patch = mpatches.Patch(color="tab:blue", label="Same sector", alpha=0.4)
    different_patch = mpatches.Patch(
        color="tab:orange", label="Different sector", alpha=0.4
    )
    ax.legend(handles=[same_patch, different_patch], fontsize=30)


plt.savefig(
    f"./results/economic_results/full/images/doinfo_random_sectors_tw_{tw}.pdf",
    dpi=600,
    bbox_inches="tight",
)

<a name="3_vs_1-2_full"></a>
### 3 vs 1-2

In previous work, a triplet from the same sector was compared with a triplet in which one node is from the target sector and the other two are randomly selected from other sectors. Now I would like to compare the triplet from the same sector with a more carefully chosen triplet (one stock from the target sector and the other two from a single, specifically chosen other sector).
  
To ensure that the number of triplets is always the same I take 35, which is the maximum number that all sectors can offer.

In [None]:
# List the stocks that belong to the Technology sector
target_sector = "Technology"
stock_names.loc[stock_names["Sector"] == target_sector, "Stock name"].tolist()

In [None]:
# The Technology sector has only a few stocks, so its data are removed
technology_stocks = ["CACI", "CIEN", "DBD", "DDD"]
binarized_financial_ts = binarized_financial_ts.drop(columns=technology_stocks)

In [None]:
# Boolean mask, built with .isin(), flagging the rows of the removed stocks
maschera = stock_names["Stock name"].isin(["CACI", "CIEN", "DBD", "DDD"])

# Keep only the rows that are not flagged by the mask
stock_names = stock_names.loc[~maschera]

In [None]:
# Remove the Technology sector by name rather than by position: slicing off
# the last entry only works if Technology happens to be last, and it silently
# drops one more (real) sector every time this cell is executed again.
sectors = [sector for sector in sectors if sector != "Technology"]
sectors

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

for target in sectors:
    other_sectors = [name for name in sectors if name != target]

    # The same-sector reference does not depend on the compared sector, so it
    # is computed and saved once per target, before the comparisons.
    if other_sectors:
        doinfo_one_sector = get_do_infos_same(target, binarized_financial_ts)
        one_sector_path = f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        np.save(one_sector_path, doinfo_one_sector)

    for sector in other_sectors:
        doinfo_two_sectors = get_do_infos_two_different(
            target, sector, binarized_financial_ts
        )
        two_sectors_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        np.save(two_sectors_path, doinfo_two_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE VIOLIN PLOTS

# One figure per target sector, one panel per compared sector
for target in sectors:
    other_sectors = [name for name in sectors if name != target]
    fig, axs = plt.subplots(3, 3, figsize=(40, 40))
    for i, sector in enumerate(other_sectors):
        # Panels are filled row by row
        ax = axs.flat[i]
        doinfo_one_sector = np.load(
            f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        )
        for values, colour in (
            (doinfo_one_sector, "tab:blue"),
            (doinfo_two_sectors, "tab:orange"),
        ):
            parts = ax.violinplot(values, showextrema=False)
            for pc in parts["bodies"]:
                pc.set_facecolor(colour)
                pc.set_edgecolor("black")
                pc.set_alpha(0.5)
        title = f"{target} - {sector}"
        ax.set_title(title, size=35)
        ax.set_ylabel(r"$d\Omega_3^{tot.}$", size=40)
        ax.tick_params(axis="both", labelsize=18)
        label1 = f"3 in {target}"
        label2 = f"1 in {target}, 2 in {sector}"
        one_patch = mpatches.Patch(color="tab:blue", label=label1, alpha=0.4)
        two_patch = mpatches.Patch(color="tab:orange", label=label2, alpha=0.4)
        ax.legend(handles=[one_patch, two_patch], fontsize=25)

    plt.savefig(
        f"./results/economic_results/full/images/two_sectors/doinfo_two_sectors_{target}_tw_{tw}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

<a name="J-S_divergence_full"></a>
#### J-S divergence

Let's summarize the difference between the two distributions into a single number, so we can make a comparison.

In [None]:
# Rows are indexed by the target sector, columns by the other sector
sector_columns = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_full = pd.DataFrame(columns=sector_columns)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Fill df_full cell by cell: row = target sector, column = compared sector
for target in sectors:
    other_sectors = [name for name in sectors if name != target]
    one_sector_path = f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"

    for sector in other_sectors:
        two_sectors_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        doinfo_one_sector = np.load(one_sector_path)
        doinfo_two_sectors = np.load(two_sectors_path)

        df_full.at[target, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Write the table (row labels included) to disk, then display it
result_path = "./results/economic_results/full/result_full.csv"
df_full.to_csv(result_path, index=True)
df_full

<a name="3_vs_1-1-1_full"></a>
### 3 vs 1-1-1

The triplet from the same sector is compared with a triplet in which each element came from a different sector (one must be the target sector).
  
To ensure that the number of triplets is always the same I take 35, which is the maximum number that all sectors can offer.

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

for target in sectors:
    other_sectors = [name for name in sectors if name != target]

    # Ordered pairs of distinct sectors, both different from the target
    sector_pairs = [
        (first, second)
        for first in other_sectors
        for second in other_sectors
        if second != first
    ]

    for sector1, sector2 in sector_pairs:
        doinfo_three_sectors = get_do_infos_three_different(
            target, sector1, sector2, binarized_financial_ts
        )
        three_sectors_path = f"./results/economic_results/full/do_info/doinfo_three_sectors_{target}_{sector1}_{sector2}_tw_{tw}.npy"
        np.save(three_sectors_path, doinfo_three_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE VIOLIN PLOTS

# One figure per ordered pair (target1, target2), one panel per third sector
for target1 in sectors:
    # The same-sector reference only depends on target1: load it once
    doinfo_one_sector = np.load(
        f"./results/economic_results/full/do_info/doinfo_one_sector_{target1}_tw_{tw}.npy"
    )
    for target2 in [sector for sector in sectors if sector != target1]:
        other_sectors = [
            sector for sector in sectors if sector != target1 and sector != target2
        ]

        fig, axs = plt.subplots(3, 2, figsize=(40, 40))
        for i, sector in enumerate(other_sectors):
            ax = axs[i // 2, i % 2]
            doinfo_three_sectors = np.load(
                f"./results/economic_results/full/do_info/doinfo_three_sectors_{target1}_{target2}_{sector}_tw_{tw}.npy"
            )
            for values, colour in (
                (doinfo_one_sector, "tab:blue"),
                (doinfo_three_sectors, "tab:orange"),
            ):
                parts = ax.violinplot(values, showextrema=False)
                for pc in parts["bodies"]:
                    pc.set_facecolor(colour)
                    pc.set_edgecolor("black")
                    pc.set_alpha(0.5)
            title = str(target1 + " - " + target2 + " - " + sector)
            ax.set_title(title, size=40)
            ax.set_ylabel(r"$d\Omega_3^{tot.}$", size=45)
            ax.tick_params(axis="both", labelsize=25)
            label1 = str("3 in " + target1)
            label2 = str("1 in " + target1 + ", 1 in " + target2 + ", 1 in " + sector)
            ax.legend(
                handles=[
                    mpatches.Patch(color="tab:blue", label=label1, alpha=0.5),
                    mpatches.Patch(color="tab:orange", label=label2, alpha=0.5),
                ],
                fontsize=25,
            )

        fig.savefig(
            f"./results/economic_results/full/images/three_sectors/doinfo_three_sectors_{target1}_{target2}_tw_{tw}.pdf",
            dpi=600,
            bbox_inches="tight",
        )
        # A 40x40-inch figure is created for every sector pair; close each one
        # as soon as it is saved so that they do not all stay open in memory.
        # NOTE: a closed figure is not rendered inline, only written to the PDF.
        plt.close(fig)

<a name="half_length"></a>
## Half length

I take only the first half of each time series, to see whether the differences between the distributions decrease.

In [None]:
# Keep the first half of the rows
half_length = len(binarized_financial_ts) // 2
binarized_financial_ts_half = binarized_financial_ts.iloc[:half_length]
binarized_financial_ts_half

<a name="thomas_work_half"></a>
### Thomas' work

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every sector, compute both families of triplets on the half-length
# series and store each result list in its own .npy file.
for sector_name in sectors:
    doinfo_same_sector, doinfo_different_sector = get_do_infos(
        sector_name, binarized_financial_ts_half
    )

    same_path = f"./results/economic_results/half/do_info/doinfo_same_sector_{sector_name}_tw_{tw}.npy"
    np.save(same_path, doinfo_same_sector)

    different_path = f"./results/economic_results/half/do_info/doinfo_different_sector_{sector_name}_tw_{tw}.npy"
    np.save(different_path, doinfo_different_sector)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE VIOLIN PLOT

# One panel per sector, with the two saved distributions overlaid as violins
fig, axs = plt.subplots(3, 3, figsize=(40, 40))
for i, sector_name in enumerate(sectors):
    # Panels are filled row by row
    ax = axs.flat[i]
    doinfo_same_sector = np.load(
        f"./results/economic_results/half/do_info/doinfo_same_sector_{sector_name}_tw_{tw}.npy"
    )
    doinfo_different_sector = np.load(
        f"./results/economic_results/half/do_info/doinfo_different_sector_{sector_name}_tw_{tw}.npy"
    )
    for values, colour in (
        (doinfo_same_sector, "tab:blue"),
        (doinfo_different_sector, "tab:orange"),
    ):
        parts = ax.violinplot(values, showextrema=False)
        for pc in parts["bodies"]:
            pc.set_facecolor(colour)
            pc.set_edgecolor("black")
            pc.set_alpha(0.5)
    ax.set_title(sector_name, size=35)
    ax.set_ylabel(r"$d\Omega_3^{tot.}$", size=37)
    ax.tick_params(axis="both", labelsize=18)
    same_patch = mpatches.Patch(color="tab:blue", label="Same sector", alpha=0.4)
    different_patch = mpatches.Patch(
        color="tab:orange", label="Different sector", alpha=0.4
    )
    ax.legend(handles=[same_patch, different_patch], fontsize=30)


plt.savefig(
    f"./results/economic_results/half/images/doinfo_random_sectors_tw_{tw}.pdf",
    dpi=600,
    bbox_inches="tight",
)

<a name="3_vs_1-2_half"></a>
### 3 vs 1-2

**Important:** run the initial part of the full-length *3 vs 1-2* section first, so that the stocks belonging to the Technology sector are removed.

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

for target in sectors:
    other_sectors = [name for name in sectors if name != target]

    # The one-sector sample depends only on the target, so it is computed a
    # single time, right before the first partner sector is processed.
    if other_sectors:
        doinfo_one_sector = get_do_infos_same(target, binarized_financial_ts_half)
        np.save(
            f"./results/economic_results/half/do_info/doinfo_one_sector_{target}_tw_{tw}.npy",
            doinfo_one_sector,
        )

    # One file per (target, partner sector) pair.
    for sector in other_sectors:
        doinfo_two_sectors = get_do_infos_two_different(
            target, sector, binarized_financial_ts_half
        )
        np.save(
            f"./results/economic_results/half/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy",
            doinfo_two_sectors,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE VIOLIN PLOTS

# One figure per target sector; each panel overlays the "3 in target" sample
# (blue) with the "1 in target, 2 in partner sector" sample (orange).
for target in sectors:
    other_sectors = [name for name in sectors if name != target]
    fig, axs = plt.subplots(3, 3, figsize=(40, 40))

    for panel_index, sector in enumerate(other_sectors):
        ax = axs[divmod(panel_index, 3)]

        doinfo_one_sector = np.load(
            f"./results/economic_results/half/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/half/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        )

        coloured_samples = (
            ("tab:blue", doinfo_one_sector),
            ("tab:orange", doinfo_two_sectors),
        )
        for colour, values in coloured_samples:
            for body in ax.violinplot(values, showextrema=False)["bodies"]:
                body.set_facecolor(colour)
                body.set_edgecolor("black")
                body.set_alpha(0.5)

        ax.set_title(f"{target} - {sector}", size=35)
        ax.set_ylabel(r"$d\Omega_3^{tot.}$", size=40)
        ax.tick_params(axis="both", labelsize=18)
        ax.legend(
            handles=[
                mpatches.Patch(color="tab:blue", label=f"3 in {target}", alpha=0.4),
                mpatches.Patch(
                    color="tab:orange",
                    label=f"1 in {target}, 2 in {sector}",
                    alpha=0.4,
                ),
            ],
            fontsize=25,
        )

    plt.savefig(
        f"./results/economic_results/half/images/two_sectors/doinfo_two_sectors_{target}_tw_{tw}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

<a name="J-S_divergence_half"></a>
#### J-S divergence

In [None]:
# Empty table to be filled with JS divergences: rows are added per target
# sector, while the columns below identify the other sector.
half_sector_columns = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_half = pd.DataFrame(columns=half_sector_columns)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Fill df_half[target, sector] with the JS divergence between the one-sector
# sample of the target and the two-sector sample of the (target, sector) pair.
for target in sectors:
    for sector in sectors:
        if sector == target:
            # A sector is never paired with itself.
            continue

        doinfo_one_sector = np.load(
            f"./results/economic_results/half/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/half/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        )

        df_half.at[target, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Persist the half-length JS-divergence table (index included) and show it.
# NOTE: the file lives in the "half" folder but is named "result_full.csv";
# the comparison section reads it back under this exact name.
df_half.to_csv("./results/economic_results/half/result_full.csv", index=True)
df_half

<a name="3_vs_1-1-1_half"></a>
### 3 vs 1-1-1

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Visit every ordered triple of pairwise-distinct sectors and cache the
# array returned by get_do_infos_three_different for the half-length series.
for target in sectors:
    for sector1 in sectors:
        if sector1 == target:
            continue

        for sector2 in sectors:
            if sector2 == target or sector2 == sector1:
                continue

            doinfo_three_sectors = get_do_infos_three_different(
                target, sector1, sector2, binarized_financial_ts_half
            )
            np.save(
                f"./results/economic_results/half/do_info/doinfo_three_sectors_{target}_{sector1}_{sector2}_tw_{tw}.npy",
                doinfo_three_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE VIOLIN PLOTS

# One figure per ordered (target1, target2) pair; each panel overlays the
# "3 in target1" sample (blue) with the "1 in target1, 1 in target2, 1 in
# sector" sample (orange) for one remaining sector.
for target1 in sectors:
    # This sample depends only on target1, so it is loaded once per target
    # instead of once per panel.
    doinfo_one_sector = np.load(
        f"./results/economic_results/half/do_info/doinfo_one_sector_{target1}_tw_{tw}.npy"
    )

    for target2 in sectors:
        if target2 == target1:
            continue

        other_sectors = [
            sector for sector in sectors if sector not in (target1, target2)
        ]

        fig, axs = plt.subplots(3, 2, figsize=(40, 40))
        for i, sector in enumerate(other_sectors):
            ax = axs[divmod(i, 2)]
            doinfo_three_sectors = np.load(
                f"./results/economic_results/half/do_info/doinfo_three_sectors_{target1}_{target2}_{sector}_tw_{tw}.npy"
            )

            coloured_samples = (
                ("tab:blue", doinfo_one_sector),
                ("tab:orange", doinfo_three_sectors),
            )
            for colour, values in coloured_samples:
                for body in ax.violinplot(values, showextrema=False)["bodies"]:
                    body.set_facecolor(colour)
                    body.set_edgecolor("black")
                    body.set_alpha(0.5)

            ax.set_title(f"{target1} - {target2} - {sector}", size=40)
            ax.set_ylabel(r"$d\Omega_3^{tot.}$", size=45)
            ax.tick_params(axis="both", labelsize=25)
            ax.legend(
                handles=[
                    mpatches.Patch(
                        color="tab:blue", label=f"3 in {target1}", alpha=0.5
                    ),
                    mpatches.Patch(
                        color="tab:orange",
                        label=f"1 in {target1}, 1 in {target2}, 1 in {sector}",
                        alpha=0.5,
                    ),
                ],
                fontsize=25,
            )

        fig.savefig(
            f"./results/economic_results/half/images/three_sectors/doinfo_three_sectors_{target1}_{target2}_tw_{tw}.pdf",
            dpi=600,
            bbox_inches="tight",
        )
        # pyplot keeps every figure alive until it is closed; without this,
        # one 40x40-inch figure per (target1, target2) pair would accumulate
        # in memory. The PDF on disk is the output of this cell, so the
        # figure is released as soon as it has been saved.
        plt.close(fig)

<a name="comparison_lengths"></a>
## Comparison different lengths

<a name="comparison_f_h"></a>
### Comparison full-half

In this section I compare the results obtained from the full-length time series with those obtained from only its first half. Each plot refers to one target sector. The y-axis shows the JS divergence, computed on the full length, between the distribution in which all three stocks of the triplet are in the target sector and the distribution in which one stock is in the target sector and the other two are in another sector. The x-axis shows the same quantity computed on half the length.

In [None]:
# Reload the full-length and half-length JS-divergence tables; the first CSV
# column is used as the row index. Both files are called "result_full.csv"
# and differ only in their folder ("full" vs "half").
df_full = pd.read_csv("./results/economic_results/full/result_full.csv", index_col=0)
df_half = pd.read_csv("./results/economic_results/half/result_full.csv", index_col=0)

In [None]:
# Display the table loaded from the "full" results folder.
df_full

In [None]:
# Display the table loaded from the "half" results folder.
df_half

In [None]:
# Row and column labels of the full-length table, as plain Python lists;
# the cells below use them to walk both tables.
name_rows = df_full.index.tolist()
name_columns = df_full.columns.tolist()

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE SCATTER PLOTS

# One scatter plot per row label: half-length JS-div on x, full-length JS-div
# on y, with the bisector y = x drawn in red as a visual reference.
for row in name_rows:
    partner_columns = [name for name in name_columns if name != row]

    js_half = [df_half.loc[row, name] for name in partner_columns]
    js_full = [df_full.loc[row, name] for name in partner_columns]

    # The bisector spans only the range covered by the half-length values.
    bisector = np.linspace(min(js_half), max(js_half), 100)
    plt.plot(bisector, bisector, color="red", linewidth=1.5)

    plt.scatter(js_half, js_full)
    plt.xlabel("JS-div half length")
    plt.ylabel("JS-div full length")
    plt.title(f"Plot for target = {row}")
    plt.savefig(
        f"./results/economic_results/comparison/images/JS_div_two_sectors_target_{row}.pdf",
        dpi=600,
        bbox_inches="tight",
    )
    plt.show()

In [None]:
# For every row (target) count how many off-diagonal entries lie strictly
# above the bisector, i.e. have a larger JS-div on the full length than on
# half the length. Everything else (ties included) is counted as below.
above_bisector = []
below_bisector = []

for row in name_rows:
    columns = [column for column in name_columns if column != row]

    above_count = sum(
        1 for column in columns if df_half.loc[row, column] < df_full.loc[row, column]
    )

    above_bisector.append(above_count)
    below_bisector.append(len(columns) - above_count)

# There is one count per row of the tables, so the summary must be labelled
# with the row names: labelling it with name_columns silently mislabels the
# counts whenever rows and columns are not in the same order.
df_comparison = pd.DataFrame(
    [above_bisector, below_bisector], index=["+", "-"], columns=name_rows
)

df_comparison

<a name="creation_dataset"></a>
### Creation dataset

I study how the JS divergence behaves as the length of the time series varies.

Full length

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Copy the full-length results into the "comparison" folder, renaming the
# files so that they are keyed by the series length instead of by tw.
length = len(binarized_financial_ts)

for target in sectors:
    # The one-sector file depends only on the target: copy it once per
    # target rather than once per partner sector.
    doinfo_one_sector = np.load(
        f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
    )
    np.save(
        f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy",
        doinfo_one_sector,
    )

    for sector in sectors:
        if sector == target:
            continue

        doinfo_two_sectors = np.load(
            f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        )
        np.save(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy",
            doinfo_two_sectors,
        )

Half length

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Copy the half-length results into the "comparison" folder, renaming the
# files so that they are keyed by the series length instead of by tw.
length = len(binarized_financial_ts) // 2

for target in sectors:
    # The one-sector file depends only on the target: copy it once per
    # target rather than once per partner sector.
    doinfo_one_sector = np.load(
        f"./results/economic_results/half/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
    )
    np.save(
        f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy",
        doinfo_one_sector,
    )

    for sector in sectors:
        if sector == target:
            continue

        doinfo_two_sectors = np.load(
            f"./results/economic_results/half/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"
        )
        np.save(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy",
            doinfo_two_sectors,
        )

Other lengths

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# NOTE(review): 2700 and 5400 are absent from this list, presumably because
# the half-length and full-length cells above already provide those files;
# confirm that the series really has 5400 samples.
lengths = [540, 1080, 1620, 2160, 3240, 3780, 4320, 4860]

for length in lengths:
    for target in sectors:
        other_sectors = [name for name in sectors if name != target]

        # The one-sector sample depends only on (target, length): compute it
        # once, right before the first partner sector is processed.
        if other_sectors:
            doinfo_one_sector = get_do_infos_same(
                target, binarized_financial_ts[:length]
            )
            np.save(
                f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy",
                doinfo_one_sector,
            )

        for sector in other_sectors:
            doinfo_two_sectors = get_do_infos_two_different(
                target, sector, binarized_financial_ts[:length]
            )
            np.save(
                f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy",
                doinfo_two_sectors,
            )

<a name="comparison_industrials"></a>
#### Comparison Industrials

In [None]:
# Empty JS-divergence table for the Industrials target: one column per
# partner sector, rows are added later (one per series length).
industrials_partner_sectors = [
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_industrials = pd.DataFrame(columns=industrials_partner_sectors)

In [None]:
# Ten series lengths: 10%, 20%, ..., 100% of the full series, truncated to
# integers. NOTE(review): these values are used to build the file names read
# below, so they must coincide with the lengths used when the files were
# saved - confirm against the "Creation dataset" cells.
series_length = len(binarized_financial_ts)
lengths = [int(series_length * fraction) for fraction in np.linspace(0.1, 1, 10)]

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Industrials"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_industrials.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_industrials.to_csv("./results/economic_results/comparison/length_Industrials.csv", index=True)

In [None]:
# Reload the Industrials table from disk (first CSV column becomes the row
# index) and display it.
df_industrials = pd.read_csv("./results/economic_results/comparison/length_Industrials.csv", index_col=0)
df_industrials

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_industrials.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_industrials.index, df_industrials[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Industrials", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Industrials.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_basic_materials"></a>
#### Comparison Basic Materials

In [None]:
# Empty JS-divergence table for the Basic Materials target: one column per
# partner sector, rows are added later (one per series length).
basic_materials_partner_sectors = [
    "Industrials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df__basic_materials = pd.DataFrame(columns=basic_materials_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Basic Materials"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df__basic_materials.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df__basic_materials.to_csv("./results/economic_results/comparison/length_Basic_Materials.csv", index=True)

In [None]:
# Reload the Basic Materials table from disk (first CSV column becomes the
# row index) and display it.
df__basic_materials = pd.read_csv("./results/economic_results/comparison/length_Basic_Materials.csv", index_col=0)
df__basic_materials

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df__basic_materials.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df__basic_materials.index, df__basic_materials[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Basic Materials", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Basic_Materials.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_financials"></a>
#### Comparison Financials

In [None]:
# Empty JS-divergence table for the Financials target: one column per
# partner sector, rows are added later (one per series length).
financials_partner_sectors = [
    "Industrials",
    "Basic Materials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_financials = pd.DataFrame(columns=financials_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Financials"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_financials.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_financials.to_csv("./results/economic_results/comparison/length_Financial.csv", index=True)

In [None]:
# Reload the Financials table from disk (first CSV column becomes the row
# index) and display it. Note the file name is "length_Financial.csv",
# without the final "s".
df_financials = pd.read_csv("./results/economic_results/comparison/length_Financial.csv", index_col=0)
df_financials

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_financials.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_financials.index, df_financials[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Financials", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Financials.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_consumer_services"></a>
#### Comparison Consumer Services

In [None]:
# Empty JS-divergence table for the Consumer Services target: one column per
# partner sector, rows are added later (one per series length).
consumer_services_partner_sectors = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_consumer_services = pd.DataFrame(columns=consumer_services_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Consumer Services"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_consumer_services.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_consumer_services.to_csv("./results/economic_results/comparison/length_Consumer_Services.csv", index=True)

In [None]:
# Reload the Consumer Services table from disk (first CSV column becomes the
# row index) and display it.
df_consumer_services = pd.read_csv("./results/economic_results/comparison/length_Consumer_Services.csv", index_col=0)
df_consumer_services

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_consumer_services.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_consumer_services.index, df_consumer_services[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Consumer Services", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Consumer_Services.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_health_care"></a>
#### Comparison Health Care

In [None]:
# Empty JS-divergence table for the Health Care target: one column per
# partner sector, rows are added later (one per series length).
health_care_partner_sectors = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_health_care = pd.DataFrame(columns=health_care_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Health Care"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_health_care.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_health_care.to_csv("./results/economic_results/comparison/length_Health_Care.csv", index=True)

In [None]:
# Reload the Health Care table from disk (first CSV column becomes the row
# index) and display it.
df_health_care = pd.read_csv("./results/economic_results/comparison/length_Health_Care.csv", index_col=0)
df_health_care

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_health_care.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_health_care.index, df_health_care[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Health Care", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Health_Care.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_utilities"></a>
#### Comparison Utilities

In [None]:
# Empty JS-divergence table for the Utilities target: one column per
# partner sector, rows are added later (one per series length).
utilities_partner_sectors = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Oil & Gas",
    "Consumer Goods",
]
df_utilities = pd.DataFrame(columns=utilities_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Utilities"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_utilities.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_utilities.to_csv("./results/economic_results/comparison/length_Utilities.csv", index=True)

In [None]:
# Reload the Utilities table from disk (first CSV column becomes the row
# index) and display it.
df_utilities = pd.read_csv("./results/economic_results/comparison/length_Utilities.csv", index_col=0)
df_utilities

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_utilities.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_utilities.index, df_utilities[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Utilities", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Utilities.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_oil_gas"></a>
#### Comparison Oil & Gas

In [None]:
# Empty JS-divergence table for the Oil & Gas target: one column per
# partner sector, rows are added later (one per series length).
oil_gas_partner_sectors = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Consumer Goods",
]
df_oil_gas = pd.DataFrame(columns=oil_gas_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Oil & Gas"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_oil_gas.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_oil_gas.to_csv("./results/economic_results/comparison/length_Oil_Gas.csv", index=True)

In [None]:
# Reload the Oil & Gas table from disk (first CSV column becomes the row
# index) and display it.
df_oil_gas = pd.read_csv("./results/economic_results/comparison/length_Oil_Gas.csv", index_col=0)
df_oil_gas

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_oil_gas.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_oil_gas.index, df_oil_gas[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Oil & Gas", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Oil_Gas.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="comparison_consumer_goods"></a>
#### Comparison Consumer Goods

In [None]:
# Empty JS-divergence table for the Consumer Goods target: one column per
# partner sector, rows are added later (one per series length).
consumer_goods_partner_sectors = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
]
df_consumer_goods = pd.DataFrame(columns=consumer_goods_partner_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Row = series length, column = partner sector, value = JS divergence between
# the one-sector and the two-sector samples stored for that length.
target = "Consumer Goods"
other_sectors = [name for name in sectors if name != target]

for length in lengths:
    for sector in other_sectors:
        doinfo_one_sector = np.load(
            f"./results/economic_results/comparison/do_info/one_sector_{target}_length_{length}.npy"
        )
        doinfo_two_sectors = np.load(
            f"./results/economic_results/comparison/do_info/two_sectors_{target}_{sector}_length_{length}.npy"
        )

        df_consumer_goods.at[length, sector] = JS_div(doinfo_one_sector, doinfo_two_sectors)

df_consumer_goods.to_csv("./results/economic_results/comparison/length_Consumer_Goods.csv", index=True)

In [None]:
# Reload the Consumer Goods table from disk (first CSV column becomes the
# row index) and display it.
df_consumer_goods = pd.read_csv("./results/economic_results/comparison/length_Consumer_Goods.csv", index_col=0)
df_consumer_goods

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE PLOT

# One stacked panel per partner sector: JS-div against the row index.
panel_columns = df_consumer_goods.columns
num_rows = len(panel_columns)
num_cols = 1

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 25))

for ax, column in zip(axs, panel_columns):
    ax.scatter(df_consumer_goods.index, df_consumer_goods[column], label=f"{column}")
    ax.legend(fontsize=13)
    ax.set_ylabel("JS-div", fontsize=14)

# Only the bottom panel carries the x-axis label.
axs[num_rows - 1].set_xlabel("Length", fontsize=14)

fig.suptitle("Consumer Goods", fontsize=22)
plt.subplots_adjust(top=0.96)

plt.savefig(
    f"./results/economic_results/comparison/images/JS_div_different_lengths_Consumer_Goods.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="heatmaps"></a>
## Heatmaps

<a name="matrix"></a>
### Matrix

2 in the source and 1 in the target

In [None]:
# Empty sector-by-sector table; the columns identify the sector paired with
# the row's target, and rows are added when the table is filled.
matrix_sector_columns = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_matrix_all = pd.DataFrame(columns=matrix_sector_columns)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Each cell holds the mean of one stored full-length sample: the one-sector
# file on the diagonal, the two-sector file of the pair everywhere else.
for target in sectors:
    for sector in sectors:
        if target == sector:
            sample_path = f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        else:
            sample_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"

        df_matrix_all.at[target, sector] = np.mean(np.load(sample_path))

df_matrix_all.to_csv("./results/economic_results/comparison/matrix_2_1_all.csv", index=True)

In [None]:
# Reload the matrix of means from disk (first CSV column becomes the row
# index) and display it.
df_matrix_all = pd.read_csv("./results/economic_results/comparison/matrix_2_1_all.csv", index_col=0)
df_matrix_all

In [None]:
# Heatmap of the matrix of means: columns are labelled "Source", rows
# "Target". sns.heatmap returns the axes it drew on, which is labelled here.
heatmap_ax = sns.heatmap(df_matrix_all)
heatmap_ax.set_xlabel("Source")
heatmap_ax.set_ylabel("Target")

plt.savefig(
    f"./results/economic_results/comparison/images/heatmap_matrix_all.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

Let's now consider only the 3 most synergistic/redundant triplets of each distribution

In [None]:
# Start both tables as independent copies of df_matrix_all, so they share
# its row and column labels.
df_matrix_red, df_matrix_syn = df_matrix_all.copy(), df_matrix_all.copy()

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Each cell holds the mean of the three LARGEST values of the stored sample:
# the one-sector file on the diagonal, the two-sector file elsewhere.
for target in sectors:
    for sector in sectors:
        if target == sector:
            sample_path = f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        else:
            sample_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"

        ascending_values = np.sort(np.load(sample_path))
        df_matrix_red.at[target, sector] = np.mean(ascending_values[-3:])

df_matrix_red.to_csv("./results/economic_results/comparison/matrix_2_1_red.csv", index=True)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Each cell holds the mean of the three SMALLEST values of the stored sample:
# the one-sector file on the diagonal, the two-sector file elsewhere.
for target in sectors:
    for sector in sectors:
        if target == sector:
            sample_path = f"./results/economic_results/full/do_info/doinfo_one_sector_{target}_tw_{tw}.npy"
        else:
            sample_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target}_{sector}_tw_{tw}.npy"

        ascending_values = np.sort(np.load(sample_path))
        df_matrix_syn.at[target, sector] = np.mean(ascending_values[:3])

df_matrix_syn.to_csv("./results/economic_results/comparison/matrix_2_1_syn.csv", index=True)

In [None]:
# Reload the table stored in matrix_2_1_red.csv (first CSV column becomes
# the row index).
df_matrix_red = pd.read_csv("./results/economic_results/comparison/matrix_2_1_red.csv", index_col=0)

In [None]:
# Heatmap of df_matrix_red: columns are labelled "Source", rows "Target".
# sns.heatmap returns the axes it drew on, which is labelled here.
heatmap_ax = sns.heatmap(df_matrix_red)
heatmap_ax.set_xlabel("Source")
heatmap_ax.set_ylabel("Target")

plt.savefig(
    f"./results/economic_results/comparison/images/heatmap_matrix_red.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

In [None]:
# Reload the table stored in matrix_2_1_syn.csv (first CSV column becomes
# the row index).
df_matrix_syn = pd.read_csv("./results/economic_results/comparison/matrix_2_1_syn.csv", index_col=0)

In [None]:
# Heatmap of df_matrix_syn: columns are labelled "Source", rows "Target".
# sns.heatmap returns the axes it drew on, which is labelled here.
heatmap_ax = sns.heatmap(df_matrix_syn)
heatmap_ax.set_xlabel("Source")
heatmap_ax.set_ylabel("Target")

plt.savefig(
    f"./results/economic_results/comparison/images/heatmap_matrix_syn.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

1 in target1, 1 in target2 and 1 in source

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For every target1, build a (target2 x source) table holding the mean of the
# stored full-length sample for the triplet (target1, target2, source), and
# save it as one CSV per target1. When two or three of the sectors coincide,
# the matching one-sector or two-sector file is used instead of a
# three-sector file. Two-sector files are named
# "doinfo_two_sectors_{A}_{B}": the sector that appears once in the triplet
# comes first, the one that appears twice comes second.
for target1 in sectors:
    df_matrix = pd.DataFrame(
        columns=[
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ]
    )
    for target2 in sectors:
        for source in sectors:
            if target2 == target1:
                if source == target1:
                    # All three sectors coincide.
                    sample_path = f"./results/economic_results/full/do_info/doinfo_one_sector_{target2}_tw_{tw}.npy"
                else:
                    # Two in target1 (== target2), one in source.
                    sample_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{source}_{target2}_tw_{tw}.npy"
            elif source == target1:
                # Two in target1 (== source), one in target2.
                sample_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target2}_{source}_tw_{tw}.npy"
            elif source == target2:
                # One in target1, two in target2 (== source).
                sample_path = f"./results/economic_results/full/do_info/doinfo_two_sectors_{target1}_{target2}_tw_{tw}.npy"
            else:
                # Three distinct sectors.
                sample_path = f"./results/economic_results/full/do_info/doinfo_three_sectors_{target1}_{target2}_{source}_tw_{tw}.npy"

            df_matrix.at[target2, source] = np.mean(np.load(sample_path))

    # Write the table once it is complete; saving inside the target2 loop
    # rewrote the same file after every row.
    df_matrix.to_csv(f"./results/economic_results/comparison/matrix_1_1_1_{target1}.csv", index=True)

In [None]:
# Reload the eight matrix_1_1_1_{sector}.csv tables, keyed by sector name.
diz_matrix = {
    name: pd.read_csv(
        f"./results/economic_results/comparison/matrix_1_1_1_{name}.csv",
        index_col=0,
    )
    for name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Keep the individual per-sector names available as well.
df_industrials = diz_matrix["Industrials"]
df_basic_materials = diz_matrix["Basic Materials"]
df_financials = diz_matrix["Financials"]
df_consumer_services = diz_matrix["Consumer Services"]
df_health_care = diz_matrix["Health Care"]
df_utilities = diz_matrix["Utilities"]
df_oil_gas = diz_matrix["Oil & Gas"]
df_consumer_goods = diz_matrix["Consumer Goods"]

In [None]:
# One heatmap per fixed sector, each exported to its own PDF.
for sector in sectors:
    ax = sns.heatmap(diz_matrix[sector])
    ax.set_xlabel("Source")
    ax.set_ylabel("Target")
    ax.set_title(f"1 in {sector}, 1 in Target, 1 in Source")

    plt.savefig(
        f"./results/economic_results/comparison/images/heatmap_matrix_{sector}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

    plt.show()

<a name="divided_year"></a>
### Divided by year

Load data

In [None]:
# I am deleting the data for the Technology sector because there are only a few.
# errors="ignore" keeps this cell re-runnable: without it, dropping columns that
# were already removed raises a KeyError.
financial_ts = financial_ts.drop(
    columns=["CACI", "CIEN", "DBD", "DDD"],
    errors="ignore",
)

In [None]:
# Parse the Date column, then split the table into one sub-frame per calendar
# year, skipping the first year that appears in the data.
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])

years = financial_ts["Date"].dt.year.unique()[1:]

diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

In [None]:
# Notebook display only: show the sub-frame stored under key 2000.
diz_years[2000]

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every yearly
# sub-frame, passing tw as its second argument.
tw = 7
diz_years_bin = {year: binarize_df(diz_years[year], tw) for year in years}

In [None]:
# Notebook display only: show the binarized sub-frame stored under key 2000.
diz_years_bin[2000]

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every year and target sector, store the result of get_do_infos_same
# (one file per target) and of get_do_infos_two_different (one file per
# target / other-sector pair).
one_year_dir = "./results/economic_results/comparison/do_info/years/one_year"

for year in years:
    year_bin = diz_years_bin[year]

    for target in sectors:
        # The same-sector result does not depend on the partner sector, so it
        # is computed once per (year, target) before the partner loop.
        doinfo_one_sector = get_do_infos_same(target, year_bin)
        np.save(
            f"{one_year_dir}/one_sector_{target}_year_{year}.npy",
            doinfo_one_sector,
        )

        for sector in sectors:
            if sector == target:
                continue

            doinfo_two_sectors = get_do_infos_two_different(target, sector, year_bin)
            np.save(
                f"{one_year_dir}/two_sectors_{target}_{sector}_year_{year}.npy",
                doinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# One table per target sector: rows are sectors, columns are years, and each
# cell is the mean of the corresponding saved array.
for target in sectors:
    df_one_year = pd.DataFrame(columns=years)

    for year in years:
        for sector in sectors:
            # The diagonal entry (sector == target) comes from the one-sector file.
            stem = (
                f"one_sector_{target}_year_{year}"
                if target == sector
                else f"two_sectors_{target}_{sector}_year_{year}"
            )
            values = np.load(
                f"./results/economic_results/comparison/do_info/years/one_year/{stem}.npy"
            )
            df_one_year.at[sector, year] = np.mean(values)

    df_one_year.to_csv(
        f"./results/economic_results/comparison/one_year_{target}.csv", index=True
    )

In [None]:
# Reload the eight one_year_{sector}.csv tables into their per-sector names.
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = (
    pd.read_csv(
        f"./results/economic_results/comparison/one_year_{name}.csv", index_col=0
    )
    for name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
)

In [None]:
# Map each sector name to its yearly table so later cells can loop over sectors.
df_years_all_sectors = dict(
    zip(
        (
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ),
        (
            df_industrials,
            df_basic_materials,
            df_financials,
            df_consumer_services,
            df_health_care,
            df_utilities,
            df_oil_gas,
            df_consumer_goods,
        ),
    )
)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per sector (rows: sectors, columns: years), each saved as a PDF.
for sector in sectors:
    ax = sns.heatmap(df_years_all_sectors[sector])
    ax.set_xlabel("Year")
    ax.set_ylabel("Source")
    ax.set_title(f"1 in {sector}, 2 in source")

    plt.savefig(
        f"./results/economic_results/comparison/images/heatmap_one_year_{sector}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

    plt.show()

In [None]:
# Compare the number of rows in the binarized frames of 2000 and 2021.
for check_year in (2000, 2021):
    print(f"Anno {check_year}:", len(diz_years_bin[check_year]))

Let's eliminate the year 2021, to see if any patterns emerge

In [None]:
# Same tables with the last column removed (expected to be the 2021 column).
df_years_all_sectors_no_2021 = {
    sector: df_years_all_sectors[sector].iloc[:, :-1] for sector in sectors
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# Same per-sector heatmaps as before, drawn from the tables without the last column.
for sector in sectors:
    ax = sns.heatmap(df_years_all_sectors_no_2021[sector])
    ax.set_xlabel("Year")
    ax.set_ylabel("Source")
    ax.set_title(f"1 in {sector}, 2 in source (no 2021)")

    plt.savefig(
        f"./results/economic_results/comparison/images/heatmap_one_year_{sector}_no_2021.pdf",
        dpi=600,
        bbox_inches="tight",
    )

    plt.show()

<a name="year_same_sector"></a>
#### Same sector

Let's now plot all three elements in the same sector

In [None]:
# Rebind `years` to the column labels of the reduced Industrials table and
# create an empty table with those columns.
# NOTE(review): these labels come from a CSV header, so they are presumably
# strings rather than ints — confirm before reusing `years` as a dict key.
years = df_years_all_sectors_no_2021["Industrials"].columns
df_years_same_sector_no_2021 = pd.DataFrame(columns=years)

In [None]:
# Copy, for every sector, its own row (sector vs itself) out of that sector's
# table, then persist the collected rows.
for sector in sectors:
    sector_table = df_years_all_sectors_no_2021[sector]
    for year in years:
        df_years_same_sector_no_2021.at[sector, year] = sector_table.at[sector, year]

df_years_same_sector_no_2021.to_csv(
    "./results/economic_results/comparison/one_year_same_sector_no_2021.csv",
    index=True,
)

In [None]:
# Reload the same-sector table; the first CSV column becomes the row index.
df_years_same_sector_no_2021 = pd.read_csv(
    filepath_or_buffer="./results/economic_results/comparison/one_year_same_sector_no_2021.csv",
    index_col=0,
)

In [None]:
# Heatmap of the same-sector table (rows: sectors, columns: years), saved as a PDF.
ax = sns.heatmap(df_years_same_sector_no_2021)
ax.set_xlabel("Year")
ax.set_ylabel("Target")
ax.set_title("All 3 in Target")

plt.savefig(
    "./results/economic_results/comparison/images/heatmap_one_year_same_sector_no_2021.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

Let's now average only over the three most synergistic / most redundant triplets

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each (sector, year), sort the saved same-sector values and average the
# last three into the "red" table and the first three into the "syn" table.
df_one_year_red = pd.DataFrame(columns=years)
df_one_year_syn = pd.DataFrame(columns=years)

for target in sectors:
    for year in years:
        ranked = np.sort(
            np.load(
                f"./results/economic_results/comparison/do_info/years/one_year/one_sector_{target}_year_{year}.npy"
            )
        )
        df_one_year_red.at[target, year] = np.mean(ranked[-3:])
        df_one_year_syn.at[target, year] = np.mean(ranked[:3])

df_one_year_red.to_csv(
    "./results/economic_results/comparison/one_year_same_sector_no_2021_red.csv",
    index=True,
)
df_one_year_syn.to_csv(
    "./results/economic_results/comparison/one_year_same_sector_no_2021_syn.csv",
    index=True,
)

In [None]:
# Reload both top-3 tables; the first CSV column becomes the row index.
df_one_year_red, df_one_year_syn = (
    pd.read_csv(
        f"./results/economic_results/comparison/one_year_same_sector_no_2021_{suffix}.csv",
        index_col=0,
    )
    for suffix in ("red", "syn")
)

In [None]:
# Heatmap of the "red" top-3 table, saved as a PDF.
ax = sns.heatmap(df_one_year_red)
ax.set_xlabel("Year")
ax.set_ylabel("Target")
ax.set_title("All 3 in Target (3 most redundant)")

plt.savefig(
    "./results/economic_results/comparison/images/heatmap_one_year_same_sector_no_2021_red.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

In [None]:
# Heatmap of the "syn" top-3 table, saved as a PDF.
ax = sns.heatmap(df_one_year_syn)
ax.set_xlabel("Year")
ax.set_ylabel("Target")
ax.set_title("All 3 in Target (3 most synergistic)")

plt.savefig(
    "./results/economic_results/comparison/images/heatmap_one_year_same_sector_no_2021_syn.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="divided_two_years"></a>
### Divided by two years

In [None]:
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])

# I remove 1999
years = financial_ts["Date"].dt.year.unique()[1:]

# One sub-frame per calendar year.
diz_years = {}

for year in years:
    df_year = financial_ts[financial_ts["Date"].dt.year == year]
    diz_years[year] = df_year

# Concatenate consecutive years two by two, keyed as "<start>_<end>".
diz_two_years = {}

# Stop at len(years) - 1 so that only complete pairs are built: with an odd
# number of years the unpaired last year is skipped instead of raising an
# IndexError on years[i + 1].
for i in range(0, len(years) - 1, 2):
    start_year = years[i]
    end_year = years[i + 1]
    combined_key = f"{start_year}_{end_year}"

    combined_df = pd.concat([diz_years[start_year], diz_years[end_year]])

    diz_two_years[combined_key] = combined_df

In [None]:
# Notebook display only: show the combined frame stored under "2000_2001".
diz_two_years["2000_2001"]

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every two-year
# frame, passing tw as its second argument.
tw = 7
diz_two_years_bin = {
    key: binarize_df(frame, tw) for key, frame in diz_two_years.items()
}

In [None]:
# Notebook display only: show the binarized frame stored under "2000_2001".
diz_two_years_bin["2000_2001"]

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every two-year window and target sector, store the result of
# get_do_infos_same (one file per target) and of get_do_infos_two_different
# (one file per target / other-sector pair).
two_years_dir = "./results/economic_results/comparison/do_info/years/two_years"

for year in diz_two_years.keys():
    window_bin = diz_two_years_bin[year]

    for target in sectors:
        # The same-sector result does not depend on the partner sector, so it
        # is computed once per (window, target) before the partner loop.
        doinfo_one_sector = get_do_infos_same(target, window_bin)
        np.save(
            f"{two_years_dir}/one_sector_{target}_years_{year}.npy",
            doinfo_one_sector,
        )

        for sector in sectors:
            if sector == target:
                continue

            doinfo_two_sectors = get_do_infos_two_different(
                target, sector, window_bin
            )
            np.save(
                f"{two_years_dir}/two_sectors_{target}_{sector}_years_{year}.npy",
                doinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# One table per target sector: rows are sectors, columns are two-year windows,
# and each cell is the mean of the corresponding saved array.
years = diz_two_years.keys()
for target in sectors:
    df_two_years = pd.DataFrame(columns=years)

    for year in years:
        for sector in sectors:
            # The diagonal entry (sector == target) comes from the one-sector file.
            stem = (
                f"one_sector_{target}_years_{year}"
                if target == sector
                else f"two_sectors_{target}_{sector}_years_{year}"
            )
            values = np.load(
                f"./results/economic_results/comparison/do_info/years/two_years/{stem}.npy"
            )
            df_two_years.at[sector, year] = np.mean(values)

    df_two_years.to_csv(
        f"./results/economic_results/comparison/two_years_{target}.csv", index=True
    )

In [None]:
# Reload the eight two_years_{sector}.csv tables into their per-sector names.
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = (
    pd.read_csv(
        f"./results/economic_results/comparison/two_years_{name}.csv", index_col=0
    )
    for name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
)

In [None]:
# Map each sector name to its two-year table so later cells can loop over sectors.
df_two_years_all_sectors = dict(
    zip(
        (
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ),
        (
            df_industrials,
            df_basic_materials,
            df_financials,
            df_consumer_services,
            df_health_care,
            df_utilities,
            df_oil_gas,
            df_consumer_goods,
        ),
    )
)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per sector (rows: sectors, columns: two-year windows), saved as PDFs.
for sector in sectors:
    ax = sns.heatmap(df_two_years_all_sectors[sector])
    ax.set_xlabel("Year")
    ax.set_ylabel("Source")
    ax.set_title(f"1 in {sector}, 2 in source")

    plt.savefig(
        f"./results/economic_results/comparison/images/heatmap_two_years_{sector}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

    plt.show()

<a name="divided_three_years"></a>
### Divided by three years

In [None]:
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])

# I remove 1999 and 2021
years = financial_ts["Date"].dt.year.unique()[1:-1]

# One sub-frame per calendar year.
diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

# Concatenate consecutive years three by three, keyed as "<y1>_<y2>_<y3>";
# a trailing group with fewer than three years is skipped.
diz_three_years = {}

for start in range(0, len(years), 3):
    window = years[start : start + 3]
    if len(window) < 3:
        continue

    combined_key = "_".join(str(window_year) for window_year in window)
    diz_three_years[combined_key] = pd.concat(
        [diz_years[window_year] for window_year in window]
    )

In [None]:
# Notebook display only: show the combined frame stored under "2000_2001_2002".
diz_three_years["2000_2001_2002"]

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every three-year
# frame, passing tw as its second argument.
tw = 7
diz_three_years_bin = {
    key: binarize_df(frame, tw) for key, frame in diz_three_years.items()
}

In [None]:
# Notebook display only: show the binarized frame stored under "2000_2001_2002".
diz_three_years_bin["2000_2001_2002"]

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every three-year window and target sector, store the result of
# get_do_infos_same (one file per target) and of get_do_infos_two_different
# (one file per target / other-sector pair).
three_years_dir = "./results/economic_results/comparison/do_info/years/three_years"

for year in diz_three_years_bin.keys():
    window_bin = diz_three_years_bin[year]

    for target in sectors:
        # The same-sector result does not depend on the partner sector, so it
        # is computed once per (window, target) before the partner loop.
        doinfo_one_sector = get_do_infos_same(target, window_bin)
        np.save(
            f"{three_years_dir}/one_sector_{target}_years_{year}.npy",
            doinfo_one_sector,
        )

        for sector in sectors:
            if sector == target:
                continue

            doinfo_two_sectors = get_do_infos_two_different(
                target, sector, window_bin
            )
            np.save(
                f"{three_years_dir}/two_sectors_{target}_{sector}_years_{year}.npy",
                doinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# One table per target sector: rows are sectors, columns are three-year
# windows, and each cell is the mean of the corresponding saved array.
years = diz_three_years_bin.keys()
for target in sectors:
    df_three_years = pd.DataFrame(columns=years)

    for year in years:
        for sector in sectors:
            # The diagonal entry (sector == target) comes from the one-sector file.
            stem = (
                f"one_sector_{target}_years_{year}"
                if target == sector
                else f"two_sectors_{target}_{sector}_years_{year}"
            )
            values = np.load(
                f"./results/economic_results/comparison/do_info/years/three_years/{stem}.npy"
            )
            df_three_years.at[sector, year] = np.mean(values)

    df_three_years.to_csv(
        f"./results/economic_results/comparison/three_years_{target}.csv", index=True
    )

In [None]:
# Reload the eight three_years_{sector}.csv tables into their per-sector names.
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = (
    pd.read_csv(
        f"./results/economic_results/comparison/three_years_{name}.csv", index_col=0
    )
    for name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
)

In [None]:
# Map each sector name to its three-year table so later cells can loop over sectors.
df_three_years_all_sectors = dict(
    zip(
        (
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ),
        (
            df_industrials,
            df_basic_materials,
            df_financials,
            df_consumer_services,
            df_health_care,
            df_utilities,
            df_oil_gas,
            df_consumer_goods,
        ),
    )
)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per sector (rows: sectors, columns: three-year windows), saved as PDFs.
for sector in sectors:
    ax = sns.heatmap(df_three_years_all_sectors[sector])
    ax.set_xlabel("Year")
    ax.set_ylabel("Source")
    ax.set_title(f"1 in {sector}, 2 in source")

    plt.savefig(
        f"./results/economic_results/comparison/images/heatmap_three_years_{sector}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

    plt.show()

<a name="o_info"></a>
## O-information

In [None]:
# Sector names iterated over by the O-information cells; the same strings are
# embedded in the result file names and used as table labels.
sectors = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]

In [None]:
# I am deleting the data for the Technology sector because there are only a few.
# errors="ignore" keeps this cell re-runnable (and safe to run after an earlier
# cell has already dropped the same columns): without it, pandas raises a
# KeyError for columns that are no longer present.
financial_ts = financial_ts.drop(
    columns=["CACI", "CIEN", "DBD", "DDD"],
    errors="ignore",
)
financial_ts_gauss = financial_ts_gauss.drop(
    columns=["CACI", "CIEN", "DBD", "DDD"],
    errors="ignore",
)

In [None]:
# Remove the same four columns from the binarized tables.
# errors="ignore" keeps this cell re-runnable: without it, dropping columns
# that were already removed raises a KeyError.
binarized_financial_ts = binarized_financial_ts.drop(
    columns=["CACI", "CIEN", "DBD", "DDD"],
    errors="ignore",
)
binarized_financial_ts_gauss = binarized_financial_ts_gauss.drop(
    columns=["CACI", "CIEN", "DBD", "DDD"],
    errors="ignore",
)

<a name="o_info_3"></a>
### 3 in Sector A

<a name="o_info_3_matrix"></a>
#### Matrix

In [None]:
# Empty table with one column per sector; rows are added by a later cell.
matrix_columns = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_matrix_all = pd.DataFrame(columns=matrix_columns)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Store the result of get_o_infos_same on the full binarized table, one file per sector.
for target in sectors:
    np.save(
        f"./results/economic_results/o_info/metrics/3/all/one_sector_{target}.npy",
        get_o_infos_same(target, binarized_financial_ts),
    )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Fill a single row (labelled with the empty string) with the mean of each
# sector's saved values, then persist the table.
for target in sectors:
    sector_values = np.load(
        f"./results/economic_results/o_info/metrics/3/all/one_sector_{target}.npy"
    )
    df_matrix_all.at["", target] = np.mean(sector_values)

df_matrix_all.to_csv(
    "./results/economic_results/o_info/dataframes/3/all/matrix.csv",
    index=True,
)

In [None]:
# Reload the one-row table; the first CSV column becomes the row index.
df_matrix_all = pd.read_csv(
    filepath_or_buffer="./results/economic_results/o_info/dataframes/3/all/matrix.csv",
    index_col=0,
)

In [None]:
# Heatmap of the one-row table of per-sector means, saved as a PDF.
ax = sns.heatmap(df_matrix_all)
ax.set_xlabel("Target")
ax.set_ylabel("mean O-info")

plt.savefig(
    "./results/economic_results/o_info/images/3/all/heatmap_matrix.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="o_info_3_one_year"></a>
#### One year

In [None]:
# Parse the Date column, then split the table into one sub-frame per calendar
# year, skipping the first year that appears in the data.
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])

years = financial_ts["Date"].dt.year.unique()[1:]

diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every yearly
# sub-frame, passing tw as its second argument.
tw = 7
diz_years_bin = {year: binarize_df(diz_years[year], tw) for year in years}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Store the result of get_o_infos_same for every (year, sector) pair.
for year in years:
    year_bin = diz_years_bin[year]
    for target in sectors:
        np.save(
            f"./results/economic_results/o_info/metrics/3/one_year/one_sector_{target}_year_{year}.npy",
            get_o_infos_same(target, year_bin),
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# Table of means: one row per sector, one column per year.
df_one_year = pd.DataFrame(columns=years)
for target in sectors:
    for year in years:
        yearly_values = np.load(
            f"./results/economic_results/o_info/metrics/3/one_year/one_sector_{target}_year_{year}.npy"
        )
        df_one_year.at[target, year] = np.mean(yearly_values)

df_one_year.to_csv(
    "./results/economic_results/o_info/dataframes/3/one_year/dataframe.csv",
    index=True,
)

In [None]:
# Reload the per-year table; the first CSV column becomes the row index.
df_one_year = pd.read_csv(
    filepath_or_buffer="./results/economic_results/o_info/dataframes/3/one_year/dataframe.csv",
    index_col=0,
)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAP

ax = sns.heatmap(df_one_year)

# Dashed vertical markers: (x position on the heatmap axis, colour, label).
event_markers = [
    (1.5, "yellow", "twin towers"),
    (8.5, "blue", "financial crisis"),
    (9.5, "green", "Obama president"),
    (17.5, "brown", "Trump president"),
    (19.5, "pink", "covid-19"),
    (21.5, "cyan", "Biden president"),
]
for x_pos, colour, event in event_markers:
    ax.axvline(x=x_pos, color=colour, linestyle="--", label=event)

ax.set_xlabel("Year")
ax.set_ylabel("Source")
ax.set_title("3 in source (O-info)")

# plt.legend()

plt.savefig(
    "./results/economic_results/o_info/images/3/one_year/heatmap.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="o_info_3_two_years"></a>
#### Two years

In [None]:
# I remove 1999
years = financial_ts["Date"].dt.year.unique()[1:]

# One sub-frame per calendar year.
diz_years = {}

for year in years:
    df_year = financial_ts[financial_ts["Date"].dt.year == year]
    diz_years[year] = df_year

# Concatenate consecutive years two by two, keyed as "<start>_<end>".
diz_two_years = {}

# Stop at len(years) - 1 so that only complete pairs are built: with an odd
# number of years the unpaired last year is skipped instead of raising an
# IndexError on years[i + 1].
for i in range(0, len(years) - 1, 2):
    start_year = years[i]
    end_year = years[i + 1]
    combined_key = f"{start_year}_{end_year}"

    combined_df = pd.concat([diz_years[start_year], diz_years[end_year]])

    diz_two_years[combined_key] = combined_df

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every two-year
# frame, passing tw as its second argument.
tw = 7
diz_two_years_bin = {
    key: binarize_df(frame, tw) for key, frame in diz_two_years.items()
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Store the result of get_o_infos_same for every (two-year window, sector) pair.
for year in diz_two_years.keys():
    window_bin = diz_two_years_bin[year]
    for target in sectors:
        np.save(
            f"./results/economic_results/o_info/metrics/3/two_years/one_sector_{target}_years_{year}.npy",
            get_o_infos_same(target, window_bin),
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Table of means: one row per sector, one column per two-year window.
years = diz_two_years.keys()
df_two_years = pd.DataFrame(columns=years)

for target in sectors:
    for year in years:
        window_values = np.load(
            f"./results/economic_results/o_info/metrics/3/two_years/one_sector_{target}_years_{year}.npy"
        )
        df_two_years.at[target, year] = np.mean(window_values)

df_two_years.to_csv(
    "./results/economic_results/o_info/dataframes/3/two_years/dataframe.csv",
    index=True,
)

In [None]:
# Reload the two-year table; the first CSV column becomes the row index.
df_two_years = pd.read_csv(
    filepath_or_buffer="./results/economic_results/o_info/dataframes/3/two_years/dataframe.csv",
    index_col=0,
)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAP

# Heatmap of the two-year table (rows: sectors, columns: windows), saved as a PDF.
ax = sns.heatmap(df_two_years)
ax.set_xlabel("Year")
ax.set_ylabel("Source")
ax.set_title("3 in source (O-info)")

plt.savefig(
    "./results/economic_results/o_info/images/3/two_years/heatmap.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="o_info_3_three_years"></a>
#### Three years

In [None]:
# I remove 1999 and 2021
years = financial_ts["Date"].dt.year.unique()[1:-1]

# One sub-frame per calendar year.
diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

# Concatenate consecutive years three by three, keyed as "<y1>_<y2>_<y3>";
# a trailing group with fewer than three years is skipped.
diz_three_years = {}

for start in range(0, len(years), 3):
    window = years[start : start + 3]
    if len(window) < 3:
        continue

    combined_key = "_".join(str(window_year) for window_year in window)
    diz_three_years[combined_key] = pd.concat(
        [diz_years[window_year] for window_year in window]
    )

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every three-year
# frame, passing tw as its second argument.
tw = 7
diz_three_years_bin = {
    key: binarize_df(frame, tw) for key, frame in diz_three_years.items()
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Store the result of get_o_infos_same for every (three-year window, sector) pair.
for year, window_bin in diz_three_years_bin.items():
    for target in sectors:
        np.save(
            f"./results/economic_results/o_info/metrics/3/three_years/one_sector_{target}_years_{year}.npy",
            get_o_infos_same(target, window_bin),
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Table of means: one row per sector, one column per three-year window.
years = diz_three_years_bin.keys()
df_three_years = pd.DataFrame(columns=years)

for target in sectors:
    for year in years:
        window_values = np.load(
            f"./results/economic_results/o_info/metrics/3/three_years/one_sector_{target}_years_{year}.npy"
        )
        df_three_years.at[target, year] = np.mean(window_values)

df_three_years.to_csv(
    "./results/economic_results/o_info/dataframes/3/three_years/dataframe.csv",
    index=True,
)

In [None]:
# Reload the three-year table; the first CSV column becomes the row index.
df_three_years = pd.read_csv(
    filepath_or_buffer="./results/economic_results/o_info/dataframes/3/three_years/dataframe.csv",
    index_col=0,
)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# Heatmap of the three-year table (rows: sectors, columns: windows), saved as a PDF.
ax = sns.heatmap(df_three_years)
ax.set_xlabel("Year")
ax.set_ylabel("Source")
ax.set_title("3 in source (O-info)")

plt.savefig(
    "./results/economic_results/o_info/images/3/three_years/heatmap.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="o_info_2_1"></a>
### 2 in Sector A and 1 in Sector B

<a name="o_info_2_1_matrix"></a>
#### Matrix

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Store the result of get_o_infos_two_different on the full binarized table
# for every ordered pair of distinct sectors.
for target in sectors:
    for sector in sectors:
        if sector == target:
            continue

        np.save(
            f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{target}_two_sector_{sector}.npy",
            get_o_infos_two_different(target, sector, binarized_financial_ts),
        )

In [None]:
# Empty table with one column per sector; rows are added by a later cell.
matrix_columns = [
    "Industrials",
    "Basic Materials",
    "Financials",
    "Consumer Services",
    "Health Care",
    "Utilities",
    "Oil & Gas",
    "Consumer Goods",
]
df_matrix_all = pd.DataFrame(columns=matrix_columns)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAME

# Sector-by-sector table of means: the diagonal reads the one-sector files
# (metrics/3), every other cell reads the matching two-sector file (metrics/2_1).
for target in sectors:
    for sector in sectors:
        relative_path = (
            f"3/all/one_sector_{target}.npy"
            if target == sector
            else f"2_1/all/one_sector_{target}_two_sector_{sector}.npy"
        )
        cell_values = np.load(
            f"./results/economic_results/o_info/metrics/{relative_path}"
        )
        df_matrix_all.at[target, sector] = np.mean(cell_values)

df_matrix_all.to_csv(
    "./results/economic_results/o_info/dataframes/2_1/all/matrix.csv",
    index=True,
)

In [None]:
# Reload the sector-by-sector table; the first CSV column becomes the row index.
df_matrix_all = pd.read_csv(
    filepath_or_buffer="./results/economic_results/o_info/dataframes/2_1/all/matrix.csv",
    index_col=0,
)

In [None]:
# Heatmap of the sector-by-sector table, saved as a PDF.
ax = sns.heatmap(df_matrix_all)
ax.set_xlabel("Source")
ax.set_ylabel("Target")

plt.savefig(
    "./results/economic_results/o_info/images/2_1/all/heatmap_matrix.pdf",
    dpi=600,
    bbox_inches="tight",
)

plt.show()

<a name="o_info_2_1_one_year"></a>
#### One year

In [None]:
# Parse the Date column, then split the table into one sub-frame per calendar
# year, skipping the first year that appears in the data.
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])

years = financial_ts["Date"].dt.year.unique()[1:]

diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

In [None]:
# Apply binarize_df (defined elsewhere in this notebook) to every yearly
# sub-frame, passing tw as its second argument.
tw = 7
diz_years_bin = {year: binarize_df(diz_years[year], tw) for year in years}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Store the result of get_o_infos_two_different for every year and every
# ordered pair of distinct sectors.
for year in years:
    year_bin = diz_years_bin[year]
    for target in sectors:
        for sector in sectors:
            if sector == target:
                continue

            np.save(
                f"./results/economic_results/o_info/metrics/2_1/one_year/one_sector_{target}_two_sector_{sector}_year_{year}.npy",
                get_o_infos_two_different(target, sector, year_bin),
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# One table per target sector: rows are sectors, columns are years. The
# diagonal row reads the one-sector files (metrics/3), the others read the
# matching two-sector files (metrics/2_1).
for target in sectors:
    df_one_year = pd.DataFrame(columns=years)

    for year in years:
        for sector in sectors:
            relative_path = (
                f"3/one_year/one_sector_{target}_year_{year}.npy"
                if target == sector
                else f"2_1/one_year/one_sector_{target}_two_sector_{sector}_year_{year}.npy"
            )
            cell_values = np.load(
                f"./results/economic_results/o_info/metrics/{relative_path}"
            )
            df_one_year.at[sector, year] = np.mean(cell_values)

    df_one_year.to_csv(
        f"./results/economic_results/o_info/dataframes/2_1/one_year/dataframe_{target}.csv",
        index=True,
    )

In [None]:
# Reload the eight dataframe_{sector}.csv tables, keyed by sector name.
df_one_year_all_sectors = {
    name: pd.read_csv(
        f"./results/economic_results/o_info/dataframes/2_1/one_year/dataframe_{name}.csv",
        index_col=0,
    )
    for name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Keep the individual per-sector names available as well.
df_industrials = df_one_year_all_sectors["Industrials"]
df_basic_materials = df_one_year_all_sectors["Basic Materials"]
df_financials = df_one_year_all_sectors["Financials"]
df_consumer_services = df_one_year_all_sectors["Consumer Services"]
df_health_care = df_one_year_all_sectors["Health Care"]
df_utilities = df_one_year_all_sectors["Utilities"]
df_oil_gas = df_one_year_all_sectors["Oil & Gas"]
df_consumer_goods = df_one_year_all_sectors["Consumer Goods"]

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# Vertical marker lines as (x position, colour, label). The x positions are
# hard-coded heatmap column offsets.
# NOTE(review): presumably aligned with the year columns — verify if `years` changes.
event_markers = (
    (1.5, "yellow", "twin towers"),
    (8.5, "blue", "financial crisis"),
    (9.5, "green", "Obama president"),
    (17.5, "brown", "Trump president"),
    (19.5, "pink", "covid-19"),
    (21.5, "cyan", "Biden president"),
)

# One heatmap per sector, saved as PDF and then displayed.
for sector in sectors:
    sns.heatmap(df_one_year_all_sectors[sector])

    for x_pos, line_color, event_name in event_markers:
        plt.axvline(x=x_pos, color=line_color, linestyle="--", label=event_name)

    plt.title(f"1 in {sector}, 2 in source (O-info)")
    plt.xlabel("Year")
    plt.ylabel("Source")

    # The legend call is disabled, so the marker labels are not drawn.
#    plt.legend()

    output_pdf = f"./results/economic_results/o_info/images/2_1/one_year/heatmap_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

<a name="o_info_2_1_two_years"></a>
#### Two years

In [None]:
# Drop the first year present in the data (1999, per the original note).
years = financial_ts["Date"].dt.year.unique()[1:]

# One frame per remaining calendar year.
diz_years = {}

for year in years:
    df_year = financial_ts[financial_ts["Date"].dt.year == year]
    diz_years[year] = df_year

# Merge consecutive years into non-overlapping two-year windows keyed
# "<start>_<end>". The range stops at len(years) - 1 so that an odd number of
# years drops the unpaired last year instead of raising IndexError on
# years[i + 1]; the three-year windows treat an incomplete trailing group the
# same way.
diz_two_years = {}

for i in range(0, len(years) - 1, 2):
    start_year = years[i]
    end_year = years[i + 1]
    combined_key = f"{start_year}_{end_year}"

    combined_df = pd.concat([diz_years[start_year], diz_years[end_year]])

    diz_two_years[combined_key] = combined_df

In [None]:
tw = 7

# Binarize every two-year window; keys are the same "<start>_<end>" strings.
diz_two_years_bin = {
    window_key: binarize_df(window_df, tw)
    for window_key, window_df in diz_two_years.items()
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every two-year window and every ordered pair of distinct sectors,
# compute the O-information values and store them in one .npy file.
# Iterates the binarized dictionary, which is the one actually read below
# (same approach as the three-year section).
for year in diz_two_years_bin:
    for target in sectors:
        other_sectors = [sector for sector in sectors if sector != target]
        for sector in other_sectors:

            oinfo_two_sectors = get_o_infos_two_different(
                target, sector, diz_two_years_bin[year]
            )
            np.save(
                f"./results/economic_results/o_info/metrics/2_1/two_years/one_sector_{target}_two_sector_{sector}_years_{year}.npy",
                oinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# NOTE: this rebinds the notebook-level `years` to the two-year window keys.
years = diz_two_years.keys()

# For each target sector build a (sector x window) table of mean
# O-information and write it to CSV. The row where the sector equals the
# target uses the same-sector "3" metrics, all other rows the "2_1" metrics.
for target in sectors:
    df_two_years = pd.DataFrame(columns=years)

    for year in years:
        for sector in sectors:
            if target != sector:
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/two_years/one_sector_{target}_two_sector_{sector}_years_{year}.npy"
            else:
                metrics_path = f"./results/economic_results/o_info/metrics/3/two_years/one_sector_{target}_years_{year}.npy"

            # Assigning through .at adds the row label the first time it is used.
            df_two_years.at[sector, year] = np.mean(np.load(metrics_path))

    df_two_years.to_csv(
        f"./results/economic_results/o_info/dataframes/2_1/two_years/dataframe_{target}.csv", index=True
    )

In [None]:
# Reload the per-sector two-year tables from CSV, keyed by sector name.
df_two_years_all_sectors = {
    sector_name: pd.read_csv(
        f"./results/economic_results/o_info/dataframes/2_1/two_years/dataframe_{sector_name}.csv",
        index_col=0,
    )
    for sector_name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Expose each frame under its own name as well (same order as the keys above).
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = df_two_years_all_sectors.values()

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per sector from the two-year tables, saved as PDF and displayed.
for sector in sectors:
    heatmap_frame = df_two_years_all_sectors[sector]
    sns.heatmap(heatmap_frame)

    plt.title(f"1 in {sector}, 2 in source (O-info)")
    plt.xlabel("Year")
    plt.ylabel("Source")

    output_pdf = f"./results/economic_results/o_info/images/2_1/two_years/heatmap_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

<a name="o_info_2_1_three_years"></a>
#### Three years

In [None]:
# Drop the first and the last year present in the data
# (1999 and 2021, per the original note).
years = financial_ts["Date"].dt.year.unique()[1:-1]

# One frame per remaining calendar year.
row_years = financial_ts["Date"].dt.year
diz_years = {}
for year in years:
    diz_years[year] = financial_ts[row_years == year]

# Non-overlapping three-year windows keyed "<start>_<mid>_<end>". The range
# only yields start indices that leave room for a full group of three, so an
# incomplete trailing group is skipped.
diz_three_years = {}
for first_idx in range(0, len(years) - 2, 3):
    start_year, mid_year, end_year = years[first_idx : first_idx + 3]
    window_frames = [diz_years[start_year], diz_years[mid_year], diz_years[end_year]]
    diz_three_years[f"{start_year}_{mid_year}_{end_year}"] = pd.concat(window_frames)

In [None]:
tw = 7

# Binarize every three-year window; keys are the same window strings.
diz_three_years_bin = {
    window_key: binarize_df(window_df, tw)
    for window_key, window_df in diz_three_years.items()
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every three-year window and every ordered pair of distinct sectors,
# compute the O-information values and store them in one .npy file.
for year in diz_three_years_bin:
    for target in sectors:
        other_sectors = [sector for sector in sectors if sector != target]
        for sector in other_sectors:

            oinfo_two_sectors = get_o_infos_two_different(
                target, sector, diz_three_years_bin[year]
            )
            np.save(
                f"./results/economic_results/o_info/metrics/2_1/three_years/one_sector_{target}_two_sector_{sector}_years_{year}.npy",
                oinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# NOTE: this rebinds the notebook-level `years` to the three-year window keys.
years = diz_three_years_bin.keys()

# For each target sector build a (sector x window) table of mean
# O-information and write it to CSV. The row where the sector equals the
# target uses the same-sector "3" metrics, all other rows the "2_1" metrics.
for target in sectors:
    df_three_years = pd.DataFrame(columns=years)

    for year in years:
        for sector in sectors:
            if target != sector:
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/three_years/one_sector_{target}_two_sector_{sector}_years_{year}.npy"
            else:
                metrics_path = f"./results/economic_results/o_info/metrics/3/three_years/one_sector_{target}_years_{year}.npy"

            # Assigning through .at adds the row label the first time it is used.
            df_three_years.at[sector, year] = np.mean(np.load(metrics_path))

    df_three_years.to_csv(
        f"./results/economic_results/o_info/dataframes/2_1/three_years/dataframe_{target}.csv", index=True
    )

In [None]:
# Reload the per-sector three-year tables from CSV, keyed by sector name.
df_three_years_all_sectors = {
    sector_name: pd.read_csv(
        f"./results/economic_results/o_info/dataframes/2_1/three_years/dataframe_{sector_name}.csv",
        index_col=0,
    )
    for sector_name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Expose each frame under its own name as well (same order as the keys above).
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = df_three_years_all_sectors.values()

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per sector from the three-year tables, saved as PDF and displayed.
for sector in sectors:
    heatmap_frame = df_three_years_all_sectors[sector]
    sns.heatmap(heatmap_frame)

    plt.title(f"1 in {sector}, 2 in source (O-info)")
    plt.xlabel("Year")
    plt.ylabel("Source")

    output_pdf = f"./results/economic_results/o_info/images/2_1/three_years/heatmap_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

<a name="o_info_1_1_1"></a>
### 1 in Sector A, 1 in Sector B and 1 in Sector C

<a name="o_info_1_1_1_matrix"></a>
#### Matrix

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Visit every ordered triple of pairwise-distinct sectors
# (target, sector1, sector2), compute the O-information values on the full
# binarized series and store them in one .npy file per triple.
for target in sectors:
    for sector1 in sectors:
        if sector1 == target:
            continue
        for sector2 in sectors:
            if sector2 == target or sector2 == sector1:
                continue

            oinfo_three_sectors = get_o_infos_three_different(
                target, sector1, sector2, binarized_financial_ts
            )
            output_npy = f"./results/economic_results/o_info/metrics/1_1_1/all/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}.npy"
            np.save(output_npy, oinfo_three_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each fixed sector `target1`, build a (target2 x source) matrix holding
# the mean of the stored O-information values for the sector combination
# (target1, target2, source), then write it to CSV.
for target1 in sectors:
    df_matrix = pd.DataFrame(
        columns=[
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ]
    )
    for target2 in sectors:
        for source in sectors:
            # Pick the metrics file that matches how many of the three
            # sectors coincide.
            if target2 == target1 and source == target1:
                # All three sectors are the same.
                metrics_path = f"./results/economic_results/o_info/metrics/3/all/one_sector_{target2}.npy"
            elif target2 == target1:
                # target1 appears twice, source once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{source}_two_sector_{target2}.npy"
            elif source == target1:
                # target1 appears twice (as source), target2 once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{target2}_two_sector_{source}.npy"
            elif source == target2:
                # target2 appears twice, target1 once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{target1}_two_sector_{target2}.npy"
            else:
                # Three pairwise-distinct sectors.
                metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/all/one_sector_{target1}_one_sector_{target2}_one_sector_{source}.npy"

            df_matrix.at[target2, source] = np.mean(np.load(metrics_path))

    # Write once per target1, after every row has been filled, rather than
    # rewriting the same file after each row.
    df_matrix.to_csv(f"./results/economic_results/o_info/dataframes/1_1_1/all/matrix_{target1}.csv", index=True)

In [None]:
# Reload the per-sector matrices from CSV, keyed by sector name.
diz_matrix = {
    sector_name: pd.read_csv(
        f"./results/economic_results/o_info/dataframes/1_1_1/all/matrix_{sector_name}.csv",
        index_col=0,
    )
    for sector_name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Expose each frame under its own name as well (same order as the keys above).
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = diz_matrix.values()

In [None]:
# One heatmap per sector matrix, saved as PDF and then displayed.
for sector in sectors:
    matrix_frame = diz_matrix[sector]
    sns.heatmap(matrix_frame)

    plt.title(f"1 in {sector}, 1 in Target, 1 in Source")
    plt.xlabel("Source")
    plt.ylabel("Target")

    output_pdf = f"./results/economic_results/o_info/images/1_1_1/all/heatmap_matrix_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

In [None]:
# Report how many unordered stock triplets exist inside each sector.
for sector in sectors:
    in_sector = stock_names["Sector"] == sector
    target_stocks = list(stock_names[in_sector]["Stock name"])
    same_sector_triplets = list(combinations(target_stocks, 3))
    triplet_count = len(same_sector_triplets)
    print(f"Total number of triplets in {sector}: {triplet_count}")

<a name="o_info_1_1_1_matrix_bootstrapping"></a>
#### Bootstrapping

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

num_samples = 1000  # samples of the bootstrapping
sample_size = 35  # number of values drawn (with replacement) per resample


def _bootstrap_variance_of_mean(values, n_resamples, resample_size):
    """Return the variance of the means of bootstrap resamples of *values*.

    Draws ``n_resamples`` resamples of ``resample_size`` elements each, with
    replacement, from the 1-D array ``values``, takes the mean of every
    resample and returns ``np.var`` (default ``ddof=0``) of those means.
    The draws use the global NumPy random state, so results are not
    reproducible unless a seed is set beforehand.
    """
    resamples = np.random.choice(
        values, size=(n_resamples, resample_size), replace=True
    )
    return np.var(resamples.mean(axis=1))


# For each fixed sector `target1`, build a (target2 x source) matrix with the
# bootstrap variance of the mean O-information for the sector combination
# (target1, target2, source), then write it to CSV.
for target1 in sectors:
    df_variance = pd.DataFrame(
        columns=[
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ]
    )
    for target2 in sectors:
        for source in sectors:
            # Pick the metrics file that matches how many of the three
            # sectors coincide.
            if target2 == target1 and source == target1:
                # All three sectors are the same.
                metrics_path = f"./results/economic_results/o_info/metrics/3/all/one_sector_{target2}.npy"
            elif target2 == target1:
                # target1 appears twice, source once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{source}_two_sector_{target2}.npy"
            elif source == target1:
                # target1 appears twice (as source), target2 once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{target2}_two_sector_{source}.npy"
            elif source == target2:
                # target2 appears twice, target1 once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{target1}_two_sector_{target2}.npy"
            else:
                # Three pairwise-distinct sectors.
                metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/all/one_sector_{target1}_one_sector_{target2}_one_sector_{source}.npy"

            df_variance.at[target2, source] = _bootstrap_variance_of_mean(
                np.load(metrics_path), num_samples, sample_size
            )

    # Write once per target1, after every row has been filled, rather than
    # rewriting the same file after each row.
    df_variance.to_csv(f"./results/economic_results/o_info/dataframes/1_1_1/all/matrix_variance_{target1}.csv", index=True)

In [None]:
# Display the variance matrix left over from the bootstrapping loop, i.e. the
# one built for the last sector iterated in `sectors`.
df_variance

<a name="o_info_1_1_1_matrix_pdf"></a>
#### Probability density function

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DICTIONARY

# One placeholder per sector; each is replaced in the loop below.
diz_pdf = dict.fromkeys(
    [
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    ]
)

# For each sector collect the raw O-information values of three groups:
#   "3"     - same-sector metrics for `target`
#   "2_1"   - "2_1" metrics with `target` as the doubled sector, over all other sectors
#   "1_1_1" - "1_1_1" metrics with `target` first, over all ordered pairs of other sectors
# For now all 35 values of every file are kept; check whether averaging them
# would be more convenient.
for target in sectors:
    other_sectors = [sector for sector in sectors if sector != target]

    same_sector_values = list(
        np.load(
            f"./results/economic_results/o_info/metrics/3/all/one_sector_{target}.npy"
        )
    )

    two_one_values = [
        value
        for source in other_sectors
        for value in np.load(
            f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{source}_two_sector_{target}.npy"
        )
    ]

    one_each_values = [
        value
        for source1 in other_sectors
        for source2 in other_sectors
        if source2 != source1
        for value in np.load(
            f"./results/economic_results/o_info/metrics/1_1_1/all/one_sector_{target}_one_sector_{source1}_one_sector_{source2}.npy"
        )
    ]

    diz_sector = {
        "3": same_sector_values,
        "2_1": two_one_values,
        "1_1_1": one_each_values,
    }
    diz_pdf[target] = diz_sector

# Persist the dictionary so the plots can be redone without reloading the metrics.
pdf_pickle_path = "./results/economic_results/o_info/dataframes/1_1_1/all/dict_proba_density_function_all.pickle"
with open(pdf_pickle_path, "wb") as handle:
    pickle.dump(diz_pdf, handle)

In [None]:
# Reload the dictionary of raw O-information values from its pickle file.
pdf_pickle_path = "./results/economic_results/o_info/dataframes/1_1_1/all/dict_proba_density_function_all.pickle"
with open(pdf_pickle_path, "rb") as handle:
    diz_pdf = pickle.load(handle)

In [None]:
# For each sector draw one histogram with the three value groups side by side
# ("3", "2_1", "1_1_1", in the dictionary's key order), save it and show it.
for sector in sectors:
    data = list(diz_pdf[sector].values())
    label = [f"3 in {sector}", f"2 in {sector}", f"1 in {sector}"]

    plt.figure(figsize=(8, 6))
    plt.hist(data, bins=30, label=label)

    plt.title(f"PDF of {sector}", size=18)
    plt.xlabel("O-information", size=15)
    plt.ylabel("Frequency", size=15)
    plt.tick_params(axis="both", labelsize=12)
    plt.legend()

    output_pdf = f"./results/economic_results/o_info/images/1_1_1/all/PDF_all_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DICTIONARY

# One placeholder per sector; each is replaced in the loop below.
diz_pdf = dict.fromkeys(
    [
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    ]
)

# Same three groups as the raw-value dictionary, but every metrics file is
# reduced to its mean:
#   "3"     - a single mean value (scalar, not a list)
#   "2_1"   - list with one mean per other sector
#   "1_1_1" - list with one mean per ordered pair of other sectors
for target in sectors:
    other_sectors = [sector for sector in sectors if sector != target]

    same_sector_mean = np.mean(
        np.load(
            f"./results/economic_results/o_info/metrics/3/all/one_sector_{target}.npy"
        )
    )

    two_one_means = [
        np.mean(
            np.load(
                f"./results/economic_results/o_info/metrics/2_1/all/one_sector_{source}_two_sector_{target}.npy"
            )
        )
        for source in other_sectors
    ]

    one_each_means = [
        np.mean(
            np.load(
                f"./results/economic_results/o_info/metrics/1_1_1/all/one_sector_{target}_one_sector_{source1}_one_sector_{source2}.npy"
            )
        )
        for source1 in other_sectors
        for source2 in other_sectors
        if source2 != source1
    ]

    diz_sector = {
        "3": same_sector_mean,
        "2_1": two_one_means,
        "1_1_1": one_each_means,
    }
    diz_pdf[target] = diz_sector

# Persist the dictionary so the plots can be redone without reloading the metrics.
pdf_pickle_path = "./results/economic_results/o_info/dataframes/1_1_1/all/dict_proba_density_function_mean.pickle"
with open(pdf_pickle_path, "wb") as handle:
    pickle.dump(diz_pdf, handle)

In [None]:
# Reload the dictionary of per-file mean O-information values from its pickle file.
pdf_pickle_path = "./results/economic_results/o_info/dataframes/1_1_1/all/dict_proba_density_function_mean.pickle"
with open(pdf_pickle_path, "rb") as handle:
    diz_pdf = pickle.load(handle)

In [None]:
# For each sector draw one histogram of the per-file means for the three
# groups ("3", "2_1", "1_1_1", in the dictionary's key order), save it and
# show it.
for sector in sectors:
    data = list(diz_pdf[sector].values())
    label = [f"3 in {sector}", f"2 in {sector}", f"1 in {sector}"]

    plt.figure(figsize=(8, 6))
    plt.hist(data, bins=20, label=label)

    plt.title(f"PDF of {sector} (mean)", size=18)
    plt.xlabel("O-information", size=15)
    plt.ylabel("Frequency", size=15)
    plt.tick_params(axis="both", labelsize=12)
    plt.legend()

    output_pdf = f"./results/economic_results/o_info/images/1_1_1/all/PDF_mean_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

<a name="o_info_1_1_1_matrix_gaussian"></a>
#### Gaussian distribution

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Same-sector O-information on the gaussian binarized series, one .npy per sector.
for target in sectors:
    oinfo_one_sector = get_o_infos_same(target, binarized_financial_ts_gauss)
    output_npy = f"./results/economic_results/o_info/metrics/3/all/gaussian/one_sector_{target}.npy"
    np.save(output_npy, oinfo_one_sector)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every ordered pair of distinct sectors, compute the O-information values
# on the gaussian binarized series and store them in one .npy file per pair.
for target in sectors:
    other_sectors = [sector for sector in sectors if sector != target]
    for sector in other_sectors:

        oinfo_two_sectors = get_o_infos_two_different(
            target, sector, binarized_financial_ts_gauss
        )
        np.save(
            f"./results/economic_results/o_info/metrics/2_1/all/gaussian/one_sector_{target}_two_sector_{sector}.npy",
            oinfo_two_sectors,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Visit every ordered triple of pairwise-distinct sectors
# (target, sector1, sector2), compute the O-information values on the gaussian
# binarized series and store them in one .npy file per triple.
for target in sectors:
    for sector1 in sectors:
        if sector1 == target:
            continue
        for sector2 in sectors:
            if sector2 == target or sector2 == sector1:
                continue

            oinfo_three_sectors = get_o_infos_three_different(
                target, sector1, sector2, binarized_financial_ts_gauss
            )
            output_npy = f"./results/economic_results/o_info/metrics/1_1_1/all/gaussian/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}.npy"
            np.save(output_npy, oinfo_three_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each fixed sector `target1`, build a (target2 x source) matrix holding
# the mean of the stored gaussian O-information values for the sector
# combination (target1, target2, source), then write it to CSV.
for target1 in sectors:
    df_matrix = pd.DataFrame(
        columns=[
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ]
    )
    for target2 in sectors:
        for source in sectors:
            # Pick the metrics file that matches how many of the three
            # sectors coincide.
            if target2 == target1 and source == target1:
                # All three sectors are the same.
                metrics_path = f"./results/economic_results/o_info/metrics/3/all/gaussian/one_sector_{target2}.npy"
            elif target2 == target1:
                # target1 appears twice, source once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/gaussian/one_sector_{source}_two_sector_{target2}.npy"
            elif source == target1:
                # target1 appears twice (as source), target2 once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/gaussian/one_sector_{target2}_two_sector_{source}.npy"
            elif source == target2:
                # target2 appears twice, target1 once.
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/gaussian/one_sector_{target1}_two_sector_{target2}.npy"
            else:
                # Three pairwise-distinct sectors.
                metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/all/gaussian/one_sector_{target1}_one_sector_{target2}_one_sector_{source}.npy"

            df_matrix.at[target2, source] = np.mean(np.load(metrics_path))

    # Write once per target1, after every row has been filled, rather than
    # rewriting the same file after each row.
    df_matrix.to_csv(f"./results/economic_results/o_info/dataframes/1_1_1/all/gaussian/matrix_{target1}.csv", index=True)

In [None]:
# Reload the per-sector gaussian matrices from CSV, keyed by sector name.
diz_matrix = {
    sector_name: pd.read_csv(
        f"./results/economic_results/o_info/dataframes/1_1_1/all/gaussian/matrix_{sector_name}.csv",
        index_col=0,
    )
    for sector_name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Expose each frame under its own name as well (same order as the keys above).
(
    df_industrials,
    df_basic_materials,
    df_financials,
    df_consumer_services,
    df_health_care,
    df_utilities,
    df_oil_gas,
    df_consumer_goods,
) = diz_matrix.values()

In [None]:
# One heatmap per gaussian sector matrix, saved as PDF and then displayed.
for sector in sectors:
    matrix_frame = diz_matrix[sector]
    sns.heatmap(matrix_frame)

    plt.title(f"1 in {sector}, 1 in Target, 1 in Source")
    plt.xlabel("Source")
    plt.ylabel("Target")

    output_pdf = f"./results/economic_results/o_info/images/1_1_1/all/gaussian/heatmap_matrix_{sector}.pdf"
    plt.savefig(output_pdf, dpi=600, bbox_inches="tight")

    plt.show()

<a name="o_info_1_1_1_matrix_all_triplets"></a>
#### All possible triplets

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Same-sector O-information with total=True, one .npy per sector.
for target in sectors:
    oinfo_one_sector = get_o_infos_same(target, binarized_financial_ts, total=True)
    output_npy = f"./results/economic_results/o_info/metrics/3/all/all_triplets/one_sector_{target}.npy"
    np.save(output_npy, oinfo_one_sector)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every ordered pair of distinct sectors, compute the O-information values
# with total=True and store them in one .npy file per pair.
for target in sectors:
    other_sectors = [sector for sector in sectors if sector != target]
    for sector in other_sectors:

        oinfo_two_sectors = get_o_infos_two_different(
            target, sector, binarized_financial_ts, total=True
        )
        np.save(
            f"./results/economic_results/o_info/metrics/2_1/all/all_triplets/one_sector_{target}_two_sector_{sector}.npy",
            oinfo_two_sectors,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Visit every ordered triple of pairwise-distinct sectors
# (target, sector1, sector2), compute the O-information values with
# total=True and store them in one .npy file per triple.
for target in sectors:
    for sector1 in sectors:
        if sector1 == target:
            continue
        for sector2 in sectors:
            if sector2 == target or sector2 == sector1:
                continue

            oinfo_three_sectors = get_o_infos_three_different(
                target, sector1, sector2, binarized_financial_ts, total=True
            )
            output_npy = f"./results/economic_results/o_info/metrics/1_1_1/all/all_triplets/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}.npy"
            np.save(output_npy, oinfo_three_sectors)

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each fixed sector target1, build a (target2 x source) table whose cells
# hold the mean of the stored O-information array for the sector combination
# (target1, target2, source), then write the table to CSV.
for target1 in sectors:
    df_matrix = pd.DataFrame(
        columns=[
            "Industrials",
            "Basic Materials",
            "Financials",
            "Consumer Services",
            "Health Care",
            "Utilities",
            "Oil & Gas",
            "Consumer Goods",
        ]
    )
    for target2 in sectors:
        for source in sectors:
            # Choose the metrics file according to which of the three sectors
            # coincide.
            if target2 == target1:
                if source == target1:
                    # all three series in the same sector
                    metrics_path = f"./results/economic_results/o_info/metrics/3/all/all_triplets/one_sector_{target2}.npy"
                else:
                    # target1 == target2, source differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/all_triplets/one_sector_{source}_two_sector_{target2}.npy"
            elif source == target1:
                # source == target1, target2 differs
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/all_triplets/one_sector_{target2}_two_sector_{source}.npy"
            elif source == target2:
                # source == target2, target1 differs
                metrics_path = f"./results/economic_results/o_info/metrics/2_1/all/all_triplets/one_sector_{target1}_two_sector_{target2}.npy"
            else:
                # three pairwise-distinct sectors
                metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/all/all_triplets/one_sector_{target1}_one_sector_{target2}_one_sector_{source}.npy"

            df_matrix.at[target2, source] = np.mean(np.load(metrics_path))

    # Write once per target1, after the whole table is filled. Previously the
    # file was rewritten after every row, producing the same final content.
    df_matrix.to_csv(f"./results/economic_results/o_info/dataframes/1_1_1/all/all_triplets/matrix_{target1}.csv", index=True)

In [None]:
# Load the per-sector matrices from CSV (first column used as the index),
# keyed by sector name.
diz_matrix = {
    name: pd.read_csv(
        f"./results/economic_results/o_info/dataframes/1_1_1/all/all_triplets/matrix_{name}.csv",
        index_col=0,
    )
    for name in (
        "Industrials",
        "Basic Materials",
        "Financials",
        "Consumer Services",
        "Health Care",
        "Utilities",
        "Oil & Gas",
        "Consumer Goods",
    )
}

# Keep each frame reachable under its individual variable name as well.
df_industrials = diz_matrix["Industrials"]
df_basic_materials = diz_matrix["Basic Materials"]
df_financials = diz_matrix["Financials"]
df_consumer_services = diz_matrix["Consumer Services"]
df_health_care = diz_matrix["Health Care"]
df_utilities = diz_matrix["Utilities"]
df_oil_gas = diz_matrix["Oil & Gas"]
df_consumer_goods = diz_matrix["Consumer Goods"]

In [None]:
# Draw one heatmap per sector from the matrices in diz_matrix, save it as PDF
# and display it.
for sector in sectors:
    heatmap_ax = sns.heatmap(diz_matrix[sector])

    heatmap_ax.set_xlabel("Source")
    heatmap_ax.set_ylabel("Target")
    heatmap_ax.set_title(f"1 in {sector}, 1 in Target, 1 in Source")

    heatmap_ax.figure.savefig(
        f"./results/economic_results/o_info/images/1_1_1/all/all_triplets/heatmap_matrix_{sector}.pdf",
        dpi=600,
        bbox_inches="tight",
    )

    plt.show()

<a name="o_info_1_1_1_one_year"></a>
#### One year

In [None]:
# Make sure the Date column is datetime-typed, then list the calendar years it
# contains, dropping the first one.
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])
years = financial_ts["Date"].dt.year.unique()[1:]

# One sub-frame of financial_ts per retained year.
diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

In [None]:
# tw is handed to binarize_df as its second argument (see binarize_df for its
# meaning); each yearly frame is binarized independently.
tw = 7
diz_years_bin = {year: binarize_df(diz_years[year], tw) for year in years}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Per year, walk over every ordered triple of pairwise-distinct sectors and
# save the result of get_o_infos_three_different on that year's binarized data.
for year in years:
    yearly_data = diz_years_bin[year]
    for target in sectors:
        for sector1 in sectors:
            if sector1 == target:
                continue
            for sector2 in sectors:
                # skip triples in which a sector would appear twice
                if sector2 == target or sector2 == sector1:
                    continue

                np.save(
                    f"./results/economic_results/o_info/metrics/1_1_1/one_year/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}_year_{year}.npy",
                    get_o_infos_three_different(target, sector1, sector2, yearly_data),
                )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each (target1, target2) pair, assemble a table with one row per source
# sector and one column per year holding the mean of the stored O-information
# array, then write it to CSV.
for target1 in sectors:
    for target2 in sectors:
        df_one_year = pd.DataFrame(columns=years)
        for year in years:
            for source in sectors:
                # Choose the metrics file according to which of the three
                # sectors coincide.
                if target2 == target1:
                    if source == target1:
                        # all three series in the same sector
                        metrics_path = f"./results/economic_results/o_info/metrics/3/one_year/one_sector_{target2}_year_{year}.npy"
                    else:
                        # target1 == target2, source differs
                        metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/one_sector_{source}_two_sector_{target2}_year_{year}.npy"
                elif source == target1:
                    # source == target1, target2 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/one_sector_{target2}_two_sector_{source}_year_{year}.npy"
                elif source == target2:
                    # source == target2, target1 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/one_sector_{target1}_two_sector_{target2}_year_{year}.npy"
                else:
                    # three pairwise-distinct sectors
                    metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/one_year/one_sector_{target1}_one_sector_{target2}_one_sector_{source}_year_{year}.npy"

                df_one_year.at[source, year] = np.mean(np.load(metrics_path))

        df_one_year.to_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/one_year/dataframe_{target1}_{target2}.csv",
            index=True,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# Dashed vertical markers drawn on every heatmap: (x position, colour, label).
# x is expressed in heatmap axis units, not in calendar years.
event_lines = [
    (1.5, "yellow", "twin towers"),
    (8.5, "blue", "financial crisis"),
    (9.5, "green", "Obama president"),
    (17.5, "brown", "Trump president"),
    (11.5, "darkorchid", "Crisi debito sovrano Europa / Quantitative easing USA"),
    (19.5, "pink", "covid-19"),
    (21.5, "cyan", "Biden president"),
]

# One heatmap (source sector x year) per (target1, target2) pair, saved as PDF.
for target1 in sectors:
    for target2 in sectors:

        df = pd.read_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/one_year/dataframe_{target1}_{target2}.csv",
            index_col=0,
        )

        heatmap_ax = sns.heatmap(df)

        for x_pos, line_color, line_label in event_lines:
            heatmap_ax.axvline(x=x_pos, color=line_color, linestyle="--", label=line_label)

        heatmap_ax.set_xlabel("Year")
        heatmap_ax.set_ylabel("Source")
        heatmap_ax.set_title(f"1 in {target1}, 1 in {target2}, 1 in source (O-info)")

        # The legend is left disabled: plt.legend()

        heatmap_ax.figure.savefig(
            f"./results/economic_results/o_info/images/1_1_1/one_year/heatmap_{target1}_{target2}.pdf",
            dpi=600,
            bbox_inches="tight",
        )

        plt.show()

<a name="o_info_1_1_1_one_year_gaussian"></a>
##### Gaussian distribution

In [None]:
# Same yearly split as for the real data, applied to financial_ts_gauss.
# NOTE: this rebinds `years` and `diz_years`.
financial_ts_gauss["Date"] = pd.to_datetime(financial_ts_gauss["Date"])
years = financial_ts_gauss["Date"].dt.year.unique()[1:]  # first year dropped

diz_years = {
    year: financial_ts_gauss[financial_ts_gauss["Date"].dt.year == year]
    for year in years
}

In [None]:
# Binarize each yearly frame currently held in diz_years; tw is passed to
# binarize_df as its second argument.
tw = 7
diz_one_year_gaussian_bin = {
    year: binarize_df(diz_years[year], tw) for year in years
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# 3: all three series from the same sector, one file per (year, sector)
for year in years:
    gaussian_year_data = diz_one_year_gaussian_bin[year]
    for target in sectors:
        np.save(
            f"./results/economic_results/o_info/metrics/3/one_year/gaussian/one_sector_{target}_year_{year}.npy",
            get_o_infos_same(target, gaussian_year_data),
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# 2 vs 1: for every year and every ordered pair of distinct sectors, compute
# the O-information with get_o_infos_two_different and save it to its own file.
for year in years:
    for target in sectors:
        other_sectors = [sector for sector in sectors if sector != target]
        # The positional index was never used, so iterate the list directly.
        for sector in other_sectors:

            oinfo_two_sectors = get_o_infos_two_different(
                target, sector, diz_one_year_gaussian_bin[year]
            )
            np.save(
                f"./results/economic_results/o_info/metrics/2_1/one_year/gaussian/one_sector_{target}_two_sector_{sector}_year_{year}.npy",
                oinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# 1 vs 1 vs 1: per year, every ordered triple of pairwise-distinct sectors
for year in years:
    gaussian_year_data = diz_one_year_gaussian_bin[year]
    for target in sectors:
        for sector1 in sectors:
            if sector1 == target:
                continue
            for sector2 in sectors:
                # skip triples in which a sector would appear twice
                if sector2 == target or sector2 == sector1:
                    continue

                np.save(
                    f"./results/economic_results/o_info/metrics/1_1_1/one_year/gaussian/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}_year_{year}.npy",
                    get_o_infos_three_different(
                        target, sector1, sector2, gaussian_year_data
                    ),
                )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# Gaussian variant: for each (target1, target2) pair, assemble a table with one
# row per source sector and one column per year holding the mean of the stored
# O-information array, then write it to CSV.
for target1 in sectors:
    for target2 in sectors:
        df_one_year = pd.DataFrame(columns=years)
        for year in years:
            for source in sectors:
                # Choose the metrics file according to which of the three
                # sectors coincide.
                if target2 == target1:
                    if source == target1:
                        # all three series in the same sector
                        metrics_path = f"./results/economic_results/o_info/metrics/3/one_year/gaussian/one_sector_{target2}_year_{year}.npy"
                    else:
                        # target1 == target2, source differs
                        metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/gaussian/one_sector_{source}_two_sector_{target2}_year_{year}.npy"
                elif source == target1:
                    # source == target1, target2 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/gaussian/one_sector_{target2}_two_sector_{source}_year_{year}.npy"
                elif source == target2:
                    # source == target2, target1 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/gaussian/one_sector_{target1}_two_sector_{target2}_year_{year}.npy"
                else:
                    # three pairwise-distinct sectors
                    metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/one_year/gaussian/one_sector_{target1}_one_sector_{target2}_one_sector_{source}_year_{year}.npy"

                df_one_year.at[source, year] = np.mean(np.load(metrics_path))

        df_one_year.to_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/one_year/gaussian/dataframe_{target1}_{target2}.csv",
            index=True,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# Dashed vertical markers drawn on every heatmap: (x position, colour, label).
# x is expressed in heatmap axis units, not in calendar years.
event_lines = [
    (1.5, "yellow", "twin towers"),
    (8.5, "blue", "financial crisis"),
    (9.5, "green", "Obama president"),
    (17.5, "brown", "Trump president"),
    (11.5, "darkorchid", "Crisi debito sovrano Europa / Quantitative easing USA"),
    (19.5, "pink", "covid-19"),
    (21.5, "cyan", "Biden president"),
]

# One heatmap (source sector x year) per (target1, target2) pair, built from
# the gaussian dataframes and saved as PDF.
for target1 in sectors:
    for target2 in sectors:

        df = pd.read_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/one_year/gaussian/dataframe_{target1}_{target2}.csv",
            index_col=0,
        )

        heatmap_ax = sns.heatmap(df)

        for x_pos, line_color, line_label in event_lines:
            heatmap_ax.axvline(x=x_pos, color=line_color, linestyle="--", label=line_label)

        heatmap_ax.set_xlabel("Year")
        heatmap_ax.set_ylabel("Source")
        heatmap_ax.set_title(f"Gaussian: 1 in {target1}, 1 in {target2}, 1 in source (O-info)")

        # The legend is left disabled: plt.legend()

        heatmap_ax.figure.savefig(
            f"./results/economic_results/o_info/images/1_1_1/one_year/gaussian/heatmap_{target1}_{target2}.pdf",
            dpi=600,
            bbox_inches="tight",
        )

        plt.show()

<a name="o_info_1_1_1_one_year_all_triplets"></a>
##### All number of triplets

In [None]:
# Rebuild the yearly split from the real data (the gaussian section rebinds
# `years` and `diz_years`). The first year in the data is dropped.
financial_ts["Date"] = pd.to_datetime(financial_ts["Date"])

years = financial_ts["Date"].dt.year.unique()[1:]

diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

In [None]:
# Binarize every yearly frame; tw is passed to binarize_df as its second
# argument.
tw = 7
diz_years_bin = {year: binarize_df(diz_years[year], tw) for year in years}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# One file per (year, sector): result of get_o_infos_same on that year's
# binarized data, with total=True forwarded as-is.
for year in years:
    yearly_data = diz_years_bin[year]
    for target in sectors:
        np.save(
            f"./results/economic_results/o_info/metrics/3/one_year/all_triplets/one_sector_{target}_year_{year}.npy",
            get_o_infos_same(target, yearly_data, total=True),
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# For every year and every ordered pair of distinct sectors (target, sector),
# compute the O-information with get_o_infos_two_different (total=True is
# forwarded as-is) and save it to its own file.
for year in years:
    for target in sectors:
        other_sectors = [sector for sector in sectors if sector != target]
        # The positional index was never used, so iterate the list directly.
        for sector in other_sectors:

            oinfo_two_sectors = get_o_infos_two_different(
                target, sector, diz_years_bin[year], total=True
            )
            np.save(
                f"./results/economic_results/o_info/metrics/2_1/one_year/all_triplets/one_sector_{target}_two_sector_{sector}_year_{year}.npy",
                oinfo_two_sectors,
            )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Per year, every ordered triple of pairwise-distinct sectors; total=True is
# forwarded to get_o_infos_three_different as-is.
for year in years:
    yearly_data = diz_years_bin[year]
    for target in sectors:
        for sector1 in sectors:
            if sector1 == target:
                continue
            for sector2 in sectors:
                # skip triples in which a sector would appear twice
                if sector2 == target or sector2 == sector1:
                    continue

                np.save(
                    f"./results/economic_results/o_info/metrics/1_1_1/one_year/all_triplets/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}_year_{year}.npy",
                    get_o_infos_three_different(
                        target, sector1, sector2, yearly_data, total=True
                    ),
                )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# All-triplets variant: for each (target1, target2) pair, assemble a table with
# one row per source sector and one column per year holding the mean of the
# stored O-information array, then write it to CSV.
for target1 in sectors:
    for target2 in sectors:
        df_one_year = pd.DataFrame(columns=years)
        for year in years:
            for source in sectors:
                # Choose the metrics file according to which of the three
                # sectors coincide.
                if target2 == target1:
                    if source == target1:
                        # all three series in the same sector
                        metrics_path = f"./results/economic_results/o_info/metrics/3/one_year/all_triplets/one_sector_{target2}_year_{year}.npy"
                    else:
                        # target1 == target2, source differs
                        metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/all_triplets/one_sector_{source}_two_sector_{target2}_year_{year}.npy"
                elif source == target1:
                    # source == target1, target2 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/all_triplets/one_sector_{target2}_two_sector_{source}_year_{year}.npy"
                elif source == target2:
                    # source == target2, target1 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/one_year/all_triplets/one_sector_{target1}_two_sector_{target2}_year_{year}.npy"
                else:
                    # three pairwise-distinct sectors
                    metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/one_year/all_triplets/one_sector_{target1}_one_sector_{target2}_one_sector_{source}_year_{year}.npy"

                df_one_year.at[source, year] = np.mean(np.load(metrics_path))

        df_one_year.to_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/one_year/all_triplets/dataframe_{target1}_{target2}.csv",
            index=True,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# Dashed vertical markers drawn on every heatmap: (x position, colour, label).
# x is expressed in heatmap axis units, not in calendar years.
event_lines = [
    (1.5, "yellow", "twin towers"),
    (8.5, "blue", "financial crisis"),
    (9.5, "green", "Obama president"),
    (17.5, "brown", "Trump president"),
    (11.5, "darkorchid", "Crisi debito sovrano Europa / Quantitative easing USA"),
    (19.5, "pink", "covid-19"),
    (21.5, "cyan", "Biden president"),
]

# One heatmap (source sector x year) per (target1, target2) pair, built from
# the all-triplets dataframes and saved as PDF.
for target1 in sectors:
    for target2 in sectors:

        df = pd.read_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/one_year/all_triplets/dataframe_{target1}_{target2}.csv",
            index_col=0,
        )

        heatmap_ax = sns.heatmap(df)

        for x_pos, line_color, line_label in event_lines:
            heatmap_ax.axvline(x=x_pos, color=line_color, linestyle="--", label=line_label)

        heatmap_ax.set_xlabel("Year")
        heatmap_ax.set_ylabel("Source")
        heatmap_ax.set_title(f"1 in {target1}, 1 in {target2}, 1 in source (O-info)")

        # The legend is left disabled: plt.legend()

        heatmap_ax.figure.savefig(
            f"./results/economic_results/o_info/images/1_1_1/one_year/all_triplets/heatmap_{target1}_{target2}.pdf",
            dpi=600,
            bbox_inches="tight",
        )

        plt.show()

<a name="o_info_1_1_1_two_years"></a>
#### Two years

In [None]:
# Drop the first year in the data (1999).
# NOTE: relies on financial_ts["Date"] already being datetime-typed.
years = financial_ts["Date"].dt.year.unique()[1:]

# One sub-frame of financial_ts per retained year.
diz_years = {}

for year in years:
    df_year = financial_ts[financial_ts["Date"].dt.year == year]
    diz_years[year] = df_year

# Concatenate consecutive years in non-overlapping pairs, keyed "<start>_<end>".
diz_two_years = {}

# Stop at len(years) - 1 so that a trailing unpaired year (odd number of years)
# is skipped instead of raising IndexError on years[i + 1]. The three-year
# split guards incomplete groups in the same way.
for i in range(0, len(years) - 1, 2):
    start_year = years[i]
    end_year = years[i + 1]
    combined_key = f"{start_year}_{end_year}"

    combined_df = pd.concat([diz_years[start_year], diz_years[end_year]])

    diz_two_years[combined_key] = combined_df

In [None]:
# From here on `years` holds the two-year keys ("<start>_<end>") rather than
# single calendar years. tw is passed to binarize_df as its second argument.
tw = 7
years = diz_two_years.keys()
diz_two_years_bin = {
    year: binarize_df(diz_two_years[year], tw) for year in years
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Per two-year window (`year` is a key of diz_two_years_bin), every ordered
# triple of pairwise-distinct sectors.
for year in years:
    window_data = diz_two_years_bin[year]
    for target in sectors:
        for sector1 in sectors:
            if sector1 == target:
                continue
            for sector2 in sectors:
                # skip triples in which a sector would appear twice
                if sector2 == target or sector2 == sector1:
                    continue

                np.save(
                    f"./results/economic_results/o_info/metrics/1_1_1/two_years/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}_years_{year}.npy",
                    get_o_infos_three_different(target, sector1, sector2, window_data),
                )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each (target1, target2) pair, assemble a table with one row per source
# sector and one column per entry of `years` holding the mean of the stored
# O-information array, then write it to CSV.
for target1 in sectors:
    for target2 in sectors:
        df_two_years = pd.DataFrame(columns=years)
        for year in years:
            for source in sectors:
                # Choose the metrics file according to which of the three
                # sectors coincide.
                if target2 == target1:
                    if source == target1:
                        # all three series in the same sector
                        metrics_path = f"./results/economic_results/o_info/metrics/3/two_years/one_sector_{target2}_years_{year}.npy"
                    else:
                        # target1 == target2, source differs
                        metrics_path = f"./results/economic_results/o_info/metrics/2_1/two_years/one_sector_{source}_two_sector_{target2}_years_{year}.npy"
                elif source == target1:
                    # source == target1, target2 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/two_years/one_sector_{target2}_two_sector_{source}_years_{year}.npy"
                elif source == target2:
                    # source == target2, target1 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/two_years/one_sector_{target1}_two_sector_{target2}_years_{year}.npy"
                else:
                    # three pairwise-distinct sectors
                    metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/two_years/one_sector_{target1}_one_sector_{target2}_one_sector_{source}_years_{year}.npy"

                df_two_years.at[source, year] = np.mean(np.load(metrics_path))

        df_two_years.to_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/two_years/dataframe_{target1}_{target2}.csv",
            index=True,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per (target1, target2) pair from the two-year dataframes,
# saved as PDF and displayed.
for target1 in sectors:
    for target2 in sectors:

        df = pd.read_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/two_years/dataframe_{target1}_{target2}.csv",
            index_col=0,
        )

        heatmap_ax = sns.heatmap(df)

        heatmap_ax.set_xlabel("Year")
        heatmap_ax.set_ylabel("Source")
        heatmap_ax.set_title(f"1 in {target1}, 1 in {target2}, 1 in source (O-info)")

        # The legend is left disabled: plt.legend()

        heatmap_ax.figure.savefig(
            f"./results/economic_results/o_info/images/1_1_1/two_years/heatmap_{target1}_{target2}.pdf",
            dpi=600,
            bbox_inches="tight",
        )

        plt.show()

<a name="o_info_1_1_1_three_years"></a>
#### Three years

In [None]:
# Drop the first and the last year in the data (1999 and 2021).
years = financial_ts["Date"].dt.year.unique()[1:-1]

# One sub-frame of financial_ts per retained year.
diz_years = {
    year: financial_ts[financial_ts["Date"].dt.year == year] for year in years
}

# Concatenate consecutive years in non-overlapping triplets, keyed
# "<start>_<mid>_<end>". zip stops at the shortest slice, so an incomplete
# trailing group is ignored.
diz_three_years = {}

for start_year, mid_year, end_year in zip(years[0::3], years[1::3], years[2::3]):
    combined_key = f"{start_year}_{mid_year}_{end_year}"
    diz_three_years[combined_key] = pd.concat(
        [diz_years[start_year], diz_years[mid_year], diz_years[end_year]]
    )

In [None]:
# From here on `years` holds the three-year keys ("<start>_<mid>_<end>") rather
# than single calendar years. tw is passed to binarize_df as its second argument.
tw = 7
years = diz_three_years.keys()
diz_three_years_bin = {
    year: binarize_df(diz_three_years[year], tw) for year in years
}

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATA

# Per three-year window (`year` is a key of diz_three_years_bin), every ordered
# triple of pairwise-distinct sectors.
for year in years:
    window_data = diz_three_years_bin[year]
    for target in sectors:
        for sector1 in sectors:
            if sector1 == target:
                continue
            for sector2 in sectors:
                # skip triples in which a sector would appear twice
                if sector2 == target or sector2 == sector1:
                    continue

                np.save(
                    f"./results/economic_results/o_info/metrics/1_1_1/three_years/one_sector_{target}_one_sector_{sector1}_one_sector_{sector2}_years_{year}.npy",
                    get_o_infos_three_different(target, sector1, sector2, window_data),
                )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE DATAFRAMES

# For each (target1, target2) pair, assemble a table with one row per source
# sector and one column per entry of `years` holding the mean of the stored
# O-information array, then write it to CSV.
for target1 in sectors:
    for target2 in sectors:
        df_three_years = pd.DataFrame(columns=years)
        for year in years:
            for source in sectors:
                # Choose the metrics file according to which of the three
                # sectors coincide.
                if target2 == target1:
                    if source == target1:
                        # all three series in the same sector
                        metrics_path = f"./results/economic_results/o_info/metrics/3/three_years/one_sector_{target2}_years_{year}.npy"
                    else:
                        # target1 == target2, source differs
                        metrics_path = f"./results/economic_results/o_info/metrics/2_1/three_years/one_sector_{source}_two_sector_{target2}_years_{year}.npy"
                elif source == target1:
                    # source == target1, target2 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/three_years/one_sector_{target2}_two_sector_{source}_years_{year}.npy"
                elif source == target2:
                    # source == target2, target1 differs
                    metrics_path = f"./results/economic_results/o_info/metrics/2_1/three_years/one_sector_{target1}_two_sector_{target2}_years_{year}.npy"
                else:
                    # three pairwise-distinct sectors
                    metrics_path = f"./results/economic_results/o_info/metrics/1_1_1/three_years/one_sector_{target1}_one_sector_{target2}_one_sector_{source}_years_{year}.npy"

                df_three_years.at[source, year] = np.mean(np.load(metrics_path))

        df_three_years.to_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/three_years/dataframe_{target1}_{target2}.csv",
            index=True,
        )

In [None]:
# DO NOT RUN IF YOU ALREADY HAVE THE HEATMAPS

# One heatmap per (target1, target2) pair from the three-year dataframes,
# saved as PDF and displayed.
for target1 in sectors:
    for target2 in sectors:

        df = pd.read_csv(
            f"./results/economic_results/o_info/dataframes/1_1_1/three_years/dataframe_{target1}_{target2}.csv",
            index_col=0,
        )

        heatmap_ax = sns.heatmap(df)

        heatmap_ax.set_xlabel("Year")
        heatmap_ax.set_ylabel("Source")
        heatmap_ax.set_title(f"1 in {target1}, 1 in {target2}, 1 in source (O-info)")

        # The legend is left disabled: plt.legend()

        heatmap_ax.figure.savefig(
            f"./results/economic_results/o_info/images/1_1_1/three_years/heatmap_{target1}_{target2}.pdf",
            dpi=600,
            bbox_inches="tight",
        )

        plt.show()