# Network Analysis on Post Trade data:

## **variation in topology and stability of the Scale-free Networks over time**

The aim of the study is the construction of Social Networks from Settlement Instructions of T2S:

- The topological structure of the Network may change over time due to disruptive events.
- Two case studies are conducted on disruptive events: COVID-19 and the BTP Italia and BTP Futura issuances.
- The Scale-free behavior of the Network is tracked over time, and a ranking of the most central nodes is produced.
- Moreover, a network resilience analysis is performed using random and targeted attacks.

In [20]:
import os
import io
import random
random.seed(123456789)
from datetime import datetime, timedelta
import time
import json
import gc
import pickle
import collections

import math
import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib.ticker import MultipleLocator, FixedFormatter, FixedLocator
import matplotlib.dates as mdates
import seaborn as sns


import pickle
import boto3
#from s3 import S3

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


#import utils
from matplotlib.backends.backend_pdf import PdfPages

In [38]:
from statsmodels.distributions.empirical_distribution import ECDF
import networkx as nx
from networkx.algorithms import community
from scipy.stats import kstest
import powerlaw


import pyvis
from pyvis.network import Network
import igraph as ig
import leidenalg as la

In [None]:
# Load the pool of names used to build anonymized company names: the first
# space-separated token of every line of "anonnames.txt" is kept.
# The context manager guarantees the handle is closed once the list is built.
with open("anonnames.txt") as file:
    names = [line.split(" ")[0] for line in file]

In [None]:
# Vocabulary from which random company names are assembled.
# "Net", "Star", "Vision" and "Solutions" are listed twice, so a uniform
# random.choice() over this list picks them twice as often as the other words.
company_names = [ "Telecom",
"Software",
"Technology",
"Hardware",
"Electronics",
"Consulting",
"General",
"Frontier",
"Alpha",
"Industries",
"Net",
"People",
"Star",
"Bell",
"Research",
"Architecture",
"Building",
"Construction",
"Medicine",
"Hill",
"Graphics",
"Analysis",
"Vision",
"Contract",
"Solutions",
"Advanced",
"Venture",
"Innovation",
"Systems",
"Solutions",
"Provider",
"Design",
"Internet",
"Virtual",
"Vision",
"Application",
"Signal",
"Network",
"Net",
"Data",
"Electronic",
"Max",
"Adventure",
"Atlantic",
"Pacific",
"North",
"East",
"South",
"West",
"Speed",
"Universal",
"Galaxy",
"Future",
"Digital",
"Studio",
"Interactive",
"Source",
"Omega",
"Direct",
"Resource",
"Power",
"Federated",
"Star"]

In [None]:
company_types = ('LawFirm', 'Generic', 'Short')
last_names = names


def create_company_name(biz_type=None):
    """Build a random company name.

    Args:
        biz_type: one of ``company_types``; when falsy a type is drawn at
            random.

    Returns:
        The generated name as a single space-separated string.
        * 'LawFirm': "<name>, <name> & <name> LLP", names drawn from
          ``last_names``.
        * any other type: one to three draws from ``company_names`` (a word
          already picked is not repeated by these draws), followed by a
          random entry of ``company_types`` for 'Generic', or by one more
          draw from ``company_names`` when fewer than three words were kept.
    """
    if not biz_type:
        biz_type = random.choice(company_types)

    if biz_type == "LawFirm":
        # The three partners are drawn left to right.
        first = random.choice(last_names)
        second = random.choice(last_names)
        third = random.choice(last_names)
        return " ".join([f"{first}, {second} & {third}", 'LLP'])

    words = []
    for _ in range(random.randint(2, 4) - 1):
        candidate = random.choice(company_names)
        if candidate not in words:
            words.append(candidate)

    if biz_type == 'Generic':
        # NOTE(review): the suffix is drawn from `company_types`, so it is one
        # of 'LawFirm'/'Generic'/'Short' — confirm this is the intended pool.
        words.append(random.choice(company_types))
    elif len(words) < 3:
        # This extra draw is not de-duplicated against the words already kept.
        words.append(random.choice(company_names))

    return " ".join(words)

In [None]:
# Smoke test of the name generator. The call draws from the seeded `random`
# stream, so it shifts every random value generated after it.
create_company_name("Short")

# Data Retrieval

The resulting plots and statistics are stored in the directory __/home/ec2-user/SageMaker/Tesi_results/__

- monthly subdir for monthly data

- daily subdir for daily data


In [40]:
# Helper bound to the project bucket; the cells below use it to read the raw
# parquet partitions and to list the stored daily CSV files.
# NOTE(review): `S3` is not imported in the visible cells (the
# `from s3 import S3` line in the import cell is commented out) — confirm it
# is in scope before running this cell.
s3_helper = S3(bucket_name='mt-res-prod-ml-bucket')

In [None]:
# The path to the directory where the input data is
bucket_name = 'mt-res-prod-ml-bucket'

#output_path = f's3://{bucket_name}/tmp-andrea-spark'
# Local root for every plot/statistic produced by the notebook. makedirs with
# exist_ok=True creates it (and any missing parent) without first listing the
# parent directory.
ROOT_PATH = '/home/ec2-user/SageMaker/Tesi_results/'
os.makedirs(ROOT_PATH, exist_ok=True)

# Bucket-relative locations of the raw parquet data and of the aggregated CSVs
PATH_TO_INPUT_DIR = 'TESI/Tesi-Andrea/RAW'
PATH_TO_DATASET = 'TESI/Tesi-Andrea/DATA'
path_to_root_directory = f's3://{bucket_name}/{PATH_TO_DATASET}'

# Define the partition column of the input directory
PARTITION_COLS = ['dt_business']
START_DATE = '2018-05-01'
#END_DATE = '2018-05-04'
END_DATE = '2021-07-31'

# When enabled, results go to a dedicated sub-directory and the name -> alias
# mapping saved by a previous run (if any) is reloaded so aliases stay stable.
anonymize_data = True
if anonymize_data:
    ANONYMIZE_DICT = dict()
    ROOT_PATH = ROOT_PATH + 'anonymized/'
    os.makedirs(ROOT_PATH, exist_ok=True)
    if os.path.isfile(ROOT_PATH + 'anonymize_companies.pkl'):
        # NOTE: pickle.load executes arbitrary code from the file; only load a
        # mapping file that this notebook wrote itself.
        with open(ROOT_PATH + 'anonymize_companies.pkl', 'rb') as handle:
            ANONYMIZE_DICT = pickle.load(handle)

In [None]:
def read_input_dataset(START_DATE, END_DATE):
    """Read the raw parquet dataset for a date interval.

    The partitions are fetched through ``s3_helper.read_parquet`` from
    ``PATH_TO_INPUT_DIR`` using ``PARTITION_COLS``.

    Args:
        START_DATE: value forwarded as ``start_date`` to the reader.
        END_DATE: value forwarded as ``end_date`` to the reader.

    Returns:
        The DataFrame with a fresh 0..n-1 index and lower-cased column names.
    """
    frame = s3_helper.read_parquet(
        remote_dir=PATH_TO_INPUT_DIR,
        partition_cols=PARTITION_COLS,
        start_date=START_DATE,
        end_date=END_DATE,
    ).reset_index(drop=True)

    # Normalise the header so later cells can rely on lower-case names.
    frame.columns = [column.lower() for column in frame.columns]
    return frame

In [None]:
def get_data(daterange="all", force=False, mode='description'):
    """Load the daily aggregated CSV files, optionally rebuilding them first.

    Args:
        daterange: "all" to use START_DATE..END_DATE, otherwise a pair
            ``(start, end)`` of 'YYYY-MM-DD' strings (both bounds included).
        force: when True, every weekday between START_DATE and END_DATE is
            re-aggregated from the raw dataset and uploaded as
            '<day>.csv' before loading.
        mode: 'description' (party names, stored under '/DAILY') or 'bic'
            (party BIC codes, stored under '/BIC').

    Returns:
        One DataFrame with the rows of every selected daily file, latest file
        first; an empty DataFrame when no file falls inside the range.

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    if mode == 'description':
        features_agg = ['dt_business','ds_deli_pty1','ds_rece_pty1','cd_sett','cd_sec_at','ind_etf_mkt','cd_si_xb_type', 'id_isin','ds_isin']
        path = '/DAILY'
    elif mode == 'bic':
        features_agg = ['dt_business','id_deli_pty1_bic','id_rece_pty1_bic','cd_sett','cd_sec_at','ind_etf_mkt','cd_si_xb_type']
        path = '/BIC'
    else:
        # Previously an unknown mode left `path`/`features_agg` undefined and
        # failed later with a NameError.
        raise ValueError(f"Unknown mode {mode!r}: expected 'description' or 'bic'")

    if daterange == "all":
        start_date = datetime.strptime(START_DATE, '%Y-%m-%d')
        end_date = datetime.strptime(END_DATE, '%Y-%m-%d')
    else:
        start_date = datetime.strptime(daterange[0], '%Y-%m-%d')
        end_date = datetime.strptime(daterange[1], '%Y-%m-%d')

    def load_data():
        # List the folder that matches `mode` (it used to be always '/DAILY',
        # even when the files were then read from '/BIC').
        selected = []
        for obj in s3_helper.bucket.objects.filter(Prefix=PATH_TO_DATASET + path):
            file_name = obj.key.split("/")[-1]
            try:
                file_date = datetime.strptime(file_name.split('.csv')[0], '%Y-%m-%d')
            except ValueError:
                # Not a '<date>.csv' object (e.g. a folder placeholder key).
                continue
            if start_date <= file_date <= end_date:
                selected.append(file_name)

        if not selected:
            print("No data found between", start_date, 'and', end_date)
            return pd.DataFrame()

        print("Load data from", selected[0], 'to', selected[-1])
        frames = [pd.read_csv(os.path.join(path_to_root_directory + path, f)) for f in selected]
        # Single concat instead of one per file; reversed so rows keep the
        # latest-file-first order produced by the former prepend loop.
        return pd.concat(frames[::-1])

    if force:
        # NOTE(review): the rebuild always spans START_DATE..END_DATE, even
        # when `daterange` is narrower — confirm this is intended.
        # NOTE(review): `utils` is not imported in the visible cells (its
        # import is commented out) — confirm it is in scope.
        day = datetime.strptime(START_DATE, '%Y-%m-%d')
        last_day = datetime.strptime(END_DATE, '%Y-%m-%d')
        holidays = []

        while day <= last_day:
            string_day = day.strftime('%Y-%m-%d')
            if day.weekday() < 5:  # Monday..Friday only
                try:
                    raw_df = read_input_dataset(string_day, string_day)
                    raw_group = raw_df.groupby(features_agg)[['am_pend','am_amt']].sum()
                    raw_group = raw_group.reset_index()

                    print(raw_group.columns)
                    utils.upload_dataset_to_aws_s3_v2(
                        df=raw_group,
                        bucket='mt-res-prod-ml-bucket',
                        prefix=PATH_TO_DATASET + path,
                        output_file_name=f'{string_day}.csv',
                        index=False, header=True, sep=',', decimal='.')

                    print("UPLOADED to s3", f'{string_day}.csv')
                    print(string_day)
                except ValueError:
                    # A ValueError while processing a weekday is recorded as a
                    # day without data (reported below as "Festivity").
                    holidays.append(string_day)

            day += timedelta(days=1)
        print("Festivity:", holidays)
    print("Data loading concluded")

    return load_data()

In [None]:
def check_anonymized(node_list):
    """Translate real party names into their persistent aliases.

    Names not yet present in ``ANONYMIZE_DICT`` receive a fresh alias from
    ``create_company_name("Short")`` that is not already used as an alias.
    'MONTE TITOLI' and 'C.COMP.GARANZIA' are always mapped to themselves.
    The mapping is pickled to ``ROOT_PATH + 'anonymize_companies.pkl'``
    whenever this call changed it.

    An alias is drawn only for names that actually need one. The previous
    version drew (and scanned all existing aliases) for every element of
    ``node_list``, so the sequence of random draws differs from it.

    Args:
        node_list: iterable of hashable party names (duplicates allowed).

    Returns:
        A list with the alias of every element of ``node_list``, same order.
    """
    clear_names = ('MONTE TITOLI', 'C.COMP.GARANZIA')
    # Set of aliases in use: O(1) collision checks instead of scanning values.
    used_aliases = set(ANONYMIZE_DICT.values())
    changed = False

    for node in node_list:
        if node in clear_names:
            if ANONYMIZE_DICT.get(node) != node:
                ANONYMIZE_DICT[node] = node
                used_aliases.add(node)
                changed = True
            continue

        if node in ANONYMIZE_DICT:
            continue

        alias = create_company_name("Short")
        while alias in used_aliases:
            alias = create_company_name("Short")
        ANONYMIZE_DICT[node] = alias
        used_aliases.add(alias)
        changed = True

    if changed:
        with open(ROOT_PATH + 'anonymize_companies.pkl', 'wb') as handle:
            pickle.dump(ANONYMIZE_DICT, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return [ANONYMIZE_DICT[node] for node in node_list]

# Graph Generation

In [None]:
def df_generate_dict(df, financial_instrument_agg=False, external_agg=False, sett_agg=False):
    """Split *df* into one sub-DataFrame per combination of the detail columns.

    A column is a "detail" column when its aggregation flag is False:
    ``cd_sec_at`` (financial_instrument_agg), ``cd_sett`` (sett_agg) and
    ``cd_si_xb_type`` (external_agg). Only detail columns are read from
    *df*, so aggregated columns may be absent.

    This replaces the former chain of per-combination branches, which
    referenced an undefined ``fi`` for (fin, not sett, not ext), returned an
    empty dict for (not fin, sett, ext) and never reached the 'all' case.

    Args:
        df: DataFrame holding at least the detail columns.
        financial_instrument_agg: True when ``cd_sec_at`` is aggregated away.
        external_agg: True when ``cd_si_xb_type`` is aggregated away.
        sett_agg: True when ``cd_sett`` is aggregated away.

    Returns:
        dict mapping a key to the matching rows of *df*. With several detail
        columns the key is their values joined by "_" in the order
        cd_sec_at, cd_sett, cd_si_xb_type; with a single detail column the
        key is the raw value; with none the dict is ``{'all': df}``.
        Every combination of the unique values is present, even when no row
        matches it (empty sub-DataFrame).
    """
    from itertools import product

    detail_cols = [
        column
        for column, aggregated in (
            ('cd_sec_at', financial_instrument_agg),
            ('cd_sett', sett_agg),
            ('cd_si_xb_type', external_agg),
        )
        if not aggregated
    ]

    if not detail_cols:
        return {'all': df}

    levels = [df[column].unique() for column in detail_cols]

    df_dict = dict()
    for combo in product(*levels):
        # Positional boolean mask: independent of the index labels of df.
        mask = np.ones(len(df), dtype=bool)
        for column, value in zip(detail_cols, combo):
            mask &= (df[column] == value).to_numpy()
        key = combo[0] if len(combo) == 1 else "_".join(combo)
        df_dict[key] = df[mask]

    return df_dict

In [None]:
def preprocess_to_graph(df, sett_agg=False):
    """Prepare aggregated rows for graph construction.

    The amounts are cast to int, rows whose ``am_amt`` and ``am_pend`` are
    both zero are dropped and, unless ``sett_agg`` is set, only rows whose
    ``cd_sett`` is "S" or "N" are kept.

    The input DataFrame is left untouched (the previous version overwrote
    its ``am_pend``/``am_amt`` columns in place).

    Args:
        df: DataFrame with ``am_pend`` and ``am_amt`` (and ``cd_sett`` when
            ``sett_agg`` is falsy).
        sett_agg: True when the settlement status was aggregated away, in
            which case ``cd_sett`` is not read.

    Returns:
        A new, filtered DataFrame.
    """
    df = df.assign(
        am_pend=df['am_pend'].astype(int),
        am_amt=df['am_amt'].astype(int),
    )

    df = df[(df['am_amt'] != 0) | (df['am_pend'] != 0)]

    if not sett_agg:
        df = df[df['cd_sett'].isin(["S", "N"])]

    return df

In [None]:
def create_graph(df, date_range='all', external_agg = False, financial_instrument_agg = False,sett_agg=False, direction= 'D', mode = None):
    """Build one weighted graph per slice of the aggregated instructions.

    The rows are summed over the grouping columns left after the requested
    aggregations, cleaned with ``preprocess_to_graph``, split with
    ``df_generate_dict`` and every slice becomes a graph whose edges go from
    the delivering to the receiving party with ``am_amt`` as weight.

    Args:
        df: DataFrame with the party columns, the grouping columns and
            ``am_pend``/``am_amt``.
        date_range: 'all' drops ``dt_business`` from the grouping; 'Y', 'M',
            'W' and 'D' are placeholders for which nothing is built.
        external_agg: aggregate over ``cd_si_xb_type``.
        financial_instrument_agg: aggregate over ``cd_sec_at``.
        sett_agg: aggregate over ``cd_sett``.
        direction: 'D' builds ``nx.DiGraph``; anything else ``nx.Graph``.
        mode: 'bic' uses the BIC party columns, otherwise the name columns.

    Returns:
        dict mapping each slice key to its graph, or None for the
        placeholder date ranges.

    Raises:
        Exception: if an edge appears twice in a slice, or if an edge has a
            None endpoint or a zero weight.
        AssertionError: if node/edge counts do not match the slice.
    """
    if mode == 'bic':
        group_cols = ['dt_business','id_deli_pty1_bic','id_rece_pty1_bic','cd_sett','cd_sec_at','cd_si_xb_type']
        src_name = 'id_deli_pty1_bic'
        dst_name = 'id_rece_pty1_bic'
    else:
        group_cols = ['dt_business','ds_deli_pty1','ds_rece_pty1','cd_sett','cd_sec_at','cd_si_xb_type']
        src_name = 'ds_deli_pty1'
        dst_name = 'ds_rece_pty1'

    if date_range == 'all':
        group_cols.remove('dt_business')
    elif date_range in ('Y', 'M', 'W', 'D'):
        # Period-level aggregations are not implemented: nothing is built.
        return None

    if external_agg:
        group_cols.remove('cd_si_xb_type')
    if financial_instrument_agg:
        group_cols.remove('cd_sec_at')
    if sett_agg:
        group_cols.remove('cd_sett')

    df = df.groupby(group_cols)[['am_pend','am_amt']].sum().reset_index()

    df_pp = preprocess_to_graph(df, sett_agg=sett_agg)
    df_pp_dict = df_generate_dict(df_pp, external_agg=external_agg, financial_instrument_agg=financial_instrument_agg, sett_agg=sett_agg)

    graph_dict = dict()
    for df_name, part in df_pp_dict.items():
        src_list = list(part[src_name])
        dst_list = list(part[dst_name])

        if anonymize_data:
            src_list = check_anonymized(src_list)
            dst_list = check_anonymized(dst_list)

        w_list = list(part['am_amt'])

        G = nx.DiGraph() if direction == 'D' else nx.Graph()

        assert(len(src_list) == len(dst_list) == len(w_list))

        for src, dst, w in zip(src_list, dst_list, w_list):
            if G.has_edge(src, dst):
                # One row per (source, destination) is expected after the
                # groupby; a repeat means the grouping left duplicates.
                raise Exception(f"{src}_{dst} edge already in the graph")

            # NOTE(review): preprocess_to_graph keeps rows where only
            # `am_pend` is non-zero, so a zero `am_amt` can reach this check
            # and abort the build — confirm that is the intended behaviour.
            if src is None or dst is None or w == 0:
                print("NONE or ZERO")
                raise Exception("Zero or None value in the edge")

            G.add_edge(src, dst, weight=int(w))

        assert len(G.nodes()) ==len(list(set(part[src_name].unique()) | set(part[dst_name].unique()))), "Number of nodes and company names must be equal"
        assert len(G.edges())== len(part) , "Number of edges and number of dataframe rows must be equal"

        graph_dict[df_name] = G

    return graph_dict

In [None]:
def compute_power_law_exponent(degree_list):
    """Fit a power law to a degree sequence and run a KS test on it.

    Args:
        degree_list: sequence of node degrees.

    Returns:
        dict with 'p-value' and 'test' (p-value and statistic returned by
        ``kstest``) and 'exp' (the ``alpha`` of the ``powerlaw`` fit).
    """
    fitted = powerlaw.Fit(degree_list).power_law
    exponent = fitted.alpha
    lower_bound = fitted.xmin

    # NOTE(review): the string "powerlaw" selects scipy.stats.powerlaw, which
    # appears to be a different family (density a*x**(a-1) on a unit
    # interval, with `args` read as shape and loc) from the distribution
    # fitted by the `powerlaw` package — confirm the test is meaningful.
    statistic, p_value = kstest(
        degree_list, "powerlaw", args=(exponent, lower_bound), N=len(degree_list)
    )

    return {'p-value': p_value, 'test': statistic, 'exp': exponent}

In [None]:
def tenth_centrality(G, centr_type ,w = None):
    """Return the ten highest-ranked nodes for a centrality measure.

    Args:
        G: networkx graph.
        centr_type: 'degree', 'closeness', 'betweenness', 'eigenvector' or
            'pagerank'.
        w: edge attribute used as weight by betweenness, eigenvector and
            pagerank; degree and closeness ignore it.

    Returns:
        A list of at most ten ``(node, score)`` pairs sorted by decreasing
        score, or the string "No Eigenvector Centrality" when the
        eigenvector computation fails.

    Raises:
        ValueError: if ``centr_type`` is not one of the supported names.
    """
    # Lambdas keep the evaluation lazy: only the requested measure is run.
    rankers = {
        'degree': lambda: nx.degree_centrality(G),
        'closeness': lambda: nx.closeness_centrality(G),
        'betweenness': lambda: nx.betweenness_centrality(G, weight=w),
        'eigenvector': lambda: nx.eigenvector_centrality(G, weight=w),
        'pagerank': lambda: nx.pagerank(G, weight=w),
    }

    compute = rankers.get(centr_type)
    if compute is None:
        raise ValueError("Centrality type not found [try degree, betweenness, closeness, eigenvector, pagerank]")

    if centr_type == 'eigenvector':
        try:
            centr = compute()
        except Exception:
            # Best effort: a failed eigenvector computation yields a marker
            # string. `Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            return "No Eigenvector Centrality"
    else:
        centr = compute()

    sort_orders = sorted(centr.items(), key=lambda item: item[1], reverse=True)
    return sort_orders[:10]

In [None]:
def compute_graph_stats(G, name="",str_to_write = ""):
    """Compute descriptive statistics of a directed weighted graph.

    The function reads ``in_degree``/``out_degree`` and the weak/strong
    connectivity of ``G``, so it needs a directed graph whose edges carry a
    ``weight`` attribute.

    Fixes over the previous version: the weighted assortativity used the
    misspelled attribute 'weigth' (so it equalled the unweighted value);
    ``is_weak``/``is_strong`` were swapped; the "Weighted" centrality lines
    printed the unweighted rankings; in/out hubs were selected on the total
    degree.

    Args:
        G: the graph to describe.
        name: unused, kept for backward compatibility.
        str_to_write: unused (the report always starts empty), kept for
            backward compatibility.

    Returns:
        dict with 'string_to_write' (human-readable report) plus one entry
        per statistic. Graphs with 3 nodes or fewer only return
        'string_to_write' with an explanatory message.
    """
    order = G.order()
    size = G.size()

    if order <= 0:
        return {"string_to_write":"Not applicable, size or order are equal to 0"}
    if order <=3:
        return {"string_to_write":"Not applicable, size or order lesser than 3"}

    # order > 3 at this point, so the ratio is always defined.
    order_size_ratio = size/order

    report = [f'\n▸Number of nodes: {order} - Number of links:{size} - Size/Order ratio: {order_size_ratio}']
    degree_stats = dict()

    def summarize(values, label_suffix, key_suffix):
        # Append the five summary lines to the report and store the same
        # figures under the keys used by the returned dictionary.
        std, mean, median = np.std(values), np.mean(values), np.median(values)
        low, high = np.min(values), np.max(values)
        report.append(f'\n▸Standard deviation{label_suffix}: {std}')
        report.append(f'\n▸Mean{label_suffix}: {mean}')
        report.append(f'\n▸Median{label_suffix}: {median}')
        report.append(f'\n▸Min{label_suffix}: {low}')
        report.append(f'\n▸Max{label_suffix}: {high}')
        degree_stats[f'mean{key_suffix}'] = mean
        degree_stats[f'std{key_suffix}'] = std
        degree_stats[f'median{key_suffix}'] = median
        degree_stats[f'min_deg{key_suffix}'] = low
        degree_stats[f'max_deg{key_suffix}'] = high

    degree = list(dict(G.degree()).values())
    in_degree = list(dict(G.in_degree()).values())
    out_degree = list(dict(G.out_degree()).values())
    degree_weighted = list(dict(G.degree(weight="weight")).values())
    in_degree_weighted = list(dict(G.in_degree(weight="weight")).values())
    out_degree_weighted = list(dict(G.out_degree(weight="weight")).values())

    summarize(degree, '', '')
    summarize(in_degree, ' in_degree', '_in')
    summarize(out_degree, ' out_degree', '_out')
    summarize(degree_weighted, ' weighted', '_weighted')
    summarize(in_degree_weighted, ' in_degree weighted', '_in_weighted')
    summarize(out_degree_weighted, ' out_degree weighted', '_out_weighted')

    density = nx.density(G)
    report.append(f'\n▸Density: {density}')

    avg_clustering = nx.average_clustering(G)
    report.append(f'\n▸Avg. Clustering coeff: {avg_clustering}')
    transitivity = nx.transitivity(G)
    report.append(f'\n▸Transitivity: {transitivity}')

    # Both coefficients are stored as strings, as before.
    assortativity = str(nx.degree_assortativity_coefficient(G))
    report.append(f'\n▸Assortativity coefficient: {assortativity}')

    # Weighted variant, now really using the 'weight' attribute. The
    # Pearson-based implementation is used because it works on the list of
    # degree pairs and needs no degree mixing matrix.
    assortativity_w = str(nx.degree_pearson_correlation_coefficient(G, weight='weight'))
    report.append(f'\n▸Assortativity weighted coefficient: {assortativity_w}')

    pearson_assortativity = nx.degree_pearson_correlation_coefficient(G)
    report.append(f'\n▸Pearson Assortativity coefficient: {pearson_assortativity}')

    try:
        avg_shortest_path_length = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        # Marker string kept as-is: the plotting code looks for str values.
        avg_shortest_path_length = "is weakly connected"

    dag = nx.is_directed_acyclic_graph(G)

    try:
        diameter = nx.algorithms.distance_measures.diameter(G)
    except Exception:
        # Best effort: when networkx cannot compute a diameter it is
        # reported as infinite.
        diameter = float("inf")

    wk_comps = [len(c) for c in sorted(nx.weakly_connected_components(G),key=len, reverse=True)]
    is_weak = nx.is_weakly_connected(G)
    sg_comps = [len(c) for c in sorted(nx.strongly_connected_components(G),key=len, reverse=True)]
    is_strong = nx.is_strongly_connected(G)

    report.append(f'\n▸Average Shortest Path Length: {avg_shortest_path_length}')
    report.append(f'\n▸Diameter: {diameter}')
    report.append(f'\n▸Is DAG?: {dag}')
    report.append(f'\n▸Weakly Connected Components: {wk_comps}')
    report.append(f'\n▸Is Weakly connected?:  {is_weak}')
    report.append(f'\n▸Strongly Connected Components: {sg_comps}')
    report.append(f'\n▸Is Strongly connected?:  {is_strong}')

    degree_Centrality = tenth_centrality(G, centr_type="degree")
    betweenesCentrality = tenth_centrality(G, centr_type="betweenness")
    closenessCentrality = tenth_centrality(G, centr_type="closeness")
    eigenCentrality = tenth_centrality(G, centr_type="eigenvector")
    pagerankCentrality = tenth_centrality(G, centr_type="pagerank")

    betweenesCentrality_w = tenth_centrality(G, centr_type="betweenness", w='weight')
    eigenCentrality_w = tenth_centrality(G, centr_type="eigenvector", w='weight')
    pagerankCentrality_w = tenth_centrality(G, centr_type="pagerank", w='weight')

    report.append(f'\n▸10 most important nodes for Degree Centrality:\n{degree_Centrality}')
    report.append(f'\n▸10 most important nodes for Betweennes Centrality:\n{betweenesCentrality}')
    report.append(f'\n▸10 most important nodes for Closeness Centrality:\n{closenessCentrality}')
    report.append(f'\n▸10 most important nodes for Eigenvector Centrality:\n{eigenCentrality}')
    report.append(f'\n▸10 most important nodes for Page Rank:\n{pagerankCentrality}')

    report.append(f'\n▸10 most important nodes for Betweennes Centrality Weighted:\n{betweenesCentrality_w}')
    report.append(f'\n▸10 most important nodes for Eigenvector Centrality Weighted:\n{eigenCentrality_w}')
    report.append(f'\n▸10 most important nodes for Page Rank Weighted:\n{pagerankCentrality_w}')

    def hubs_above_p90(degree_view, percentile_label, hub_label):
        # Hubs are the nodes whose degree reaches the 90th percentile of the
        # same degree sequence.
        by_node = dict(degree_view)
        threshold = np.percentile(list(by_node.values()), 90)
        hubs = [node for node, deg in by_node.items() if deg >= threshold]
        report.append(f'\n▸90-percentile {percentile_label}: {threshold}')
        report.append(f'\n▸Number of nodes in {hub_label}: {len(hubs)}')
        report.append(f'\n▸List of nodes in {hub_label}:\n{list(hubs)}')
        return threshold, hubs

    percentile_90, hub_nodi = hubs_above_p90(G.degree(), 'degree', 'HUBs')
    hubs_above_p90(G.in_degree(), 'in_degree', 'in_degree HUBs')
    hubs_above_p90(G.out_degree(), 'out_degree', 'out_degree HUBs')

    isolates = list(nx.isolates(G))
    report.append(f'\n▸Isolated nodes:{isolates}')

    # The power-law KS statistics (compute_power_law_exponent) are disabled,
    # so no 'PL_*' field is produced.

    return {
        'string_to_write': "".join(report),
        'order': order,
        'size': size,
        'order_size_ratio': order_size_ratio,
        'avg_shortest_path_length': avg_shortest_path_length,
        **degree_stats,
        'assortativity': assortativity,
        'pearson_assortativity': pearson_assortativity,
        'assortativity_weigthed': assortativity_w,  # key spelling kept for callers
        'transitivity': transitivity,
        'avg_clustering': avg_clustering,
        'density': density,
        'diameter': diameter,
        'is_dag': dag,
        'wk_comps': wk_comps,
        'is_weak': is_weak,
        'sg_comps': sg_comps,
        'is_strong': is_strong,
        'degree_centrality': degree_Centrality,
        'betweennes_centrality': betweenesCentrality,
        'closeness_centrality': closenessCentrality,
        'eigen_centrality': eigenCentrality,
        'pagerank_centrality': pagerankCentrality,
        'betweennes_centrality_weighted': betweenesCentrality_w,
        'eigen_centrality_weighted': eigenCentrality_w,
        'pagerank_centrality_weighted': pagerankCentrality_w,
        '90-percentile_degree': percentile_90,
        'hubs': list(hub_nodi),
        'hubs_number': len(hub_nodi),
        'isolated_nodes': isolates,
    }

# **Internal and External Instruction Analysis**

Daily aggregation: for each business day, the data are aggregated by Deliverer and Receiver name, Cross-Border Instruction Indicator, Type of financial instrument, and Status of the instruction. For each of these features, different networks are obtained.

In [None]:
def get_daily_graph(daily_df, mode = None,ea = False, fi = False, sa=True ):
    """Build the graphs of every business day found in *daily_df*.

    For each distinct ``dt_business`` the day's rows are passed to
    ``create_graph``. When ``fi`` is False a second set of graphs is built
    from the rows with ``ind_etf_mkt == 1`` (aggregated over the financial
    instrument) and stored under keys prefixed with "ETF_".

    Args:
        daily_df: DataFrame with ``dt_business``, ``ind_etf_mkt`` and the
            columns required by ``create_graph``.
        mode: forwarded to ``create_graph``.
        ea: forwarded as ``external_agg``.
        fi: forwarded as ``financial_instrument_agg``.
        sa: forwarded as ``sett_agg``.

    Returns:
        dict mapping each day to a dict ``{graph key: graph}``; on a key
        clash the non-ETF graph wins.
    """
    graphs_by_day = dict()

    for day in list(daily_df['dt_business'].unique()):
        day_df = daily_df[daily_df['dt_business'] == day]

        # The full-market graphs are built first, then the ETF-only ones.
        day_graphs = create_graph(day_df, external_agg=ea, financial_instrument_agg=fi, sett_agg=sa, mode=mode)

        etf_graphs = dict()
        if fi == False:  # explicit comparison kept on purpose
            etf_only = create_graph(
                day_df[day_df['ind_etf_mkt'] == 1],
                external_agg=ea,
                financial_instrument_agg=True,
                sett_agg=sa,
                mode=mode,
            )
            etf_graphs = {"ETF_" + key: graph for key, graph in etf_only.items()}

        graphs_by_day[day] = {**etf_graphs, **day_graphs}

    return graphs_by_day

In [None]:
def _find_split_index(dates, split_date):
    """Return the position in *dates* of *split_date* or of the next day present."""
    if not dates:
        raise ValueError("Date not found in dataframe")

    last_date = max(dates)  # 'YYYY-MM-DD' strings sort chronologically
    current = datetime.strftime(datetime.strptime(split_date, '%Y-%m-%d'), '%Y-%m-%d')
    # Bounded search: stop once the candidate is past the last plotted date
    # (the former loop kept adding days without any limit).
    while current <= last_date:
        if current in dates:
            return dates.index(current)
        next_day = datetime.strptime(current, '%Y-%m-%d') + timedelta(days=1)
        current = datetime.strftime(next_day, '%Y-%m-%d')

    raise ValueError(f"No date on or after {split_date} found in dataframe")


def plot_stats_distr(df,feature, comparison =False, comparison_splitdate = None, name="",  save = False,save_name ="", trend_line=True):
    """Plot a graph statistic over time, optionally split in coloured periods.

    Args:
        df: DataFrame indexed by '<YYYY-MM-DD>_<graph name>' strings.
        feature: column of *df* to plot.
        comparison: when True the series is split at ``comparison_splitdate``.
        comparison_splitdate: a 'YYYY-MM-DD' string (before/after split) or
            a tuple whose first two dates delimit a middle period. A date
            missing from the index is moved forward to the next date present.
        name: prefix of the plot title and of the saved file name.
        save: when True the figure is written under
            ``ROOT_PATH/daily/covid19/``.
        save_name: suffix of the saved file name.
        trend_line: draw a dashed linear trend over the whole series (always
            disabled for 'avg_shortest_path_length').

    Returns:
        The matplotlib Axes holding the plot.

    Raises:
        ValueError: if ``comparison`` is set without a split date, or if no
            date on or after the split date exists in the index.
    """
    x = [i.split("_")[0] for i in list(df.index)]
    y = list(df[feature])
    x_init = x
    y_init = y

    colr = ['blue','red','green']
    labels = [feature]
    segments = [(x, y)]

    if comparison:
        if comparison_splitdate is None:
            raise ValueError("If you want to compare before and after, please specify the comparison_splitdate parameter")

        if type(comparison_splitdate) is tuple:
            part1, part2 = [_find_split_index(x, d) for d in comparison_splitdate[:2]]
            # Consecutive periods share their boundary point so the coloured
            # lines join without a gap.
            segments = [
                (x[:part1+1], y[:part1+1]),
                (x[part1:part2+1], y[part1:part2+1]),
                (x[part2:], y[part2:]),
            ]
            labels = [feature+" Before",feature+" BTP Emission",feature+ " After"]
        elif type(comparison_splitdate) is str:
            index = _find_split_index(x, comparison_splitdate)
            segments = [(x[:index+1], y[:index+1]), (x[index:], y[index:])]
            labels = [feature+" Before",feature+ " After"]
        # Any other type leaves the series unsplit, as before.

    if feature == 'avg_shortest_path_length':
        # String markers stored in place of a number are blanked out, and no
        # trend can be fitted over the gaps.
        trend_line = False
        segments = [(sx, [None if type(v) is str else v for v in sy]) for sx, sy in segments]

    # Applied before the first draw so the figure size and style also hold
    # for the very first plot of a session.
    sns.set(rc={'figure.figsize':(30,15)}, style="white", font_scale=2.5)

    if trend_line:
        xd = mdates.date2num(list(pd.to_datetime(x_init)))
        z = np.polyfit(xd,y_init,1)
        p = np.poly1d(z)
        sns.lineplot(x= x_init,y = p(xd),color='black',linewidth = 3, label='trend',linestyle="dashed")

    plot = None
    for (seg_x, seg_y), color, label in zip(segments, colr, labels):
        axes = sns.lineplot(x=seg_x, y=seg_y, linewidth = 3, color=color, label=label)
        if plot is None:
            plot = axes

    plot.set_title(name+" - Distribution over time - "+feature)
    plot.set_ylabel(feature)
    plot.set_xlabel("Date")
    plt.legend()

    # NOTE(review): the threshold looks at the first period only, not at the
    # whole series — confirm this is intended.
    if len(segments[0][0]) >100:
        for ind, label in enumerate(plot.get_xticklabels()):
            # every 3rd label is kept
            label.set_visible(ind % 3 == 0)

    plot.tick_params(axis='x', rotation=90, labelsize="xx-small")

    if save:
        os.makedirs(ROOT_PATH+'daily/covid19/', exist_ok=True)
        figure = plot.get_figure()
        figure.savefig(f'{ROOT_PATH}/daily/covid19/{name}-{save_name}.png')

    return plot

In [None]:
def get_df_stats(Graph_dict, get_dict=False):
    """Compute the statistics of every daily graph and collect them in a table.

    Args:
        Graph_dict: mapping ``date -> {graph name -> graph}``. Dates and graph
            names are concatenated with ``"_"`` to build the row labels, so
            both must be strings.
        get_dict: when true, also return the raw ``label -> statistics``
            mapping (which still holds the ``'string_to_write'`` entries).
            Other cells of this notebook call the function with
            ``get_dict=True``, so the keyword is accepted here as well.

    Returns:
        The statistics DataFrame (one row per ``"<date>_<graph name>"``), or
        the tuple ``(DataFrame, dict)`` when ``get_dict`` is true.
    """
    df_stats = dict()
    for dt in sorted(Graph_dict):
        for g_name, graph in Graph_dict[dt].items():
            print(g_name, dt)
            label = dt + "_" + g_name
            df_stats[label] = compute_graph_stats(graph, name=label, str_to_write="")

    df = pd.DataFrame.from_dict(df_stats, orient='index')
    df.assortativity = df.assortativity.astype(float)
    # The textual report is only kept in the raw dictionary, not in the table.
    df.drop('string_to_write', axis=1, inplace=True)

    if get_dict:
        return df, df_stats
    return df

In [None]:
def get_covid_plots(df1, split_date="", path_dirs="", df_name="", comparison=False, skip_instr=None, merge=False):
    """Write one PDF per instrument group with the time series of the key metrics.

    Args:
        df1: statistics DataFrame whose index labels are ``"_"``-separated and
            start with the date.
        split_date: split date(s) forwarded to ``plot_stats_distr`` when
            ``comparison`` is true.
        path_dirs: output directory, relative to ``ROOT_PATH``.
        df_name: prefix used in plot titles and PDF file names.
        comparison: when true, each plot is split at ``split_date``.
        skip_instr: optional collection of group labels to leave out.
        merge: when true, rows are grouped by the second ``"_"`` token of the
            label only; otherwise by the second and third tokens.
    """
    important_features = ['mean', 'avg_shortest_path_length', 'transitivity', 'avg_clustering', 'assortativity', 'density']
    # A None default avoids sharing one list object between calls.
    skip_instr = [] if skip_instr is None else skip_instr

    path = ROOT_PATH + path_dirs
    os.makedirs(path, exist_ok=True)

    if merge:
        groups = {i.split('_')[1] for i in df1.index}
    else:
        groups = {i.split('_')[1] + "_" + i.split("_")[2] for i in df1.index}

    comparison_splitdate = split_date if comparison else None

    # Sorted so that the processing order is reproducible between runs.
    for instr in sorted(groups):
        if instr in skip_instr:
            continue
        print(instr)
        # Group labels are literal text, not regular expressions.
        df = df1[df1.index.str.contains(instr, regex=False)]

        plots = []
        for imp in important_features:
            plots.append(plot_stats_distr(df, imp, name=df_name, comparison=comparison,
                                          comparison_splitdate=comparison_splitdate, save=False))
            # Close the pyplot figure so that the next metric starts on a new one;
            # the Figure object stays reachable through the returned axes.
            plt.close()

        with PdfPages(f'{path}{df_name}_{instr}.pdf') as pdf:
            for p in plots:
                pdf.savefig(p.get_figure())


In [None]:
# Build the daily graphs one calendar year at a time and export the statistic
# plots of each year into its own folder.
dates = [('2018-05-02','2018-12-31'),('2019-01-01','2019-12-31'),('2020-01-01','2020-12-31'),('2021-01-01','2021-07-31')]

ind = ['2018', '2019', '2020', '2021']

for d, year in zip(dates, ind):
    df_d = get_data(daterange=d, mode='bic')
    daily_graph_dict = get_daily_graph(df_d, mode="bic")
    # get_covid_plots needs the statistics DataFrame itself; asking for
    # get_dict=True would hand it a (DataFrame, dict) tuple instead.
    stats_ext = get_df_stats(daily_graph_dict)
    get_covid_plots(stats_ext, path_dirs="daily/external/" + year + "/", df_name="external-agg")
    #get_ext_int_plots(stats_ext,  path_dirs="daily/external/"+ind[dates.index(d)]+"/", df_name="external-agg")

In [None]:
# For each calendar year: build the daily graphs, export the statistic plots
# and dump the textual report of every graph into one text file.
dates = [('2018-05-02','2018-12-31'),('2019-01-01','2019-12-31'),('2020-01-01','2020-12-31'),('2021-01-01','2021-07-31')]

ind = ['2018', '2019', '2020', '2021']

for d, year in zip(dates, ind):
    # get_covid_plots prefixes ROOT_PATH itself, so it must receive the
    # relative directory; the absolute one is only used for the stats file.
    rel_dir = "daily/external-agg/" + year + "/"
    path = ROOT_PATH + rel_dir

    df_d = get_data(daterange=d, mode='bic')
    daily_graph_dict = get_daily_graph(df_d, mode="bic", ea=False, fi=True, sa=True)
    stats_ext, stats_ext_dict = get_df_stats(daily_graph_dict, get_dict=True)
    get_covid_plots(stats_ext, path_dirs=rel_dir, df_name="external-agg", merge=True)

    df_stats = dict()
    os.makedirs(path + "stats/", exist_ok=True)

    # One titled section per graph, in the order of the statistics dictionary.
    str_to_write_merged = "".join(
        "\n\n============ " + key + " ============\n" + stats['string_to_write'] + "\n"
        for key, stats in stats_ext_dict.items()
    )

    # The context manager closes the file even if the write fails.
    with open(path + "stats/" + year + ".txt", "w") as file:
        file.write(str_to_write_merged)

    


In [None]:
# Display the statistics table left in `stats_ext` by the last iteration of the yearly loop.
stats_ext

# **Disruptive Events**

Disruptive events can adversely change the network topology and structure. Studies on disruptive events such as the September 11th, 2001 terrorist attacks have shown how such an event can alter economic systems. The financial system of the USA was disrupted, and this affected the structure of interbank payments: the attacks damaged property and communication systems, making it impossible for many banks to execute payments. A similar study is performed here, taking into consideration two different case studies of disruptive events:

- The impact of Covid19 on the network metrics and topology during the first lockdown (January 2020 - June 2020)
- The impact of large emissions of Government Bonds, such as BTP Italia and BTP Futura, on the network metrics and topology. The analysis takes into consideration the ten days following the BTP announcement. This is considered a disruptive event since a large number of instructions is exchanged during the emission dates.

## **Case study: Impact of Covid19**

The impact of Covid19 on the network metrics and topology during the first lockdown (January 2020 - June 2020)

In [None]:
# Instructions for the reference period (January 2019 - January 2020).
covid_df_before = get_data(daterange=('2019-01-01','2020-01-31'))

# Instructions for the period compared against it (February - December 2020).
covid_df_after = get_data(daterange=('2020-02-01','2020-12-31'))

In [None]:
def get_daily_graph(daily_df):
    """Build the graphs of every business day found in ``daily_df``.

    For each distinct ``dt_business`` value two sets of graphs are created with
    ``create_graph``: one from all rows of the day, and one from the rows with
    ``ind_etf_mkt == 1`` aggregated over financial instruments. The entries
    ``"N"`` and ``"S"`` of the second set are renamed to ``"ETF_N"`` and
    ``"ETF_S"`` before both sets are merged.
    NOTE(review): ``create_graph`` is assumed to return a dict keyed by graph
    name - confirm against its definition.

    Args:
        daily_df: DataFrame with at least the columns ``dt_business`` and
            ``ind_etf_mkt``.

    Returns:
        dict mapping each business day to its ``{graph name -> graph}`` dict.
    """
    G_dict = dict()
    for dt in daily_df['dt_business'].unique():
        df = daily_df[daily_df['dt_business'] == dt]

        day_graphs = create_graph(df, external_agg=True, financial_instrument_agg=False, sett_agg=False)
        etf_graphs = create_graph(df[df['ind_etf_mkt'] == 1], external_agg=True,
                                  financial_instrument_agg=True, sett_agg=False)

        # A day may lack either ETF graph; report it and carry on instead of
        # aborting the whole run (previously only a missing "S" was tolerated).
        for sett_key in ("N", "S"):
            try:
                etf_graphs['ETF_' + sett_key] = etf_graphs.pop(sett_key)
            except KeyError:
                print("ETF_" + sett_key + " not avaiable in date:", dt)

        # On a name clash the graphs built from all rows take precedence.
        G_dict[dt] = {**etf_graphs, **day_graphs}
    return G_dict

In [None]:
# Daily graphs for the reference period.
G_covid_bef = get_daily_graph(covid_df_before)

In [None]:
# Daily graphs for the comparison period.
G_covid_aft = get_daily_graph(covid_df_after)

In [None]:
def get_df_stats(Graph_dict,get_dict=False):
    """Tabulate the statistics of every graph stored in ``Graph_dict``.

    ``Graph_dict`` maps a date to a ``{graph name -> graph}`` dictionary. Each
    graph is passed to ``compute_graph_stats`` and its result is stored under
    the label ``"<date>_<graph name>"``; dates are visited in sorted order.

    Returns the statistics DataFrame (without the ``'string_to_write'``
    column); when ``get_dict`` is true the raw ``label -> statistics``
    dictionary is returned as a second element.
    """
    stats_by_label = {}
    for day in sorted(Graph_dict):
        for graph_name, graph in Graph_dict[day].items():
            label = day + "_" + graph_name
            stats_by_label[label] = compute_graph_stats(graph, name=label, str_to_write="")

    table = pd.DataFrame.from_dict(stats_by_label, orient='index')
    table.assortativity = table.assortativity.astype(float)
    # The textual report stays only in the raw dictionary.
    table = table.drop(columns='string_to_write')

    return (table, stats_by_label) if get_dict else table

In [None]:
# Statistics table of the reference-period graphs.
covid_bef_stats = get_df_stats(G_covid_bef)

In [None]:
# Statistics table of the comparison-period graphs.
covid_aft_stats = get_df_stats(G_covid_aft)

In [None]:
# Column-wise mean over the reference-period rows whose label contains '2019-07-01'.
covid_bef_stats[covid_bef_stats.index.str.contains('2019-07-01')].mean()

In [None]:
def avg_timeseries_stats(df, dates = True, sett = False, feature = 'mean'):
    """Average one statistic per day, optionally split by settlement flag.

    Row labels are expected to look like ``"<date>_..._<flag>"``: the date is
    the text before the first ``"_"`` and, when ``sett`` is true, the
    settlement flag (``'S'`` or ``'N'``) is the text after the last ``"_"``.

    Only the ``feature`` column is averaged, so other columns may hold
    non-numeric values.

    Args:
        df: statistics DataFrame indexed by the labels described above.
        dates: when false nothing is computed and an empty dict is returned.
        sett: when true the averages are computed separately for the rows
            ending in ``"_S"`` and in ``"_N"``.
        feature: name of the column to average.

    Returns:
        ``{date: mean}`` when ``sett`` is false, otherwise
        ``{'S': {date: mean}, 'N': {date: mean}}``. A date without rows for a
        flag gets NaN.
    """
    ensemble_dict = dict()
    if not dates:
        return ensemble_dict

    days = [label.split("_")[0] for label in df.index]
    unique_days = set(days)

    if not sett:
        for dt in unique_days:
            same_day = [day == dt for day in days]
            ensemble_dict[dt] = df.loc[same_day, feature].mean()
        return ensemble_dict

    for st in ['S', 'N']:
        # Match the flag as the trailing token: a plain substring test would
        # also pick up instrument names that merely contain the letter.
        has_flag = [label.endswith("_" + st) for label in df.index]
        ensemble_dict[st] = dict()
        for dt in unique_days:
            selected = [day == dt and flagged for day, flagged in zip(days, has_flag)]
            ensemble_dict[st][dt] = df.loc[selected, feature].mean()
    return ensemble_dict

In [None]:
def plot_trend_line(df,df2, feature="degree", name = ""):
    """Plot a metric before and after an event, each with its linear trend.

    Args:
        df: DataFrame for the "before" period, indexed by date-like labels and
            holding a ``feature`` column.
        df2: DataFrame for the "after" period, with the same layout.
        feature: column to plot.
        name: suffix appended to the feature name in the plot title.

    Returns:
        The matplotlib axes holding the four lines.

    The input frames are left untouched: the dates are parsed into local
    variables instead of overwriting the callers' indexes.
    """
    def _dates_values_trend(frame):
        # Parsed dates, raw values and the fitted line evaluated on the dates.
        when = pd.to_datetime(frame.index)
        values = frame[feature].tolist()
        x_num = np.asarray(mdates.date2num(list(when)), dtype=float)
        y_num = np.asarray(values, dtype=float)
        # Fit on finite points only: a single NaN would otherwise make the
        # fit fail or come out all-NaN.
        usable = np.isfinite(y_num)
        line = np.poly1d(np.polyfit(x_num[usable], y_num[usable], 1))
        return when, values, line(x_num)

    dates_bef, y, trend_bef = _dates_values_trend(df)
    dates_aft, y2, trend_aft = _dates_values_trend(df2)

    fig = plt.figure(figsize=(15,12))
    sns.set(style="white", font_scale=1.5)
    # Set before plotting so that seaborn keeps these axis labels.
    plt.xlabel('Dates')
    plt.ylabel(feature)

    ax = sns.lineplot(x = dates_bef, y = y, label=feature+" before")
    ax = sns.lineplot(x = dates_aft, y = y2, label=feature+" after")
    ax = sns.lineplot(x = dates_bef, y = trend_bef, color='green', label="trend before", linestyle="dashed")
    ax = sns.lineplot(x = dates_aft, y = trend_aft, color='red', label="trend after", linestyle="dashed")

    ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5),
              ncol=1, fancybox=True, shadow=True )
    ax.tick_params(axis='x', rotation=90)
    ax.set_title("Trend "+feature+name+" over time")

    return ax

In [None]:
def trends(feature, d1,d2, sett = False):
    """Plot before/after trend lines for several metrics and save them in one PDF.

    Args:
        feature: iterable of column names to plot.
        d1: statistics DataFrame of the "before" period.
        d2: statistics DataFrame of the "after" period.
        sett: ``'separated'`` (or ``True``) plots each settlement flag on its
            own, ``'merge'`` (or ``False``) plots the daily average over all
            rows, ``'both'`` does both.

    Returns:
        The list of axes, in the order they were written to
        ``<ROOT_PATH>daily/covid19/ensemble_trends/trends.pdf``.

    Raises:
        ValueError: if ``sett`` is none of the accepted values.
    """
    if sett == 'separated' or sett is True:
        st = [True]
    elif sett == 'merge' or sett is False:
        st = [False]
    elif sett == 'both':
        st = [True, False]
    else:
        raise ValueError(f"sett must be 'separated', 'merge' or 'both', got {sett!r}")

    plots = []
    path = ROOT_PATH+'daily/covid19/ensemble_trends/'
    os.makedirs(path, exist_ok=True)

    for s in st:
        for feat in feature:
            # Use the frames passed in rather than the notebook globals.
            bef_avg_stats_dict = avg_timeseries_stats(d1, sett = s, feature=feat)
            aft_avg_stats_dict = avg_timeseries_stats(d2, sett = s, feature=feat)

            if s:
                for type_sett in ['S','N']:
                    df1_sett = pd.DataFrame.from_dict(bef_avg_stats_dict[type_sett], columns=[feat], orient="index")
                    df2_sett = pd.DataFrame.from_dict(aft_avg_stats_dict[type_sett], columns=[feat], orient="index")
                    plots.append(plot_trend_line(df1_sett,df2_sett,name = "_"+type_sett, feature=feat))
            else:
                df1 = pd.DataFrame.from_dict(bef_avg_stats_dict, columns=[feat], orient="index")
                df2 = pd.DataFrame.from_dict(aft_avg_stats_dict, columns=[feat], orient="index")
                plots.append(plot_trend_line(df1,df2, feat))

    print("Saving PDFs")
    with PdfPages(f'{path}/trends.pdf') as pdf:
        for p in plots:
            pdf.savefig(p.get_figure(),bbox_inches='tight')
    return plots


In [None]:
# Trend plots for six metrics, both per settlement flag and merged ('both').
trends(['mean','assortativity','transitivity','std','avg_clustering','density'],covid_bef_stats,covid_aft_stats, sett='both')

In [None]:
def _first_available_split(x, split_date):
    """Return ``(position, date)`` of ``split_date`` in ``x``, rolling forward day by day.

    ``x`` is the list of ``'%Y-%m-%d'`` strings of the plotted rows. When
    ``split_date`` is not listed, the next calendar days are tried in turn.
    The search stops with a ``ValueError`` once it passes the latest date in
    ``x`` instead of looping on.
    """
    last_day = datetime.strptime(max(x), '%Y-%m-%d') if x else None
    candidate = split_date
    while candidate not in x:
        next_day = datetime.strptime(candidate, '%Y-%m-%d') + timedelta(days=1)
        if last_day is None or next_day > last_day:
            raise ValueError(f"No date on or after {split_date} found in the dataframe")
        candidate = datetime.strftime(next_day, '%Y-%m-%d')
    return x.index(candidate), candidate


def plot_stats_distr(df,feature, comparison =False, comparison_splitdate = None, name="",  save = False,save_name ="", trend_line=True):
    """Plot one statistic over time, optionally split into coloured periods.

    Args:
        df: statistics DataFrame whose index labels start with the date
            (text before the first ``"_"``).
        feature: column to plot.
        comparison: when true the series is split at ``comparison_splitdate``.
        comparison_splitdate: a date string (two segments: before / after;
            a missing date rolls forward to the next available one) or a
            tuple of two date strings (three segments: before / BTP emission /
            after; both dates must be present).
        name: prefix of the plot title and of the saved file name.
        save: when true the figure is written to
            ``<ROOT_PATH>/daily/covid19/<name>-<save_name>.png``.
        save_name: suffix of the saved file name.
        trend_line: when true a dashed linear trend over the whole series is
            drawn; always disabled for ``'avg_shortest_path_length'``.

    Returns:
        The matplotlib axes of the plot.

    Raises:
        ValueError: if ``comparison`` is requested without a split date, or a
            split date cannot be located in the data.
    """
    x = [i.split("_")[0] for i in df.index]
    y = list(df[feature])
    # The trend line always uses the complete series, even after splitting.
    x_init = x
    y_init = y
    xd = mdates.date2num(list(pd.to_datetime(x)))

    labels = [feature]
    colr = ['blue','red','green']
    x2 = y2 = x3 = y3 = None

    if comparison:
        if comparison_splitdate is None:
            raise ValueError("If you want to compare before and after, please specify the comparison_splitdate parameter")

        if isinstance(comparison_splitdate, tuple):
            try:
                index = [x.index(i) for i in comparison_splitdate]
                part1 = index[0]
                part2 = index[1]
            except (ValueError, IndexError):
                raise ValueError(f"Split dates {comparison_splitdate} not found in the dataframe") from None

            # Neighbouring segments share their boundary point so that the
            # coloured lines join up.
            x2 = x[part1:part2+1]
            x3 = x[part2:]
            x = x[:part1+1]

            y2 = y[part1:part2+1]
            y3 = y[part2:]
            y = y[:part1+1]

            labels = [feature+" Before",feature+" BTP Emission",feature+ " After"]

        if isinstance(comparison_splitdate, str):
            index, comparison_splitdate = _first_available_split(x, comparison_splitdate)

            x2 = x[index:]
            x = x[:index+1]
            y2 = y[index:]
            y = y[:index+1]

            labels = [feature+" Before",feature+ " After"]

    if feature == 'avg_shortest_path_length':
        # This column may hold strings instead of numbers; blank them out and
        # skip the trend, which cannot be fitted on such data.
        trend_line = False
        y = [None if isinstance(v, str) else v for v in y]
        if y2 is not None:
            y2 = [None if isinstance(v, str) else v for v in y2]
        if y3 is not None:
            y3 = [None if isinstance(v, str) else v for v in y3]

    # Apply the styling before the first artist is drawn so that the figure
    # size does not depend on whether a trend line is plotted.
    sns.set(rc={'figure.figsize':(30,15)}, style="white", font_scale=2.5)

    if trend_line:
        z = np.polyfit(xd,y_init,1)
        p = np.poly1d(z)
        sns.lineplot(x= x_init,y = p(xd),color='black',linewidth = 3, label='trend',linestyle="dashed")

    plot  = sns.lineplot(x=x,y=y, linewidth = 3, color=colr[0], label = labels[0])

    if comparison:
        if isinstance(comparison_splitdate, str):
            sns.lineplot(x=x2,y=y2, linewidth = 3, color=colr[1], label=labels[1])
        if isinstance(comparison_splitdate, tuple):
            sns.lineplot(x=x2,y=y2, linewidth = 3, color=colr[1], label=labels[1])
            sns.lineplot(x=x3,y=y3, linewidth = 3, color=colr[2], label=labels[2])

    plot.set_title(name+" - Distribution over time - "+feature)
    plot.set_ylabel(feature)
    plot.set_xlabel("Date")
    plt.legend()

    if len(x) >100:
        # Long series: keep every third tick label to limit overlap.
        for ind, label in enumerate(plot.get_xticklabels()):
            label.set_visible(ind % 3 == 0)

    plot.tick_params(axis='x', rotation=90, labelsize="xx-small")

    if save:
        os.makedirs(ROOT_PATH+'daily/covid19/', exist_ok=True)
        figure = plot.get_figure()
        figure.savefig(f'{ROOT_PATH}/daily/covid19/{name}-{save_name}.png')

    return plot

In [None]:
def get_covid_plots(df1, split_date="", path_dirs="", df_name="", comparison=False, skip_instr=None):
    """Write one PDF per instrument group with the time series of the key metrics.

    Rows are grouped by the second and third ``"_"`` tokens of their index
    label (the first token is the date).

    Args:
        df1: statistics DataFrame indexed by ``"_"``-separated labels.
        split_date: split date(s) forwarded to ``plot_stats_distr`` when
            ``comparison`` is true.
        path_dirs: output directory, relative to ``ROOT_PATH``.
        df_name: prefix used in plot titles and PDF file names.
        comparison: when true, each plot is split at ``split_date``.
        skip_instr: optional collection of group labels to leave out.
    """
    important_features = ['mean', 'avg_shortest_path_length', 'transitivity', 'avg_clustering', 'assortativity', 'density']
    # A None default avoids sharing one list object between calls.
    skip_instr = [] if skip_instr is None else skip_instr

    path = ROOT_PATH + path_dirs
    os.makedirs(path, exist_ok=True)

    groups = {i.split('_')[1] + "_" + i.split("_")[2] for i in df1.index}
    comparison_splitdate = split_date if comparison else None

    # Sorted so that the processing order is reproducible between runs.
    for instr in sorted(groups):
        if instr in skip_instr:
            continue
        print(instr)
        # Group labels are literal text, not regular expressions.
        df = df1[df1.index.str.contains(instr, regex=False)]

        plots = []
        for imp in important_features:
            plots.append(plot_stats_distr(df, imp, name=df_name, comparison=comparison,
                                          comparison_splitdate=comparison_splitdate, save=False))
            # Close the pyplot figure so that the next metric starts on a new one;
            # the Figure object stays reachable through the returned axes.
            plt.close()

        with PdfPages(f'{path}{df_name}_{instr}.pdf') as pdf:
            for p in plots:
                pdf.savefig(p.get_figure())


In [None]:
# Metric plots for the reference period only (no split).
get_covid_plots(covid_bef_stats,  path_dirs="daily/covid19/before/", df_name="before")

In [None]:
# Metric plots for the comparison period only (no split).
get_covid_plots(covid_aft_stats, path_dirs="daily/covid19/after/", df_name="after")

In [None]:
# Both periods in one plot; the split date is the date prefix of the first
# comparison-period row whose label contains "ETF".
get_covid_plots(df1=pd.concat([covid_bef_stats,covid_aft_stats]), split_date= covid_aft_stats[covid_aft_stats.index.str.contains("ETF")].index[0].split("_")[0], path_dirs="daily/covid19/comparison/", comparison=True,df_name="merge")

## **Case study: BTP Italia and BTP Futura**

The impact of large emissions of Government Bonds, such as BTP Italia and BTP Futura, on the network metrics and topology. The analysis takes into consideration the ten days following the BTP announcement. This is considered a disruptive event since a large number of instructions is exchanged during the emission dates.

BTP Italia Emissions:

- from 14 to 17 May 2018
- from 19 to 22 November 2018
- from 21 to 23 October 2019
- from 18 to 20 May 2021

BTP Futura Emissions:

- from 6 to 10 July 2020
- from 9 to 13 November 2020
- from 19 to 23 April 2021

In [None]:
# (start, end) dates of each BTP Italia emission, keyed by a month/year label.
BTP_italia_dates = {
    "May2018":('2018-05-14','2018-05-17'),
    "Nov2018":('2018-11-19', '2018-11-22'),
    "Oct2019":('2019-10-21', '2019-10-23'),
    "May2021":('2021-05-18','2021-05-20')
}

# (start, end) dates of each BTP Futura emission, keyed by a month/year label.
BTP_futura_dates = {
    "Jul2020":('2020-07-06', '2020-07-10'),
    "Nov2020":('2020-11-09', '2020-11-13'),
    "Apr2021":('2021-04-19', '2021-04-23')
}


In [None]:
def get_5day_diff(date,type_diff,end_date = None, to_string = True):
    """Collect weekdays (Monday-Friday) walking day by day from ``date``.

    The starting ``date`` itself is included when it is a weekday.

    Args:
        date: ``datetime`` to start from.
        type_diff: ``"before"`` to walk backwards in time, ``"after"`` to walk
            forwards.
        end_date: optional ``datetime``. Without it, five weekdays are
            collected. With it, at most ``(end_date - date).days - 1``
            weekdays are collected and the walk never goes past ``end_date``.
        to_string: when true the days are returned as ``'%Y-%m-%d'`` strings,
            otherwise as ``datetime`` objects.

    Returns:
        The collected days, in the order they were visited.

    Raises:
        ValueError: if ``type_diff`` is neither ``"before"`` nor ``"after"``
            (the walk would otherwise never move off the starting day).
    """
    if type_diff == "before":
        step = timedelta(days=-1)
    elif type_diff == "after":
        step = timedelta(days=1)
    else:
        raise ValueError(f'type_diff must be "before" or "after", got {type_diff!r}')

    if end_date is None:
        flag_date = 5
        # No real upper bound: far enough ahead to never stop the walk.
        end_date = date+timedelta(days=10000)
    else:
        flag_date = (end_date-date).days-1

    days_list = []
    while len(days_list) < flag_date and date <= end_date:
        if date.weekday() < 5:
            days_list.append(date)
        date = date + step

    if to_string:
        return [datetime.strftime(day,'%Y-%m-%d') for day in days_list]
    return days_list

In [None]:
def get_nearest_dates(daterange):
    """Build the day windows around an emission period.

    ``daterange`` holds the start and end of the period as ``'%Y-%m-%d'``
    strings. The result maps ``'before'``, ``'after'`` and ``'critical'`` to
    the day lists produced by ``get_5day_diff`` (walking back from the start,
    forward from the end, and forward from the start bounded by the end), and
    ``'all'`` to the sorted concatenation of the three.
    """
    start_text, end_text = daterange[0], daterange[1]
    period_start = datetime.strptime(start_text, '%Y-%m-%d')
    period_end = datetime.strptime(end_text, '%Y-%m-%d')

    windows = {
        'before': get_5day_diff(period_start, type_diff="before"),
        'after': get_5day_diff(period_end, type_diff="after"),
        'critical': get_5day_diff(period_start, end_date=period_end, type_diff="after"),
    }
    windows['all'] = sorted(windows['before'] + windows['critical'] + windows['after'])
    return windows

In [None]:
# Example: day windows around the November 2018 BTP Italia emission.
dates_dict = get_nearest_dates(BTP_italia_dates['Nov2018'])
dates_dict

In [None]:
# Display the BTP Italia emission periods.
BTP_italia_dates

In [None]:
# BTP Italia: for every emission period build the daily graphs and their
# statistics, then plot each instrument group split at the start and end of
# the emission.
G_btp_dict = {}
btp_stats_dict = {}
all_btp_dict = {}

for period, emission_range in BTP_italia_dates.items():
    print(period)
    dates_dict = get_nearest_dates(emission_range)
    window_start, window_end = dates_dict['all'][0], dates_dict['all'][-1]
    btp_df = get_data(daterange=(window_start, window_end))
    G_btp_dict[period] = get_daily_graph(btp_df)
    btp_stats_dict[period] = get_df_stats(G_btp_dict[period])

    period_stats = btp_stats_dict[period]
    emission_start = dates_dict['critical'][0]

    # Groups without a row on the first emission day cannot be split there.
    skips = []
    for instr in set([i.split("_")[1]+"_"+i.split("_")[2] for i in period_stats.index]):
        instr_rows = period_stats[period_stats.index.str.contains(instr)]
        instr_days = [i.split("_")[0] for i in instr_rows.index]
        if emission_start not in instr_days:
            skips.append(instr)

    get_covid_plots(period_stats, (emission_start, dates_dict['after'][0]),
                    path_dirs='daily/btp/btp-italia/'+period+'/', comparison=True,
                    df_name = "btp-italia", skip_instr = skips)

all_btp_dict['ITALIA'] = {"G":G_btp_dict, "stat":btp_stats_dict}

In [None]:
# BTP Futura: for every emission period build the daily graphs and their
# statistics, then plot each instrument group split at the start and end of
# the emission.
G_btp_dict = {}
btp_stats_dict = {}

for period, emission_range in BTP_futura_dates.items():
    print(period)
    dates_dict = get_nearest_dates(emission_range)
    window_start, window_end = dates_dict['all'][0], dates_dict['all'][-1]
    btp_df = get_data(daterange=(window_start, window_end))
    G_btp_dict[period] = get_daily_graph(btp_df)
    btp_stats_dict[period] = get_df_stats(G_btp_dict[period])

    period_stats = btp_stats_dict[period]
    emission_start = dates_dict['critical'][0]

    # Groups without a row on the first emission day cannot be split there.
    skips = []
    for instr in set([i.split("_")[1]+"_"+i.split("_")[2] for i in period_stats.index]):
        instr_rows = period_stats[period_stats.index.str.contains(instr)]
        instr_days = [i.split("_")[0] for i in instr_rows.index]
        if emission_start not in instr_days:
            skips.append(instr)

    get_covid_plots(period_stats, (emission_start, dates_dict['after'][0]),
                    path_dirs='daily/btp/btp-futura/'+period+'/', comparison=True,
                    df_name = "btp-futura", skip_instr = skips)

all_btp_dict['FUTURA'] = {"G":G_btp_dict, "stat":btp_stats_dict}