#### Env: Minh

In [2]:
import pandas as pd
from upgrade import *
from causallearn.utils.cit import CIT
import numpy as np
from pathlib import Path
import os
from tqdm import tqdm

dataname = "erdos_renyi/d40_p0.1"
# dataname = "erdos_renyi/d40_p0.1"
mi = 5      # The number of values a variable can take is ranged in [2, mi-1]
di = 1.0      # The dirichlet alpha that controls the data distribution
n = 10      # The number of data silos

TMB_activated = True

silos = []

folderpath = f"./data/distributed/{dataname}/m{mi}_d{di}_n{n}"
groundtruth = np.loadtxt(f"./data/distributed/{dataname}/adj.txt")

if not Path(folderpath).exists():
    print("Folder", folderpath, "not exist!")
else:
    for file in sorted(os.listdir(folderpath)):
        filename = os.path.join(folderpath, file)
        silo_data = pd.read_csv(filename)
        silos.append(silo_data)
        print("Loaded file:", filename, end="\t")
        print(len(silo_data), " Instances", len(silo_data.columns), "Variables")

merged_df = pd.concat(silos, axis=0)
merged_df = merged_df.reindex(sorted(merged_df.columns, key=lambda item: int(item[1:])), axis=1)
all_vars = list(merged_df.columns)

Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-0.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-1.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-2.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-3.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-4.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-5.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-6.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-7.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5_d1.0_n10/silo-8.csv	2500  Instances 40 Variables
Loaded file: ./data/distributed/erdos_renyi/d40_p0.1/m5

In [3]:
confidence = 0.01
connectivity = {var: [] for var in all_vars}
chisq_obj = CIT(merged_df, "chisq")

for X in connectivity.keys():
    other_vars = list(set(all_vars) - set(connectivity[X]) - set([X]))
    for Y in other_vars:
        pval = chisq_obj(all_vars.index(X), all_vars.index(Y), []) # type: ignore
        if pval <= confidence: # type: ignore
            connectivity[X] = list(set(connectivity[X]) | set([Y]))
            connectivity[Y] = list(set(connectivity[Y]) | set([X]))

In [4]:
from copy import deepcopy

def GSMB(indexes, confidence=0.01):
    data = merged_df.iloc[indexes].reset_index().drop(columns=['index'])
    chisq_obj = CIT(data, "chisq") # construct a CIT instance with data and method name
    all_var_idx = [i for i in range(len(data.columns))]
    markov_blankets_idx = {i: [] for i in range(len(data.columns))}

    for X in all_var_idx:
        # S = markov_blankets_idx[X]
        S = []
        # X = 6
        prev_length = 0
        count = 0
        while True:
            count += 1
            # print("==============New cycle==================")
            for Y in list(set(all_var_idx) - set(S) - set([X])):
                if Y != X:
                    pval = chisq_obj(X, Y, S) # type:ignore
                    if pval <= confidence: # type:ignore
                        S.append(Y)
            
            for Y in deepcopy(S):
                pval = chisq_obj(X, Y, list(set(S) - set([Y]))) # type:ignore
                if pval > confidence: # type:ignore
                    S.remove(Y)
            
            if (len(S) - prev_length == 0) or (count > 2):
                break
            else:
                prev_length = len(S)
        
        # markov_blankets[data.columns[X]] = [data.columns[i] for i in S]
        markov_blankets_idx[X] = list(set(markov_blankets_idx[X])|set(S))
        # for i in S:
        #     if X not in markov_blankets_idx[i]:
        #         markov_blankets_idx[i].append(X)
    
    markov_blankets = {var: [] for var in all_vars}
    for idx, mb_idxes in markov_blankets_idx.items():
        var = all_vars[idx]
        markov_blankets[var] = [all_vars[i] for i in mb_idxes]
    
    return markov_blankets

In [5]:
def true_markov_blanket(adj_matrix, var_idx):
    parents = np.where(adj_matrix[:, var_idx])[0]
    children = np.where(adj_matrix[var_idx])[0]
    
    spouses = set()
    for c in children:
        for sp in np.where(adj_matrix[:, c])[0]:
            spouses.add(sp)
    
    spouses = list(spouses)
    return parents, children, spouses


def to_list(all_vars, mb_idx_list):
    return [all_vars[i] for i in mb_idx_list]

In [6]:
TMB_activated = 1
markov_blankets = {var: [] for var in all_vars}

if TMB_activated:
    for var in markov_blankets.keys():
        pa, ch, sp = true_markov_blanket(groundtruth, int(var[1:]) - 1)
        markov_blankets[var] = list(set(to_list(all_vars, list(set(pa)|set(ch)|set(sp)))) - set([var]))
else:
    markov_blankets = GSMB([i for i in range(len(merged_df))])

In [7]:
def recursive_conn(neighbors):
    output = []
    if len(neighbors) <= 1:
      output = [neighbors]
    else:
      for i in neighbors:
        res_i = [i] + recursive_conn(list(set(neighbors)&set(markov_blankets[i])))
        output.append(res_i)
    return output

def unfold(input):
    """
    Arguments:
      input: [var, var, ..., [var, ...], [var, ...]]

    that has a number of non-list element and a number of list element
    """
    cut_index = 0
    while cut_index < len(input):
      cut_index += 1
      if isinstance(input[cut_index], list):
        break

    out = []
    for i in range(cut_index, len(input)):
      out.append([*input[:cut_index], *input[i]])
    return out

In [8]:
recursive_outputs = {}

for anchor_var in tqdm(markov_blankets.keys()):
    recursive_outputs[anchor_var] = recursive_conn(deepcopy(markov_blankets[anchor_var]))

100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


In [9]:
potential_parents = {}

for anchor_var in tqdm(markov_blankets.keys(), leave=False):
    # anchor_var = 'X1'
    recursive_output = recursive_outputs[anchor_var]

    final_output = set()
    for i in range(len(recursive_output)):
        test_case = deepcopy(recursive_output[i])
        unique_elements = set()
        if len(test_case) == 1:
            unique_elements.add(tuple(test_case))
        else:
            first_element = test_case.pop(0)
            while len(test_case):
                examine_group = test_case.pop(0)
                if len(examine_group) and not isinstance(examine_group[0], list) and isinstance(examine_group[-1], list):
                    unfolded = unfold(examine_group)
                    test_case += [*unfold(examine_group)]
                else:
                    unique_elements.add(tuple(sorted(examine_group + [first_element])))
        
        final_output = final_output|unique_elements
    potential_parents[anchor_var] = [j for j in final_output]

                                               

In [10]:
def generate_uniform_distributions(P0: np.ndarray, num_gen=100, gamma2=0.8):
    Ulist = list(np.eye(P0.shape[0]))
    # Compute the boundary points
    boundaries = []
    for i in range(len(Ulist)):
        if P0[i]/gamma2 < 1:
            alpha_i = 1/(1 - P0[i]) * (1 - P0[i]/(gamma2 + 0.001))
            boundary_i = alpha_i * P0 + (1 - alpha_i) * Ulist[i]
        else:
            boundary_i = Ulist[i]
        boundaries.append(boundary_i)
    
    boundaries = np.stack(boundaries)
    # w = np.random.uniform(0, 1, (num_gen * 1000, len(Ulist)))
    w = np.exp(np.random.rand(num_gen * 1000, len(Ulist)))
    w = w/w.sum(axis=1, keepdims=True)
    
    kmeans = KMeans(n_clusters=num_gen, n_init="auto")
    kmeans.fit(w @ boundaries)
    res = kmeans.cluster_centers_
    
    return res

def multivariate_sampling(data: pd.DataFrame, variables: list, sample_dis: dict, instance_index):
    remains = deepcopy(variables)
    while len(remains):
        sampling_var = remains.pop(0)
        distribution = sample_dis[sampling_var][instance_index]
        _, all_index = univariate_sampling(data, sampling_var, {i: distribution[i] for i in range(distribution.shape[0])})
    return all_index

In [11]:
def compute_variance_viaindexesv2(indexes: list, variable: str, parents: list):
    conditional_probs_record = merged_df[parents + [variable]].groupby(parents + [variable]).count().reset_index()
    mll_list = []
    env = 0
    for index in indexes:
        vertical_sampled_data = merged_df.iloc[index].reset_index()
        vertical_sampled_data = vertical_sampled_data.drop(columns=['index'])
        vertical_sampled_data.insert(0, 'count', [1] * len(vertical_sampled_data))
        
        summary_with_ch = vertical_sampled_data.groupby(parents + [variable])['count'].sum().reset_index()
        mll, output = compute_mll(summary_with_ch, parents, env)
        conditional_probs_record = conditional_probs_record.merge(output, on=parents + [variable], how='left')
        mll_list.append(mll)
        env += 1
    
    mean_mll = np.mean(mll_list)
    var_avg = conditional_probs_record.iloc[:, len(parents) + 1:].var(axis=1, skipna=True).mean()
    return var_avg, mean_mll, conditional_probs_record


def compute_weighted_variance_viaindexesv2(indexes: list, variable: str, parents: list):
    variance, _, df = compute_variance_viaindexesv2(indexes, variable, parents)
    if len(parents):
        joint_mat = np.array([df[f'joint_{i}'] for i in range(len(indexes))]).T
        probs_mat = np.array([df[f'probs_{i}'] for i in range(len(indexes))]).T
        probs_mean = []
        for i in range(probs_mat.shape[0]):
            if len(probs_mat[i][~np.isnan(probs_mat[i])]):
                probs_mean.append(np.mean(probs_mat[i][~np.isnan(probs_mat[i])]).item())
            else:
                probs_mean.append(0)
                
        probs_mean = np.expand_dims(np.array(probs_mean), 1)
        joint_mat = joint_mat.shape[1] * joint_mat/joint_mat.sum(axis=1, keepdims=True)
        prod = joint_mat * (probs_mat - probs_mean)**2
        return np.power(np.mean(prod[~np.isnan(prod)]), 0.5), parents
    else:
        return variance, parents


def individual_causal_search(var, silos_index, num_env=10, gamma2=0.5):
    basis = []
    ordering = sorted(markov_blankets[anchor_var], key=lambda item: len(connectivity[item]), reverse=False)
    while len(ordering):
        var = ordering.pop(0)
        discard_vars = connectivity[var]
        ordering = sorted(list(set(ordering) - set(discard_vars)), 
                        key=lambda item: len(list(set(connectivity[item]) - set(discard_vars))), reverse=False)
        basis.append(var)

    sample_dis = {var: generate_uniform_distributions(P0=marginal_prob(merged_df, [var]),
                                                    num_gen=num_env, 
                                                    gamma2=np.power(gamma2, 1./len(basis))) for var in basis}

    silos_index = [multivariate_sampling(merged_df, basis, sample_dis, i) for i in range(num_env)]
    record = []
    for var in markov_blankets[anchor_var]:
        silos_index = [multivariate_sampling(merged_df, basis, sample_dis, i) for i in range(num_env)]
        variance1, _ = compute_weighted_variance_viaindexesv2(silos_index, anchor_var, [var])
        record.append([var, variance1])
    
    return {var: record}

In [38]:
# anchor_var = 'X2'
num_env = 10
gamma2 = 0.5

basis = []
ordering = sorted(all_vars, key=lambda item: len(connectivity[item]), reverse=False)
while len(ordering):
    var = ordering.pop(0)
    discard_vars = connectivity[var]
    ordering = sorted(list(set(ordering) - set(discard_vars)), 
                    key=lambda item: len(list(set(connectivity[item]) - set(discard_vars))), reverse=False)
    basis.append(var)
        
sample_dis = {var: generate_uniform_distributions(P0=marginal_prob(merged_df, [var]),
                                                    num_gen=num_env, 
                                                    gamma2=np.power(gamma2, 1./len(basis))) for var in basis}
silos_index = [multivariate_sampling(merged_df, basis, sample_dis, i) for i in range(num_env)]

candidate_record = {}
for anchor_var in markov_blankets.keys():
    record = []
    for var in markov_blankets[anchor_var]:
        variance, _ = compute_weighted_variance_viaindexesv2(silos_index, anchor_var, [var])
        record.append([var, variance])
    
    candidate_record[anchor_var] = record

In [56]:
adj_mtx = np.ones([len(all_vars), len(all_vars)])
for var in candidate_record.keys():
    if len(candidate_record[var]):
        var_id = int(var[1:]) - 1
        for candidate, variance in candidate_record[var]:
            can_id = int(candidate[1:]) - 1
            if adj_mtx[can_id][var_id] > variance:
                adj_mtx[can_id][var_id] = variance
                

for i in range(len(all_vars)):
    for j in range(i, len(all_vars)):
        if adj_mtx[i][j] > adj_mtx[j][i]:
            adj_mtx[i][j] = 0
        elif adj_mtx[i][j] < adj_mtx[j][i]:
            adj_mtx[j][i] = 0
        else:
            adj_mtx[j][i] = 0
            adj_mtx[i][j] = 0

In [57]:
print("True parents:", np.array(all_vars)[np.where(groundtruth[:, 0])])
print("Found parents:", np.array(all_vars)[np.where(adj_mtx[:,0])])

True parents: ['X4' 'X12' 'X38']
Found parents: ['X12' 'X24' 'X28' 'X37' 'X38']


In [58]:
print("True children:", np.array(all_vars)[np.where(groundtruth[0])])
print("Found children:", np.array(all_vars)[np.where(adj_mtx[0])])

True children: ['X37']
Found children: ['X4' 'X14']


In [111]:
print("True parents:", np.array(all_vars)[np.where(groundtruth[:, 3])])
print("Found parents:", np.array(all_vars)[np.where(adj_mtx[:,3])])

True parents: ['X7' 'X33']
Found parents: ['X1' 'X9' 'X12' 'X24' 'X26' 'X28' 'X30' 'X31' 'X37' 'X38']


In [12]:
def true_markov_blanket(adj_matrix, var_idx):
    parents = np.where(adj_matrix[:, var_idx])[0]
    children = np.where(adj_matrix[var_idx])[0]
    
    spouses = set()
    for c in children:
        for sp in np.where(adj_matrix[:, c])[0]:
            spouses.add(sp)
    
    pa_sp = list(set(parents)&spouses)
    ch_sp = list(set(children)&spouses)
    spouses = list(spouses - set(pa_sp) - set(ch_sp))
    
    return parents, pa_sp, spouses, ch_sp, children


def to_list(all_vars, mb_idx_list):
    return [all_vars[i] for i in mb_idx_list]

In [13]:
anchor_var = 'X1'
anchor_var_id = int(anchor_var[1:]) - 1

pa, pa_sp, sp, ch_sp, ch = true_markov_blanket(groundtruth, anchor_var_id)

print("Parent:", to_list(all_vars, pa))
print("Pa-Sp:", to_list(all_vars, pa_sp))
print("Spouses:", to_list(all_vars, sp))
print("Ch-Sp:", to_list(all_vars, ch_sp))
print("Children:", to_list(all_vars, ch))


num_env = 10
gamma = 0.8

pseudo_pa = []
pseudo_ch = []

for mb_var in markov_blankets[anchor_var]:
# for mb_var in np.array(all_vars)[np.where(groundtruth[anchor_var_id])]:
    silos_index = []
    # ============= First test =============== #
    sample_dis = {var: generate_uniform_distributions(P0=marginal_prob(merged_df, [var]),
                                                        num_gen=num_env, 
                                                        gamma2=gamma2) for var in [mb_var]}
    silos_index += [multivariate_sampling(merged_df, [mb_var], sample_dis, i) for i in range(num_env)]

    sample_dis = {var: generate_uniform_distributions(P0=marginal_prob(merged_df, [var]),
                                                        num_gen=num_env, 
                                                        gamma2=gamma2) for var in [anchor_var]}
    silos_index += [multivariate_sampling(merged_df, [anchor_var], sample_dis, i) for i in range(num_env)]


    # ============= Second test =============== #
    variance1, _ = compute_weighted_variance_viaindexesv2(silos_index, anchor_var, [mb_var])
    variance2, _ = compute_weighted_variance_viaindexesv2(silos_index, mb_var, [anchor_var])

    if variance1 > variance2:
        pseudo_ch.append(mb_var)
    else:
        pseudo_pa.append(mb_var)

print("Pseudo-pa:", pseudo_pa)
print("Pseudo-ch:", pseudo_ch)

Parent: ['X4', 'X8', 'X12', 'X15', 'X18', 'X38']
Pa-Sp: []
Spouses: []
Ch-Sp: []
Children: []


NameError: name 'gamma2' is not defined

In [144]:
potential_parents[anchor_var]

[('X13', 'X19', 'X21', 'X26', 'X28'),
 ('X13', 'X28', 'X34'),
 ('X2', 'X28', 'X29', 'X34', 'X37')]

In [75]:

print(variance)

0.0013056590460801324


In [13]:
individual_silos = {}
num_env = 50
gamma2 = 0.5

for var in markov_blankets['X2'] + ['X2']:
    sample_dis = {var: generate_uniform_distributions(P0=marginal_prob(merged_df, [var]), num_gen=num_env, gamma2=gamma2)}
    individual_silos[var] = [multivariate_sampling(merged_df, [var], sample_dis, i) for i in range(num_env)]

In [14]:
individual_silos.keys()

dict_keys(['X5', 'X18', 'X20', 'X34', 'X16', 'X38', 'X31', 'X33', 'X30', 'X29', 'X3', 'X32', 'X28', 'X15', 'X37', 'X2'])

In [15]:
record = []
for var in markov_blankets['X2']:
    variance1, _ = compute_weighted_variance_viaindexesv2(individual_silos['X2'], 'X2', [var])
    variance2, _ = compute_weighted_variance_viaindexesv2(individual_silos[var], 'X2', [var])
    record.append([var, variance1, variance2])

In [17]:
sorted(record, key=lambda item: item[2])

[['X15', 0.0001900063687383028, 4.2406626702840014e-07],
 ['X32', 0.00019012827455625758, 4.7520896413502624e-07],
 ['X33', 0.0001931070557810126, 4.978066892876072e-07],
 ['X30', 0.0001901072523611351, 5.000685366621823e-07],
 ['X20', 0.000190217478224416, 5.066069409215515e-07],
 ['X37', 0.00019009301578272187, 5.243085231617147e-07],
 ['X3', 0.00019265404864177594, 5.291254445804123e-07],
 ['X18', 0.00019592820726375425, 5.348029324125226e-07],
 ['X34', 0.0002532863195508458, 5.551547050686838e-07],
 ['X38', 0.0002532448863774968, 6.43807855131014e-07],
 ['X29', 0.00025320974251718024, 6.672178480474078e-07],
 ['X5', 0.00025316808179979736, 7.06592077250697e-07],
 ['X31', 0.00025816278747927877, 7.076029880529071e-07],
 ['X16', 0.00038316659876494084, 7.300694448082547e-07],
 ['X28', 0.00025323226567974203, 7.721154937057825e-07]]

In [18]:
variance, _ = compute_weighted_variance_viaindexesv2(individual_silos['X2'], 'X2', ['X16', 'X18', 'X31', 'X33'])
print(variance)

1.1421332966310986e-05


In [None]:
var = 'X28'
indx = int(var[1:]) - 1
print(f"Parent of {var}:", np.array(all_vars)[np.where(groundtruth[:, indx])])
print(f"Children of {var}:", np.array(all_vars)[np.where(groundtruth[indx])])

In [None]:
from multiprocessing import Pool
from typing import List, Tuple

# Function to execute F in parallel
def execute_in_parallel(args_list: List[Tuple]):
    with Pool() as pool:
        # Map the function F to the arguments in parallel
        results = pool.starmap(individual_causal_search, args_list)
    return results

In [None]:
invariance_hardcap = 1e-3

outputs = []
inputs = [(var, potential_parents[var], silos_index, invariance_hardcap) for var in ['X2']] #potential_parents.keys()
outputs = execute_in_parallel(inputs)

In [None]:
outputs

In [None]:
true_causal_parents = ()
for item in outputs:
    true_causal_parents += tuple(item.items())

true_causal_parents = dict(true_causal_parents)
true_causal_parents

In [None]:
adj_mtx = np.zeros([len(all_vars), len(all_vars)])
for var in true_causal_parents.keys():
    if len(true_causal_parents[var]):
        parents, invariance = true_causal_parents[var]
        var_id = int(var[1:]) - 1
        for pa in parents:
            pa_id = int(pa[1:]) - 1
            if adj_mtx[var_id][pa_id] == 0:
                adj_mtx[pa_id][var_id] = invariance
            elif adj_mtx[var_id][pa_id] > adj_mtx[pa_id][var_id]:
                adj_mtx[pa_id][var_id] = invariance
                adj_mtx[var_id][pa_id] = 0

In [None]:
from plot_utils import true_edge, spur_edge, fals_edge, miss_edge, swap_pos


etrue = true_edge(groundtruth, adj_mtx)
espur = spur_edge(groundtruth, adj_mtx)
efals = fals_edge(groundtruth, adj_mtx)
emiss = miss_edge(groundtruth, adj_mtx)

# print(etrue)
print(len(etrue), len(espur), len(emiss), len(efals))

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

G = nx.DiGraph()

fin_adjmtx = adj_mtx

for i in range(fin_adjmtx.shape[0]):
    for j in range(fin_adjmtx.shape[1]):
        if fin_adjmtx[i][j] > 0:
            G.add_edge(f"X{i+1}", f"X{j+1}", weight=np.round(1/fin_adjmtx[i][j],2))
            # print("Here add edge", f"X{i+1}", f"X{j+1}")
    G.add_node(f"X{i+1}")
    

etrue = true_edge(groundtruth, fin_adjmtx)
espur = spur_edge(groundtruth, fin_adjmtx)
efals = fals_edge(groundtruth, fin_adjmtx)
emiss = miss_edge(groundtruth, fin_adjmtx)

# print(etrue)
print(len(etrue), len(espur), len(emiss), len(efals))

pos = nx.shell_layout(G)
pos = swap_pos(pos, 'X4', 'X3')
# pos = swap_pos(pos, 'X3', 'X5')
# pos = swap_pos(pos, 'X10', 'X5')
# pos = swap_pos(pos, 'X10', 'X8')
# pos = swap_pos(pos, 'X8', 'X3')
# pos = swap_pos(pos, 'X6', 'X5')
# pos = swap_pos(pos, 'X1', 'X2')
# pos = swap_pos(pos, 'X11', 'X8')
# pos = swap_pos(pos, 'X7', 'X19')
# pos = swap_pos(pos, 'X20', 'X7')
# pos = swap_pos(pos, 'X18', 'X16')


# nodes
nx.draw_networkx_nodes(G, pos, node_size=400, node_color="#1f78b4")

# edges
nx.draw_networkx_edges(G, pos, edgelist=espur, width=2, arrowstyle='->', arrowsize=20, edge_color="orange", label="Spurious Edges")
nx.draw_networkx_edges(G, pos, edgelist=emiss, width=2, arrowstyle='->', arrowsize=20, edge_color="purple", label="Missing Edges")
nx.draw_networkx_edges(G, pos, edgelist=efals, width=2, arrowstyle='->', arrowsize=20, edge_color="red", label="Anti-Causal Edges")
nx.draw_networkx_edges(G, pos, edgelist=etrue, width=2, arrowstyle='->', arrowsize=20, edge_color="green", label="Causal Edges")

# node labels
nx.draw_networkx_labels(G, pos, font_size=12, font_family="sans-serif", font_color='white')

# edge weight labels
# edge_labels = nx.get_edge_attributes(G, "weight")
# nx.draw_networkx_edge_labels(G, pos, edge_labels)

ax = plt.gca()
ax.margins(0.08)
plt.axis("off")
plt.tight_layout()
# plt.box()
plt.title(dataname.upper())
# plt.legend()
plt.show()

# plt.savefig("res/asia.plot.svg", format="svg")