In [1]:
import numpy as np
import scipy as sp
import scipy.linalg
import scipy.stats
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import r2_score
import pickle
import time
from scipy.optimize import fsolve
from scipy.optimize import least_squares
from scipy.optimize import minimize
from scipy import sparse
import os.path
from scipy.interpolate import splrep, splev
import pandas as pd
from scipy.stats import norm
from sklearn.linear_model import LinearRegression
import sys
sys.path.append('../..')
from CIMatrixLib.src.util import *
from CIMatrixLib.src.TreatPattern import *
from CIMatrixLib.src.algorithms.DebiasConvex import *
from CIMatrixLib.src.algorithms.CovariancePCA import *
from CIMatrixLib.src.algorithms.MCNNM import *
from CIMatrixLib.src.algorithms.RobustSyntheticControl import *
import CIMatrixLib.src.readData as readData
from CIMatrixLib.src.algorithms.OLS import *
from CIMatrixLib.src.algorithms.SDID import *
import importlib

sns.set_style("whitegrid")

In [2]:
%run ../src/algorithms/SDID.py
%run ../src/algorithms/DebiasConvexMultipleTreatment.py

## Generate the low-rank matrix $M_0$

In [3]:
def synthetic_M0(n1=50, n2=50, mean_M = 1, r = 10, gamma_shape = 1, gamma_scale = 2, type = 'Gamma'): 
    '''
        Generate a random (n1 x n2) matrix M0 = U V^T of rank at most r.

        type == 'Gamma':    U and V have i.i.d. Gamma(gamma_shape, gamma_scale)
                            entries, so M0 is non-negative; M0 is rescaled so
                            that mean(M0) == mean_M.
        type == 'Gaussian': U and V have i.i.d. standard normal entries and
                            the product is multiplied by mean_M (the entries
                            are NOT re-centred, so mean_M only acts as a scale).

        Returns the (n1 x n2) numpy array M0.
        Raises ValueError for any other value of `type`.
    '''
    if type == 'Gamma':
        U = np.random.gamma(shape = gamma_shape, scale = gamma_scale, size = (n1, r))
        V = np.random.gamma(shape = gamma_shape, scale = gamma_scale, size = (n2, r))
        M0 = U.dot(V.T)
        return M0 / np.mean(M0) * mean_M
    if type == 'Gaussian':
        U = np.random.normal(loc = 0, scale = 1, size = (n1, r))
        V = np.random.normal(loc = 0, scale = 1, size = (n2, r))
        return mean_M * U.dot(V.T)
    # Previously an unknown type fell through and failed with an unbound M0.
    raise ValueError("unknown type {!r}; expected 'Gamma' or 'Gaussian'".format(type))

## Generate treatment patterns $Z$

In [4]:
def generate_Z(pattern_tuple = ['adaptive'], M0 = 0):
    '''
        Generate a binary treatment matrix Z for the requested pattern.

        pattern_tuple[0] names the pattern; pattern_tuple[1] holds its two
        parameters (not used by 'iid'):
          - 'adaptive': (a, b), forwarded to adpative_treatment_pattern(a, b, M0)
          - 'iid'     : every entry is treated independently with a
                        probability drawn uniformly from [0, 0.5)
          - 'block'   : (m1, m2), forwarded to simultaneous_adoption(m1, m2, M0)
          - 'stagger' : (m1, m2), forwarded to stagger_adoption(m1, m2, M0)

        A draw is rejected when some row or some column is entirely treated,
        or when nothing is treated. Rejected draws are regenerated, except for
        'adaptive', where the rejected Z is returned at once with 'fail'.

        Returns
          - 'adaptive': (Z, 'success') or (Z, 'fail')
          - 'block'   : (Z, treat_units)
          - otherwise : Z

        Raises ValueError for an unknown pattern name.
    '''
    pattern = pattern_tuple[0]
    if pattern not in ('adaptive', 'iid', 'block', 'stagger'):
        # Previously an unknown name failed later with an unbound Z.
        raise ValueError("unknown treatment pattern {!r}".format(pattern))

    while True:
        if pattern == 'adaptive':
            a = pattern_tuple[1][0]
            b = pattern_tuple[1][1]
            Z = adpative_treatment_pattern(a, b, M0)
        elif pattern == 'iid':
            # Size Z like M0; fall back to the notebook globals n1, n2 (the
            # previous behaviour) only when M0 is not a 2-D matrix.
            shape = np.shape(M0)
            if len(shape) != 2:
                shape = (n1, n2)
            p_treat = np.random.rand()*0.5
            Z = np.random.rand(*shape) <= p_treat
        elif pattern == 'block':
            m2 = pattern_tuple[1][1]
            Z, treat_units = simultaneous_adoption(pattern_tuple[1][0], m2, M0)
        else:  # 'stagger'
            m2 = pattern_tuple[1][1]
            Z = stagger_adoption(pattern_tuple[1][0], m2, M0)

        ## if some row or some column is all treated; or Z=0; generate Z again
        control = 1 - Z
        degenerate = (np.sum(np.sum(control, axis=0) == 0) > 0
                      or np.sum(np.sum(control, axis=1) == 0) > 0
                      or np.sum(Z) == 0)
        if not degenerate:
            break
        if pattern == 'adaptive':
            return Z, 'fail'

    if pattern == 'block':
        return Z, treat_units
    if pattern == 'adaptive':
        return Z, 'success'
    return Z

## Run different types of algorithms

Algorithms:

- convex_debias: our algorithm
- convex: our algorithm without de-bias procedure
- non_convex: least squares solved by alternating minimization
- missing: MC-NNM 
- no-fixed missing: MC-NNM without fixed effects
- PCA: Xiong-Pelger 19
- robust_synthetic_control: RSC
- trivial: (average over Z) - (average over 1-Z)
- OLS: DID
- ideal: assume the counterfactual is known
- SDID: SDID

In [5]:
def run_algo(algo_list, O, Z, suggest_l=-1, suggest_r=-1, de_mean_O=False, eps = 1e-3, treat_units = [], tau_star = 0, m2 = 0, M0 = 0, real_data = False, suggest_random_tau=0):
    '''
    Run every algorithm named in algo_list on the observation matrix O with
    treatment pattern Z and collect the estimates.

    Parameters
    ----------
    algo_list : iterable of algorithm keys, processed in order. 'convex'
        reuses the non-debiased output of 'convex_debias', so it must come
        after it.
    O, Z : observation matrix and binary treatment matrix of the same shape.
    suggest_l, suggest_r : regularisation weight and rank hints forwarded to
        the algorithms that accept them.
    de_mean_O, eps, real_data : forwarded to tune_convex_algorithm_with_rank
        (real_data also to tune_missing_algorithm_with_rank).
    treat_units, m2 : treated rows and starting time for the synthetic-control
        style algorithms; an empty treat_units selects the staggered variants.
    tau_star : true effect. NOTE: 'PCA' uses it to choose between the rank-r
        and rank-1 fits, and it is forwarded to OLS.
    M0 : true counterfactual matrix, used by 'ideal' and returned as M by
        'SDID'.
    suggest_random_tau : initial tau forwarded to non_convex_algorithm.

    Returns
    -------
    dict mapping each algorithm key to a tuple (M, tau).

    Raises
    ------
    Exception if 'convex' is requested before 'convex_debias' has run.

    NOTE(review): a key with no branch below (e.g. 'm_debias') is stored with
    the (M, tau) left over from the previous algorithm, or fails with an
    unbound-variable error when it comes first. This is kept because callers
    in this notebook pass such keys.
    '''
    results = {}
    # Initialised up front so the 'convex' guard works instead of tripping
    # over an unbound local.
    M_no_debias = None
    tau_no_debias = None
    # len() keeps this valid for numpy arrays, where `treat_units == []` is
    # not a plain boolean. None keeps its old meaning (treated as non-empty).
    no_treat_units = treat_units is not None and len(treat_units) == 0

    for algo in algo_list:
        if algo == 'convex_debias':
            M, tau, M_no_debias, tau_no_debias = tune_convex_algorithm_with_rank(O, Z, suggest_lambda = suggest_l, suggest_r=suggest_r, de_mean_O=de_mean_O, eps = eps, real_data = real_data)

        elif algo == 'convex':
            if tau_no_debias is None:
                raise Exception('do not run convex before convex_debias!')
            M = M_no_debias
            tau = tau_no_debias

        elif algo == 'non_convex':
            M, tau, info = non_convex_algorithm(O, Z, r=suggest_r, tau = suggest_random_tau)

        elif algo == 'missing':
            M, a, b, tau = tune_missing_algorithm_with_rank(O, 1-Z, fixed_effects=True, suggest_r = suggest_r, suggest_lambda = suggest_l, real_data = real_data)
            # Add the row and column fixed effects back onto the estimate.
            one_row = np.ones((1, M.shape[1]))
            one_col = np.ones((M.shape[0], 1))
            M = M + a.dot(one_row) + one_col.dot(b.T)

        elif algo == 'no-fixed missing':
            M, a, b, tau = tune_missing_algorithm_with_crossing_validation(O, 1-Z, fixed_effects=False)

        elif algo == 'PCA':
            M, tau = covariance_PCA(O, 1-Z, suggest_r=suggest_r)
            M_1, tau_1 = covariance_PCA(O, 1-Z, suggest_r=1) #make the results more robust
            # Keeps whichever fit is closer to the true tau_star.
            if (abs(tau_1-tau_star) < abs(tau - tau_star)):
                M = M_1
                tau = tau_1

        elif algo == 'robust_synthetic_control':
            if no_treat_units:
                M, tau = stagger_pattern_RSC(O, Z, suggest_r = suggest_r)
            else:
                M, tau = synthetic_control(O, suggest_r=suggest_r, treat_units=treat_units, starting_time=m2)

        elif algo == 'trivial':
            # Difference between the treated mean and the control mean.
            tau = np.sum(O*Z)/np.sum(Z) - np.sum(O*(1-Z))/np.sum(1-Z)
            M = O - Z * tau

        elif algo == 'OLS':
            M, tau = OLS(O, Z, tau_star = tau_star)

        elif algo == 'ideal':
            M = M0
            tau = np.sum((O-M0)*Z) / np.sum(Z)

        elif algo == 'SDID':
            M = M0
            if no_treat_units:
                # Derive the treated rows and starting time from Z. They stay
                # in local names so later algorithms still see the caller's
                # treat_units. The derived starting_time is now actually
                # passed on; it used to be computed and then replaced by m2.
                sdid_units = np.arange(Z.shape[0])[(Z[:, -1] == 1)]
                starting_time = Z.shape[1] - np.min(np.sum(Z[sdid_units, :], axis=1))
                tau = SDID(O, Z, treat_units = sdid_units, starting_time = starting_time)
            else:
                tau = SDID(O, Z, treat_units = treat_units, starting_time = m2)

        results[algo] = (M, tau)
    return results

## Semi-synthetic experiments on Sales data

In [24]:

# Sales panel used as the baseline matrix, with its singular values.
M0 = readData.read_data('sales')
s = np.linalg.svd(M0, compute_uv=False, full_matrices=False)
# Noise level and rank hint for the sales experiments.
sigma, suggest_r = 0, 35

def sales_experiment_performance_run_results(num_experiment=1, sigma = 0.1, pattern = 'block', suggest_r = 10):
    '''
    Run semi-synthetic experiments on the module-level matrix M0 and collect
    the tau error of every algorithm.

    For each experiment a treatment pattern Z is drawn, Gaussian noise and
    unit-specific treatment effects are added to M0, and the algorithms are
    run through run_algo.

    Parameters
    ----------
    num_experiment : number of experiments (rows of the result).
    sigma : standard deviation of the Gaussian noise E.
    pattern : 'stagger', 'block' or 'adaptive'.
    suggest_r : rank hint for the algorithms. Unless it is -1, the
        regularisation weight is 1.1 * s[suggest_r], with s the singular
        values of M0.

    Returns
    -------
    pandas.DataFrame with one row per experiment and one column per
    algorithm, holding the 'tau_diff' value reported by metric_compute.

    Raises
    ------
    ValueError if pattern is not supported (this used to surface as an
    unbound Z in the middle of the first experiment).
    '''
    if pattern not in ('stagger', 'block', 'adaptive'):
        raise ValueError("unsupported pattern {!r}".format(pattern))

    t1 = time.time()

    algo_list = ['convex_debias', 'missing', 'OLS', 'PCA']
    if (pattern == 'block'):
        algo_list.append('robust_synthetic_control')

    datas = np.zeros((num_experiment, len(algo_list)))

    (n1, n2) = M0.shape

    suggest_l = -1
    if (suggest_r != -1):
        s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)
        suggest_l = s[suggest_r]*1.1

    # Baseline effect size; the realised tau_star adds a per-experiment shift.
    tau_star_o = np.mean(M0)/5

    for T in range(num_experiment):
        if (T % 100 == 0):
            print(time.time() - t1)
            print('experiment ', T)

        ## generating the treatment pattern Z
        if (pattern == 'stagger'):
            m1 = np.random.randint(low=1, high=n1)
            m2 = np.random.randint(low=int(n2/5), high=n2)
            Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)
        elif (pattern == 'block'):
            m1 = np.random.randint(low=1, high=int(n1/3))
            m2 = np.random.randint(low=int(n2/2), high=n2)
            # NOTE(review): the treated units returned here are not passed to
            # run_algo below (treat_units=[] and m2=0 are passed instead), so
            # RSC takes its staggered branch. Confirm this is intended.
            Z, _ = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)
        else:  # 'adaptive'
            while True:
                a = np.random.randint(21)+5
                b = np.random.randint(21)+5
                Z, info = generate_Z(pattern_tuple = ['adaptive', (a, b)], M0=M0)
                if (info != 'fail'):
                    break
        print('***sparsity****', np.sum(Z) / np.size(Z))

        E = np.random.normal(loc=0, scale=sigma, size=M0.shape)

        # One Gaussian effect per row, re-centred so that its average over
        # the treated entries is zero; the removed average moves into tau_star.
        delta = np.random.normal(loc = 0, scale = tau_star_o/2, size = (n1, 1)) * np.ones((n1, n2))
        d1 = np.sum(Z * delta) / np.sum(Z)
        delta = delta - d1
        tau_star = tau_star_o + d1

        O = M0 + Z*delta + tau_star * Z + E

        results = run_algo(algo_list, O, Z, suggest_r = suggest_r, suggest_l = suggest_l, eps = 1e-1, de_mean_O=False, treat_units=[], tau_star = tau_star, m2 = 0, M0 = M0, real_data = True)

        error_metric = {}
        for algo in algo_list:
            (M, tau) = results[algo]
            error_metric[algo] = metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']

        print(error_metric)
        for index, algo in enumerate(algo_list):
            datas[T, index] = error_metric[algo]
        print('experiment {}, time elapses '.format(T), time.time() - t1)
    datas = pd.DataFrame(datas, columns = algo_list)
    return datas


# Fixed seed so the 100 adaptive-pattern experiments are reproducible.
np.random.seed(1)
datas = sales_experiment_performance_run_results(sigma = sigma, num_experiment = 100, pattern = 'adaptive', suggest_r = suggest_r)
directory = 'results/plot_results/'
file_name = 'sales_adaptive_performance_results.p'
if not os.path.exists(directory):
    os.makedirs(directory)
    print(f"Directory '{directory}' created.")
full_path = os.path.join(directory, file_name)
# The context manager closes (and flushes) the file; the bare open() did not.
with open(full_path, 'wb') as results_file:
    pickle.dump(datas, results_file)

In [23]:
def sales_experiment_performance_plot_results(datas, label = 'min', file_name = ''):
    '''
    Summarise and plot the per-experiment tau errors of the sales experiments.

    Displays summary statistics and a pairwise comparison table, draws the
    empirical CDF of the scaled absolute errors (saved as
    file_name + '_cdf.eps') and a bar plot of the signed errors.

    Parameters
    ----------
    datas : pandas.DataFrame with one column per algorithm and one row per
        experiment. Columns without a legend entry below are ignored.
    label : 'min' divides every row by its smallest absolute error and caps
        the ratio at 10; any other value keeps the absolute errors.
    file_name : path prefix for the saved CDF figure.

    Reads the module-level matrix M0 to scale the errors.
    '''
    results = np.abs(datas.values)
    results_original = datas.values
    display(datas.describe())
    display((datas/np.mean(M0)*5).abs().describe())
    if (label == 'min'):
        results = (results.T / np.min(results, axis=1)).T
        results = np.where(results > 10, 10, results)

    legend_dic = {'convex_debias': 'De-biased Convex', 'missing': 'MC-NNM [ABDIK18]', 'OLS':'OLS', 'PCA':'W-PCA [XP19]', 'robust_synthetic_control':'RSC'}
    preferred_order = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA']
    # Build the filtered list in one pass: calling remove() on a list while
    # iterating over it skips the element after each removal.
    algo_list = [algo for algo in preferred_order if algo in datas.columns]
    legend_list = [legend_dic[algo] for algo in algo_list]

    # Size the table by the plotted algorithms, not by datas.columns, so that
    # extra columns in datas cannot push the index past algo_list.
    m = len(algo_list)
    abs_datas = datas.abs()
    comparison_table = np.zeros((m,m))
    for i, ai in enumerate(algo_list):
        for j, aj in enumerate(algo_list):
            # Fraction of experiments in which ai has a smaller error than aj.
            comparison_table[i][j] = np.sum(abs_datas[ai] < abs_datas[aj]) / datas.shape[0]
    df_compare = pd.DataFrame(data=comparison_table, index=legend_list, columns=legend_list)
    display(df_compare)

    column_names = list(datas.columns)
    df_list = []
    df_list_original = []
    for i in range(results.shape[0]):
        for algo in algo_list:
            j = column_names.index(algo)
            # NOTE(review): the summary above scales by 5 / mean(M0) while the
            # CDF scales by 10 / mean(M0) - confirm which matches the label.
            df_list.append([results[i,j] / np.mean(M0) * 10, algo])
            df_list_original.append([results_original[i,j], algo])

    # NOTE(review): this adds an artificial observation of 1 to the
    # 'convex_debias' sample, which changes its plotted CDF. Kept to preserve
    # the existing figure - confirm it is intentional.
    df_list.append([1, 'convex_debias'])

    axis_label = r'$|\tau - \tau^{*}| / \tau^{*}$'
    df = pd.DataFrame(df_list, columns = [axis_label, 'algos']) 
    df_original = pd.DataFrame(df_list_original, columns = [axis_label, 'algos'])

    g = sns.displot(data = df, x = axis_label, hue = 'algos', kind='ecdf', legend = False)

    # Recover the distinct line colours from the axes to build the legend.
    Ax = g.ax
    Boxes = [item for item in Ax.get_children()
        if isinstance(item, matplotlib.lines.Line2D)]

    color_set = []
    for item in Boxes:
        if (item.get_color() not in color_set):
            color_set.append(item.get_color())
    print(color_set)

    # The colours are paired with the legend names in reverse order.
    legend_patches = [matplotlib.patches.Patch(color=C, label=L) for
                 C, L in zip(color_set[::-1],
                             legend_list)]

    plt.legend(handles=legend_patches, fontsize = 13)
    plt.ylabel('CDF', fontsize=14)
    plt.xlabel(axis_label, fontsize=14)
    plt.xlim((0, 1))
    plt.savefig(file_name + '_cdf.eps')
    plt.show()

    sns.catplot(data = df_original, x = 'algos', y = axis_label, kind='bar')
    plt.show()
M0 = readData.read_data('sales')
file_name =  'results/plot_results/sales_adaptive_performance_results'
#file_name = 'tmp'
# Only load pickles written by this notebook: unpickling runs arbitrary code.
with open(file_name + '.p', 'rb') as results_file:
    datas = pickle.load(results_file)
sales_experiment_performance_plot_results(datas, label = 'max', file_name = file_name)

In [None]:
# Singular-value spectrum of the sales matrix, on linear and log scales.
M0 = readData.read_data('sales')
s = np.linalg.svd(M0, compute_uv=False, full_matrices=False)
plt.plot(s)
plt.show()
log_spectrum = np.log(s)
plt.plot(log_spectrum)
# Share of the singular-value mass held by the first 35 values.
top_share = np.sum(s[:35]) / np.sum(s)
print(top_share)
print(M0.shape)
print(np.mean(M0))

In [None]:
def plot_beer_pattern():
    '''
    Draw one adaptive treatment pattern on the sales data, save it as a PNG
    and print diagnostics for it.
    '''
    M0 = readData.read_data('sales')
    pattern_params = (10, 10)
    Z, info = generate_Z(pattern_tuple = ['adaptive', pattern_params], M0=M0)

    plt.imshow(Z, cmap = 'Greys', interpolation='nearest')
    plt.savefig('results/plot_results/sales_adaptive_pattern.png', dpi=300)
    plt.show()

    print(convex_condition_test(M0, Z, 35))
    # Squared Frobenius norm of the projected pattern, next to that of Z.
    projected_Z = projection_T_orthogonal(Z, M0)
    print(np.sum(projected_Z**2))
    print(np.sum(Z**2))


plot_beer_pattern()
    

## Semi-synthetic experiments on Tobacco data

In [None]:
def tobacco_experiment_performance_run_results(num_experiment=1, sigma = 0.1, pattern = 'block', suggest_r = 10, row_specific=False):
    '''
    Run semi-synthetic experiments on the module-level matrix M0 and collect
    the error of every algorithm.

    Parameters
    ----------
    num_experiment : number of experiments (rows of the result).
    sigma : standard deviation of the Gaussian noise E.
    pattern : 'stagger' or 'block'.
    suggest_r : rank hint. Unless it is -1, the regularisation weight passed
        to run_algo is 1.1 * s[suggest_r], with s the singular values of M0 + E.
    row_specific : when True, every algorithm is scored on its per-row
        effects (relative l2 error) and the row-specific convex estimator is
        reported as 'm_debias'. When False, the 'tau_diff' value from
        metric_compute is reported and 'm_debias' is NaN.

    Returns
    -------
    pandas.DataFrame with one row per experiment and one column per
    algorithm (including 'm_debias').

    Raises
    ------
    ValueError if pattern is not supported (this used to surface as an
    unbound Z in the middle of the first experiment).
    '''
    if pattern not in ('block', 'stagger'):
        raise ValueError("unsupported pattern {!r}".format(pattern))

    t1 = time.time()

    algo_list = ['convex_debias', 'missing', 'OLS', 'PCA', 'm_debias', 'robust_synthetic_control']
    # run_algo has no 'm_debias' branch and would store the previous
    # algorithm's result under that key, so it is computed separately below
    # and never passed to run_algo.
    run_algo_list = [algo for algo in algo_list if algo != 'm_debias']

    datas = np.zeros((num_experiment, len(algo_list)))

    (n1, n2) = M0.shape

    # M0 does not change between experiments, so its spectrum and the
    # baseline effect size are computed once.
    s_M0 = np.linalg.svd(M0, full_matrices=False, compute_uv=False)
    tau_star_o = np.mean(M0) / 5

    for T in range(num_experiment):
        if (T % 100 == 0):
            print(time.time() - t1)
            print('experiment ', T)

        ## generating the treatment pattern Z
        if (pattern == 'stagger'):
            m1 = np.random.randint(low=1, high=n1)
            m2 = np.random.randint(low=1, high=n2)
            Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)
            treat_units = []
        else:  # 'block'
            m1 = np.random.randint(low=1, high=5)
            m2 = 18
            Z, treat_units = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)
            print('***sparsity****', np.sum(Z) / np.size(Z))

        E = np.random.normal(loc=0, scale=sigma, size=M0.shape)

        plt.imshow(Z, cmap = 'Greys', interpolation='nearest')
        plt.show()

        # One Gaussian effect per row, re-centred so that its average over
        # the treated entries is zero; the removed average moves into tau_star.
        delta = np.random.normal(loc = 0, scale = tau_star_o/2, size = (n1, 1)) * np.ones((n1, n2))
        d1 = np.sum(Z * delta) / np.sum(Z)
        delta = delta - d1
        tau_star = tau_star_o + d1

        O = M0 + Z*delta + tau_star * Z + E 
        E_op = np.linalg.norm(Z*delta, ord=2)
        print('M_0 sigma_min', s_M0[suggest_r-1], 'Delta_norm', E_op)

        suggest_l = -1
        if (suggest_r != -1):
            s_noisy = np.linalg.svd(M0+E, full_matrices=False, compute_uv=False)
            suggest_l = s_noisy[suggest_r]*1.1

        results = run_algo(run_algo_list, O, Z, suggest_r = suggest_r, suggest_l = suggest_l, eps = 1e-4, de_mean_O=False, treat_units=treat_units, tau_star = tau_star, m2 = m2, M0 = M0, real_data = True, suggest_random_tau = tau_star*10)

        error_metric = {}
        if (row_specific==True):
            # Per-row target effect, read from the last column of Z.
            tau_star_i = ((tau_star + delta)*Z)[:, -1]

            for algo in run_algo_list:
                (M, tau) = results[algo]
                # Row-wise average residual over the treated entries; the
                # small constant avoids dividing by zero for untreated rows.
                tau_i = np.sum((O - M)*Z, axis = 1) / (np.sum(Z, axis=1) + 1e-7)
                error_metric[algo] = np.linalg.norm(tau_i-tau_star_i) / np.linalg.norm(tau_star_i)

            # The row-specific estimator uses a weight taken from the
            # spectrum of M0 itself (not of M0 + E).
            suggest_l = s_M0[suggest_r]*1.1

            M, tau, info = convex_algorithm_row_specific_treatments(O, np.ones_like(O), Z, suggest_l, suggest = [], eps = 1e-3, debug = False)
            tau_debias = debias_row_specific(M, tau, Z, suggest_l)
            error_metric['m_debias'] = np.linalg.norm(tau_debias.reshape(-1) - tau_star_i) / np.linalg.norm(tau_star_i)
        else:
            for algo in run_algo_list:
                (M, tau) = results[algo]
                error_metric[algo] = metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']
            # Not computed in this mode; NaN replaces the copy of another
            # algorithm's error that used to be reported here.
            error_metric['m_debias'] = np.nan

        print(error_metric)
        for index, algo in enumerate(algo_list):
            datas[T, index] = error_metric[algo]
        print('experiment {}, time elapses '.format(T), time.time() - t1)
    datas = pd.DataFrame(datas, columns = algo_list)
    return datas

M0 = readData.read_data('tobacco')
s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)
#print(s)
sigma = 0
suggest_r = 5

# Fixed seed so the experiment is reproducible.
np.random.seed(5)

datas = tobacco_experiment_performance_run_results(sigma = sigma, num_experiment = 1, pattern = 'stagger', suggest_r = suggest_r, row_specific=True)
#file_name = 'results/plot_results/tobacco_block_performance_results_r{}.p'.format(suggest_r)
file_name = 'tmp.p'
# The context manager closes (and flushes) the file; the bare open() did not.
with open(file_name, 'wb') as results_file:
    pickle.dump(datas, results_file)

In [None]:
def tobacco_experiment_performance_plot_results(datas, label = 'min', file_name = ''):
    '''
    Summarise and plot the per-experiment errors of the tobacco experiments.

    Displays summary statistics and a pairwise comparison table, draws the
    empirical CDF of the scaled absolute errors (saved as
    file_name + '_cdf.eps') and a bar plot of the signed errors.

    Parameters
    ----------
    datas : pandas.DataFrame with one column per algorithm and one row per
        experiment. Columns without a legend entry below (e.g. 'm_debias')
        are ignored.
    label : 'min' divides every row by its smallest absolute error and caps
        the ratio at 10; any other value keeps the absolute errors.
    file_name : path prefix for the saved CDF figure.

    Reads the module-level matrix M0 to scale the errors.
    '''
    results = np.abs(datas.values)
    results_original = datas.values
    display(datas.describe())
    datas_abs = datas.abs()/np.mean(M0)*10
    display(datas_abs.describe())
    if (label == 'min'):
        results = (results.T / np.min(results, axis=1)).T
        results = np.where(results > 10, 10, results)

    legend_dic = {'convex_debias': 'De-biased Convex', 'missing': 'MC-NNM', 'OLS':'OLS', 'PCA':'W-PCA', 'robust_synthetic_control':'RSC'}
    preferred_order = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA']
    # Build the filtered list in one pass: calling remove() on a list while
    # iterating over it skips the element after each removal.
    algo_list = [algo for algo in preferred_order if algo in datas.columns]
    legend_list = [legend_dic[algo] for algo in algo_list]

    # Size the table by the plotted algorithms, not by datas.columns: a frame
    # with an extra column such as 'm_debias' used to raise IndexError here.
    m = len(algo_list)
    abs_datas = datas.abs()
    comparison_table = np.zeros((m,m))
    for i, ai in enumerate(algo_list):
        for j, aj in enumerate(algo_list):
            # Fraction of experiments in which ai has a smaller error than aj.
            comparison_table[i][j] = np.sum(abs_datas[ai] < abs_datas[aj]) / datas.shape[0]
    df_compare = pd.DataFrame(data=comparison_table, index=legend_list, columns=legend_list)
    display(df_compare)

    column_names = list(datas.columns)
    df_list = []
    df_list_original = []
    for i in range(results.shape[0]):
        for algo in algo_list:
            j = column_names.index(algo)
            df_list.append([results[i,j]/np.mean(M0)*10, algo])
            df_list_original.append([results_original[i,j], algo])

    axis_label = r'$|\tau - \tau^{*}| / \tau^{*}$'
    df = pd.DataFrame(df_list, columns = [axis_label, 'algos'])
    df_original = pd.DataFrame(df_list_original, columns = [axis_label, 'algos'])

    g = sns.displot(data = df, x = axis_label, hue = 'algos', kind='ecdf', legend = False)

    # Recover the distinct line colours from the axes to build the legend.
    Ax = g.ax
    Boxes = [item for item in Ax.get_children()
        if isinstance(item, matplotlib.lines.Line2D)]

    color_set = []
    for item in Boxes:
        if (item.get_color() not in color_set):
            color_set.append(item.get_color())
    print(color_set)

    # The colours are paired with the legend names in reverse order.
    legend_patches = [matplotlib.patches.Patch(color=C, label=L) for
                 C, L in zip(color_set[::-1],
                             legend_list)]

    plt.legend(handles=legend_patches, fontsize = 13)
    plt.ylabel('CDF', fontsize=14)
    plt.xlabel(axis_label, fontsize=14)
    plt.xlim((0, 1))
    plt.savefig(file_name + '_cdf.eps')
    plt.show()

    sns.catplot(data = df_original, x = 'algos', y = axis_label, kind='bar')
    plt.show()

M0 = readData.read_data('tobacco')
#file_name =  'results/plot_results/tobacco_block_performance_results_r5'
file_name = 'tmp'
# Only load pickles written by this notebook: unpickling runs arbitrary code.
with open(file_name + '.p', 'rb') as results_file:
    datas = pickle.load(results_file)
tobacco_experiment_performance_plot_results(datas, label = 'max', file_name = file_name)

In [None]:

# Singular-value spectrum of the tobacco matrix.
M0 = readData.read_data('tobacco')
s = np.linalg.svd(M0, compute_uv=False, full_matrices=False)
plt.plot(s)
# Share of the singular-value mass held by the first four values.
top_share = np.sum(s[0:4]) / np.sum(s)
print(top_share)

## Synthetic experiments for various algorithms performance

In [None]:
### Set up
# Matrix size (rows, columns), mean of M0, and noise standard deviation.
n1, n2 = 100, 100
mean_M, sigma = 10, 1

In [None]:
def synthetic_experiment_performance_run_results(n1 = 50, n2 = 50, mean_M = 1, num_experiment=1, sigma = 0.1, pattern = 'block'):
    '''
    Run fully synthetic experiments and collect the tau error of every
    algorithm.

    Each experiment draws a fresh rank-10 Gamma matrix M0 (synthetic_M0), a
    treatment pattern Z, Gaussian noise and unit-specific treatment effects,
    then runs the algorithms through run_algo.

    Parameters
    ----------
    n1, n2 : number of rows and columns of M0.
    mean_M : mean of the entries of M0.
    num_experiment : number of experiments (rows of the result).
    sigma : standard deviation of the Gaussian noise E.
    pattern : 'stagger', 'block' or 'adaptive'.

    Returns
    -------
    pandas.DataFrame with one row per experiment and one column per
    algorithm, holding the 'tau_diff' value reported by metric_compute.

    Raises
    ------
    ValueError if pattern is not supported (this used to surface as an
    unbound Z in the middle of the first experiment).
    '''
    if pattern not in ('stagger', 'block', 'adaptive'):
        raise ValueError("unsupported pattern {!r}".format(pattern))

    t1 = time.time()

    algo_list = ['convex_debias', 'missing', 'OLS', 'PCA']
    if (pattern == 'block' or pattern == 'stagger'):
        algo_list.append('robust_synthetic_control')
    if (pattern == 'block'):
        algo_list.append('SDID')

    datas = np.zeros((num_experiment, len(algo_list)))

    r = 10
    tau_star_o = 1

    for T in range(num_experiment):
        if (T % 100 == 0):
            print(time.time() - t1)
            print('experiment ', T)

        M0 = synthetic_M0(n1, n2, mean_M, r, type='Gamma')

        ## generating the treatment pattern Z
        treat_units = []
        m2 = 0
        if (pattern == 'stagger'):
            m1 = np.random.randint(low=1, high=n1)
            m2 = np.random.randint(low=1, high=n2)
            Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)
        elif (pattern == 'block'):
            m1 = np.random.randint(low=1, high=int(n1/10))
            m2 = np.random.randint(low=1, high=int(n2/3))
            Z, treat_units = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)
            print('***sparsity****', np.sum(Z) / np.size(Z))
        else:  # 'adaptive'
            while True:
                a = np.random.randint(25)+5
                b = np.random.randint(25)+5
                Z, info = generate_Z(pattern_tuple = ['adaptive', (a, b)], M0=M0)
                if (info != 'fail'):
                    break

        E = np.random.normal(loc=0, scale=sigma, size=M0.shape)

        # One Gaussian effect per row, re-centred so that its average over
        # the treated entries is zero; the removed average moves into tau_star.
        delta = np.random.normal(loc = 0, scale = 1, size = (n1, 1)) * np.ones((n1, n2))
        d1 = np.sum(Z * delta) / np.sum(Z)
        delta = delta - d1
        tau_star = tau_star_o + d1

        O = M0 + Z*delta + tau_star * Z + E 

        # Regularisation weight: the smaller of (r-th singular value of M0)/1.1
        # and 1.1 * the spectral norm of the total perturbation.
        E_op = np.linalg.norm(E+Z*delta, ord=2)
        E_norm = np.linalg.norm(E, ord=2)
        s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)
        suggest_l = min(s[r-1]/1.1, E_op*1.1)
        print('E norm', E_norm, 'delta_norm', np.linalg.norm(Z*delta), 'M0_s', s[r-1])
        print(s[r-1], E_op, E_norm)

        results = run_algo(algo_list, O, Z, suggest_r = r, suggest_l = suggest_l, eps = 1e-4, de_mean_O=False, treat_units=treat_units, tau_star = tau_star, m2 = m2, M0 = M0)

        error_metric = {}
        for algo in algo_list:
            (M, tau) = results[algo]
            error_metric[algo] = metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']

        print(error_metric)
        for index, algo in enumerate(algo_list):
            datas[T, index] = error_metric[algo]
        print('experiment {}, time elapses '.format(T), time.time() - t1)
    datas = pd.DataFrame(datas, columns = algo_list)
    return datas


pattern = 'stagger'
datas = synthetic_experiment_performance_run_results(n1 = n1, n2 = n2, mean_M = mean_M, sigma = sigma, num_experiment = 100, pattern = pattern)
file_name = 'results/plot_results/synthetic_{}_m1_m2_performance_results.p'.format(pattern)
#file_name = 'tmp.p'
# Create the output directory if needed, so a long run is not lost on a
# missing folder. Skipped when file_name has no directory part.
output_dir = os.path.dirname(file_name)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
# The context manager closes (and flushes) the file; the bare open() did not.
with open(file_name, 'wb') as results_file:
    pickle.dump(datas, results_file)

In [None]:
def synthetic_experiment_performance_plot_results(datas, label = 'min', file_name = ''):
    '''
    Summarise and plot the per-experiment tau errors of the synthetic
    experiments.

    Displays summary statistics and a pairwise comparison table, draws the
    empirical CDF of the absolute errors (saved as file_name + '_cdf.eps')
    and a bar plot of the signed errors.

    Parameters
    ----------
    datas : pandas.DataFrame with one column per algorithm and one row per
        experiment. Columns without a legend entry below (e.g. 'SDID') are
        ignored.
    label : 'min' divides every row by its smallest absolute error and caps
        the ratio at 10; any other value keeps the absolute errors.
    file_name : path prefix for the saved CDF figure.
    '''
    results = np.abs(datas.values)
    results_original = datas.values
    display(datas.describe())
    display(datas.abs().describe())
    if (label == 'min'):
        results = (results.T / np.min(results, axis=1)).T
        results = np.where(results > 10, 10, results)

    legend_dic = {'convex_debias': 'Debias Convex', 'missing': 'MC-NNM', 'OLS':'OLS', 'PCA':'W-PCA', 'robust_synthetic_control':'RSC'}
    preferred_order = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA']
    # Build the filtered list in one pass: calling remove() on a list while
    # iterating over it skips the element after each removal.
    algo_list = [algo for algo in preferred_order if algo in datas.columns]
    legend_list = [legend_dic[algo] for algo in algo_list]

    # Size the table by the plotted algorithms, not by datas.columns: a frame
    # with an extra column such as 'SDID' used to raise IndexError here.
    m = len(algo_list)
    abs_datas = datas.abs()
    comparison_table = np.zeros((m,m))
    for i, ai in enumerate(algo_list):
        for j, aj in enumerate(algo_list):
            # Fraction of experiments in which ai has a smaller error than aj.
            comparison_table[i][j] = np.sum(abs_datas[ai] < abs_datas[aj]) / datas.shape[0]
    df_compare = pd.DataFrame(data=comparison_table, index=legend_list, columns=legend_list)
    display(df_compare)

    column_names = list(datas.columns)
    df_list = []
    df_list_original = []
    for i in range(results.shape[0]):
        for algo in algo_list:
            j = column_names.index(algo)
            df_list.append([results[i,j], algo])
            df_list_original.append([results_original[i,j], algo])

    axis_label = r'$\frac{|\tau - \tau^{*}|}{\bar{M^{*}}}$'
    df = pd.DataFrame(df_list, columns = [axis_label, 'algos'])
    df_original = pd.DataFrame(df_list_original, columns = [axis_label, 'algos'])

    g = sns.displot(data = df, x = axis_label, hue = 'algos', kind='ecdf', legend = False)

    # Recover the distinct line colours from the axes to build the legend.
    Ax = g.ax
    Boxes = [item for item in Ax.get_children()
        if isinstance(item, matplotlib.lines.Line2D)]

    color_set = []
    for item in Boxes:
        if (item.get_color() not in color_set):
            color_set.append(item.get_color())
    print(color_set)

    # The colours are paired with the legend names in reverse order.
    legend_patches = [matplotlib.patches.Patch(color=C, label=L) for
                 C, L in zip(color_set[::-1],
                             legend_list)]

    plt.legend(handles=legend_patches, fontsize = 13)
    plt.ylabel('CDF', fontsize=14)
    plt.xlabel(axis_label, fontsize=14)
    plt.xlim((0, 0.8))
    plt.savefig(file_name + '_cdf.eps')
    plt.show()

    sns.catplot(data = df_original, x = 'algos', y = axis_label, kind='bar')
    plt.show()

# Plot the results currently held in the kernel. This cell relies on `datas` and
# `file_name` having been defined by a cell executed earlier; the commented lines
# show how to reload a pickled result set instead.
#file_name =  'results/plot_results/synthetic_stagger_m1_m2_performance_results'
#datas = pickle.load(open(file_name + '.p', 'rb'))
#synthetic_experiment_performance_plot_results(datas, label = 'min')
synthetic_experiment_performance_plot_results(datas, label = 'max', file_name = file_name)

## Synthetic Data: Row-Specific Effects

In [None]:
# Scratch check of NumPy broadcasting: multiplying a (5, 1) column by a (5, 7)
# matrix scales every row of B by the matching entry of A.
A = np.random.rand(5, 1)
B = np.ones((5, 7))
A*B

In [None]:
### Set up
# Global configuration read by the experiment cell that follows.
n1 = 100      # first dimension of M0
n2 = 100      # second dimension of M0
mean_M = 10   # target mean of the entries of M0
sigma = 1     # standard deviation of the Gaussian noise E

In [None]:
def synthetic_experiment_performance_run_results(n1 = 50, n2 = 50, mean_M = 1, num_experiment=1, sigma = 0.1, pattern = 'block'):
    '''
        Monte-Carlo comparison of estimators when the treatment effect is row specific.

        Each experiment draws a rank-10 Gamma matrix M0, a treatment pattern Z,
        Gaussian noise E and a per-row perturbation delta, builds
            O = M0 + Z*delta + tau_star*Z + E        (tau_star = 1)
        and records, for every algorithm, the l2 norm of the difference between the
        estimated and the true per-row treatment effects.

        Parameters:
            n1, n2:          dimensions passed to synthetic_M0
            mean_M:          target mean of M0
            num_experiment:  number of independent experiments
            sigma:           standard deviation of the noise E
            pattern:         'block', 'stagger' or 'adaptive' (selects how Z is generated)

        Returns:
            pandas DataFrame with one row per experiment and one column per algorithm.

        NOTE: this function name is redefined by later cells of the notebook; the
        definition that is active depends on which cell was executed last.
    '''
    samples = np.zeros(num_experiment)  # NOTE: allocated but never used below
    t1 = time.time()

    algo_list = ['convex_debias', 'missing', 'm_debias']

    # The synthetic-control baseline is only added for block / stagger patterns.
    if (pattern == 'block' or pattern == 'stagger'):
        algo_list.append('robust_synthetic_control')

    #if (pattern == 'block'):
    #    algo_list.append('SDID')

    datas = np.zeros((num_experiment, len(algo_list)))


    for T in range(num_experiment):
        # Progress report every 100 experiments.
        if (T % 100 == 0):
            print(time.time() - t1)
            print('experiment ', T)

        #r = np.random.randint(low=1, high=10+1)
        r = 10
        M0 = synthetic_M0(n1, n2, mean_M, r, type='Gamma')
        ## generating stagger pattern Z
        # Defaults used when the chosen pattern does not provide these values.
        treat_units = []
        m1 = 0
        m2 = 0
        if (pattern == 'stagger'):
            m1 = np.random.randint(low=1, high=n1)
            #m1 = 2
            m2 = np.random.randint(low=1, high=n2)
            # For 'stagger', generate_Z is used as returning Z only.
            Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)
            treat_units = []

        if (pattern == 'block'):
            m1 = np.random.randint(low=1, high=int(n1/3))
            m2 = np.random.randint(low=int(n2/2), high=int(n2))
            #m1 = 1
            #m2 = int(n2*0.7)
            # For 'block', generate_Z is used as returning (Z, treat_units).
            Z, treat_units = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)
            print('***sparsity****', np.sum(Z) / np.size(Z))

        if (pattern == 'adaptive'):
            # Retry with fresh parameters until generate_Z stops reporting 'fail'.
            while True:
                a = np.random.randint(25)+5
                b = np.random.randint(25)+5
                Z, info = generate_Z(pattern_tuple = ['adaptive', (a, b)], M0=M0)
                if (info == 'fail'):
                    continue
                break

        tau_star = 1

        #PTperpZ = projection_T_orthogonal(Z, M0)

        #predict_sigma = sigma / np.sqrt(np.sum(PTperpZ**2))

        E = np.random.normal(loc=0, scale=sigma, size=M0.shape)

        #E2 =  np.random.normal(loc=0, scale=sigma*5, size=M0.shape)
        #M1 = M0 + synthetic_M0(n1, n2, mean_M / 5, r, type='Gamma')
        #tau_star = np.sum(Z * (M1 - M0)) / np.sum(Z)
        #print(tau_star)

        # NOTE: this outer `s` is not read; test() recomputes its own local `s`.
        s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)

        def test():
            # Runs one experiment on the (M0, Z, E) captured from the enclosing loop
            # iteration and returns {algorithm name: error}.
            #M0, M1, E = synthetic_intervention_pattern()
            # One N(0, 1) draw per row, broadcast along the columns: the deviation of
            # each row's effect from tau_star.
            delta = np.random.normal(loc = 0, scale = 1, size = (n1, 1)) * np.ones((n1, n2))
            #print(delta)
            #d1 = np.sum(Z * delta) / np.sum(Z)
            #delta = delta - d1
            #tau_star = tau_star_o + d1

            O = M0 + Z*delta + tau_star * Z + E 
            #tau_star = np.sum(Z * (M1 - M0)) / np.sum(Z)
            # The row-specific deviation is counted as part of the noise here.
            E_op = np.linalg.norm(E+Z*delta, ord=2)
            s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)
            # Suggested regularisation: the smaller of (r-th singular value of M0)/1.1
            # and 1.1 * (operator norm of the effective noise).
            suggest_l = min(s[r-1]/1.1, E_op*1.1)
            print('E norm', np.linalg.norm(E, ord=2), 'delta_norm', np.linalg.norm(Z*delta), 'M0_s', s[r-1])
            print(s[r-1], E_op, np.linalg.norm(E, ord=2))

            #O = M0 + Z * tau_star + Z * E2 + E
            #E_op = np.linalg.norm(E+Z*E2, ord=2)
            #suggest_l = min(s[r-1]/1.1, E_op*1.1)

            #O = (1-Z) * M0 + Z * M1 + E
            #E_op = np.linalg.norm(E+Z*(M1-M0-tau_star), ord=2)
            #print(s[r-1], E_op, np.linalg.norm(E, ord=2), np.linalg.norm(Z*(M1-M0-tau_star), ord=2))
            #suggest_l = min(s[r-1]/1.1, E_op*1.1)
            
            # s1 = np.linalg.svd(M0 + E, full_matrices=False, compute_uv=False)
            # try_r = 15
            # suggest_l = s1[try_r-1]/1.1

            results = run_algo(algo_list, O, Z, suggest_r = r, suggest_l = suggest_l, eps = 1e-4, de_mean_O=False, treat_units=treat_units, tau_star = tau_star, m2 = m2, M0 = M0)

            # True per-row effect, read from the last column of (tau_star + delta) * Z;
            # rows that are untreated in the last column therefore get 0.
            tau_star_i = ((tau_star + delta)*Z)[:, -1]
            
            error_metric = {}
            for algo in algo_list:
                # 'm_debias' is evaluated separately after this loop.
                if (algo == 'm_debias'):
                    continue
                (M, tau) = results[algo]
                error = 0
                # Per-row effect estimate: mean residual O - M over the treated entries
                # of the row; the 1e-7 avoids dividing by zero for untreated rows.
                tau_i = np.sum((O - M)*Z, axis = 1) / (np.sum(Z, axis=1) + 1e-7)
                error_metric[algo] = np.linalg.norm(tau_i-tau_star_i) #metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']
                print(np.linalg.norm(M-M0) / np.linalg.norm(M0))


            # For the row-specific estimator the regularisation is recomputed from the
            # operator norm of E alone (without the Z*delta term).
            E_op = np.linalg.norm(E, ord=2)
            s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)
            suggest_l = min(s[r-1]/1.1, E_op*1.1)

            M, tau, info = convex_algorithm_row_specific_treatments(O, np.ones_like(O), Z, suggest_l, suggest = [], eps = 1e-3, debug = False)
            tau_debias = debias_row_specific(M, tau, Z, suggest_l)
            error_metric['m_debias'] = np.linalg.norm(tau_debias.reshape(-1) - tau_star_i)
            return error_metric

        error_metric = test()
        print(error_metric)
        for index, algo in enumerate(algo_list):
                datas[T, index] = error_metric[algo]
        print('experiment {}, time elapses '.format(T), time.time() - t1)
    datas = pd.DataFrame(datas, columns = algo_list)
    return datas


# Run the row-specific-effects experiment and cache the resulting DataFrame on disk.
pattern = 'block'
datas = synthetic_experiment_performance_run_results(n1 = n1, n2 = n2, mean_M = mean_M, sigma = sigma, num_experiment = 100, pattern = pattern)
file_name = 'results/plot_results/synthetic_{}_m1_m2_performance_results.p'.format(pattern)
#file_name = 'tmp.p'
# A context manager guarantees the file is flushed and closed, even if pickling fails.
with open(file_name, 'wb') as result_file:
    pickle.dump(datas, result_file)

In [None]:
def synthetic_experiment_performance_plot_results(datas, label = 'min', file_name = ''):
    '''
        Summarise and plot per-experiment errors of several algorithms.

        Displays descriptive statistics of the signed and absolute errors, a pairwise
        comparison table (entry (i, j) is the fraction of experiments in which
        algorithm i has a strictly smaller absolute error than algorithm j), an
        empirical CDF of the absolute errors (saved to file_name + '_cdf.eps') and a
        bar plot of the signed errors.

        Parameters:
            datas:     DataFrame with one row per experiment and one column per algorithm
            label:     'min' divides every row by its smallest absolute error and caps
                       the ratio at 10; any other value plots the absolute errors as is
            file_name: prefix of the saved CDF figure

        Only the algorithms named in `candidate_algos` that are also columns of `datas`
        are compared and plotted; any other column of `datas` is ignored.
    '''
    results = np.abs(datas.values)
    results_original = datas.values
    abs_errors = datas.abs()
    display(datas.describe())
    display(abs_errors.describe())
    #display((datas/10).abs().describe())
    if (label == 'min'):
        results = (results.T / np.min(results, axis=1)).T
        results = np.where(results > 10, 10, results)

    candidate_algos = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA']
    legend_dic = {'convex_debias': 'Debias Convex', 'missing': 'MC-NNM', 'OLS':'OLS', 'PCA':'W-PCA', 'robust_synthetic_control':'RSC'}
    # Build a filtered copy instead of calling list.remove() while iterating: removing
    # during iteration skips the element that follows each removed one, which left
    # algorithms in the list that are not columns of `datas`.
    algo_list = [algo for algo in candidate_algos if algo in datas.columns]
    legend_list = [legend_dic[algo] for algo in algo_list]

    # The table is indexed by the plotted algorithms, so it is sized by algo_list and
    # not by the number of columns of `datas`, which may differ.
    m = len(algo_list)
    comparison_table = np.zeros((m, m))
    for i, ai in enumerate(algo_list):
        for j, aj in enumerate(algo_list):
            comparison_table[i][j] = np.sum(abs_errors[ai] < abs_errors[aj]) / datas.shape[0]
    df_compare = pd.DataFrame(data=comparison_table, index=legend_list, columns=legend_list)
    display(df_compare)

    # Long-format tables (value, algorithm) for seaborn.
    column_of = {algo: list(datas.columns).index(algo) for algo in algo_list}
    df_list = []
    df_list_original = []
    for i in range(results.shape[0]):
        for algo in algo_list:
            j = column_of[algo]
            df_list.append([results[i, j], algo])
            df_list_original.append([results_original[i, j], algo])

    axis_label = r'$\frac{|\tau - \tau^{*}|}{\bar{M^{*}}}$'
    df = pd.DataFrame(df_list, columns = [axis_label, 'algos'])
    df_original = pd.DataFrame(df_list_original, columns = [axis_label, 'algos'])

    g = sns.displot(data = df, x = axis_label, hue = 'algos', kind='ecdf', legend = False)

    # Recover the colours seaborn assigned to the ECDF curves so a legend can be built
    # by hand (the plot itself is drawn with legend = False).
    ecdf_lines = [item for item in g.ax.get_children()
                  if isinstance(item, matplotlib.lines.Line2D)]

    color_set = []
    for line in ecdf_lines:
        if line.get_color() not in color_set:
            color_set.append(line.get_color())
    print(color_set)

    # NOTE(review): the colours are matched to the legend entries in reversed order,
    # which assumes the curves are stored in reverse hue order — confirm against the
    # seaborn version in use.
    legend_patches = [matplotlib.patches.Patch(color=C, label=L) for
                      C, L in zip(color_set[::-1], legend_list)]

    plt.legend(handles=legend_patches, fontsize = 13)
    plt.ylabel('CDF', fontsize=14)
    plt.xlabel(axis_label, fontsize=14)
    plt.xlim((0, 0.8))
    plt.savefig(file_name + '_cdf.eps')
    plt.show()

    sns.catplot(data = df_original, x = 'algos', y = axis_label, kind='bar')
    plt.show()

# Plot the results currently held in the kernel. This cell relies on `datas` and
# `file_name` having been defined by a cell executed earlier; the commented lines
# show how to reload a pickled result set instead.
#file_name =  'results/plot_results/synthetic_stagger_m1_m2_performance_results'
#datas = pickle.load(open(file_name + '.p', 'rb'))
#synthetic_experiment_performance_plot_results(datas, label = 'min')
synthetic_experiment_performance_plot_results(datas, label = 'max', file_name = file_name)

## Synthetic experiments for distribution test

In [None]:
### Set up
# Global configuration read by the distribution-experiment cells that follow.
n1 = 100      # first dimension of M0
n2 = 100      # second dimension of M0
mean_M = 10   # target mean of the entries of M0
r = 10        # rank of M0
sigma = 1     # standard deviation of the noise E
sigma_d = 1   # standard deviation of the entrywise treatment-effect perturbation delta

In [None]:
def synthetic_experiment_distribution_run_results(n1 = 50, n2 = 50, mean_M = 1, r = 5, num_experiment=1, sigma = 0.1, sigma_d = 0.1, pattern = 'stagger'):
    '''
        Sample the studentized error of the debiased convex estimator of tau.

        For each experiment a fresh (M0, Z) pair is generated:
            - M0 has shape (n1 x n2), mean mean_M and rank r
            - pattern 'stagger': Z = generate_Z(['stagger', (m1, m2)]) with
              m1 ~ [1, n1) and m2 ~ [n2/2, n2) drawn uniformly
            - pattern 'block':   Z = generate_Z(['block', (m1, m2)]) with
              m1 ~ [1, n1/3) and m2 ~ [n2/2, n2) drawn uniformly

        The observation is O = M0 + tau_star * Z + E + delta * Z with tau_star = 1,
        E ~ N(0, sigma^2) and delta ~ N(0, sigma_d^2) entrywise. The recorded sample is
            (tau - tau_star) / estimated_sigma
        where tau comes from 'convex_debias' and estimated_sigma is the plug-in standard
        deviation computed from projection_T_orthogonal(Z, M) with M the (non-debiased)
        'convex' estimate of M0.

        Parameters:
            n1, n2:          dimensions of M0
            mean_M:          target mean of M0
            r:               rank of M0
            num_experiment:  number of independent (M0, Z) pairs
            sigma, sigma_d:  noise levels described above
            pattern:         'stagger' or 'block'

        Returns:
            numpy array of length num_experiment with the studentized errors.

        Raises:
            ValueError: if pattern is neither 'stagger' nor 'block' (previously this
            surfaced later as an unbound-variable error on Z).
    '''
    if pattern not in ('stagger', 'block'):
        raise ValueError("pattern must be 'stagger' or 'block', got {!r}".format(pattern))

    samples = np.zeros(num_experiment)
    t1 = time.time()
    for T in range(num_experiment):
        # Progress report every 100 experiments.
        if (T % 100 == 0):
            print(time.time() - t1)
            print('experiment ', T)
        #np.random.seed(1)
        M0 = synthetic_M0(n1, n2, mean_M, r)
        if (pattern == 'stagger'):
            m1 = np.random.randint(low=1, high=n1)
            m2 = np.random.randint(low=int(n2/2), high=n2)
            Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)
        else:
            m1 = np.random.randint(low=1, high=int(n1/3))
            m2 = np.random.randint(low=int(n2/2), high=n2)
            # The second return value (the treated units) is not needed here.
            Z, _treat_units = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)

        print('***sparsity****', np.sum(Z) / np.size(Z))

        tau_star = 1

        PTperpZ = projection_T_orthogonal(Z, M0)

        # Standard deviation predicted from the true M0; it is only used to set the
        # solver tolerance below.
        predict_sigma =  np.sqrt((sigma**2) / np.sum(PTperpZ**2) + (sigma_d**2) * np.sum((PTperpZ**2)*Z) / (np.sum(PTperpZ**2)**2))

        s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)

        def test():
            # One noise realisation on the (M0, Z) pair of the current iteration.
            #np.random.seed(T)
            E = np.random.normal(loc=0, scale=sigma, size=M0.shape)
            delta = np.random.normal(loc = 0, scale = sigma_d, size = M0.shape)
            O = M0 + Z * tau_star + E + delta * Z
            E_op = np.linalg.norm(E + delta * Z, ord=2)
            # Suggested regularisation: the smaller of (r-th singular value of M0)/1.1
            # and 1.1 * (operator norm of the effective noise).
            suggest_l = min(s[r-1]/1.1, E_op*1.1)

            # Tolerance tied to the predicted standard deviation so the solver error
            # stays well below the scale of the quantity being measured.
            results = run_algo(['convex_debias', 'convex'], O, Z, suggest_r = -1, suggest_l = suggest_l, eps = predict_sigma/1000, de_mean_O=False)
            (M, tau) = results['convex_debias']
            (M_no_debias, tau_no_debias) = results['convex']

            # Plug-in version of predict_sigma using the estimated matrix in place of M0.
            projected_Z = projection_T_orthogonal(Z, M_no_debias)
            estimated_sigma_level1 = np.sqrt((sigma**2) / np.sum(projected_Z**2) + (sigma_d**2) * np.sum((projected_Z**2)*Z) / (np.sum(projected_Z**2)**2))

            return (tau-tau_star)/estimated_sigma_level1

        samples[T] = test()
        print('experiment {}, time elapses {}, tau error {}'.format(T, time.time() - t1, samples[T]))
    return samples

#for r in [2, 4, 6, 8, 10]:
#for r in [4, 10, 15, 20, 25]:
r = 10

#samples = synthetic_experiment_distribution_run_results(n1 = n1, n2 = n2, mean_M = mean_M, r = r, sigma = sigma, sigma_d = sigma_d, num_experiment = 1000, pattern = 'stagger')


# Sweep over matrix sizes (n1) and aspect ratios (n2 / n1); one pickle file of
# 1000 studentized errors is written per configuration.
# NOTE: the loop overwrites the global n1 and n2 set in the set-up cell.
n1_list = [50, 100, 150, 200]
ratio_list = [0.5, 1, 2, 4]
for n1 in n1_list:
    for ratio in ratio_list:
        n2 = int(n1 * ratio)
        # sigma_d is passed from the set-up cell's own `sigma_d` (the call used to
        # pass `sigma`; both are 1 in the set-up cell, so the numbers are unchanged).
        samples = synthetic_experiment_distribution_run_results(n1 = n1, n2 = n2, mean_M = mean_M, r = r, sigma = sigma, sigma_d = sigma_d, num_experiment = 1000, pattern = 'stagger')
        file_name = 'results/plot_results/stagger_estimateT_distribution_samples_r{}_n1_{}_n2_{}.p'.format(r, n1, n2)
        # A context manager guarantees each file is flushed and closed.
        with open(file_name, 'wb') as sample_file:
            pickle.dump(samples, sample_file)

In [None]:
def synthetic_experiment_distribution_plot_distribution_results(samples, file_name):
    '''
        Compare the empirical distribution of `samples` with the standard normal.

        Produces three figures: a 30-bin density polyline together with the N(0, 1)
        pdf, a seaborn density histogram with the N(0, 1) pdf overlaid (saved to
        file_name + '.eps'), and the ECDF of 100000 fresh standard-normal draws.
        The sample mean and standard deviation are printed after the first figure.
    '''
    density, edges = np.histogram(samples, density=True, bins=30)
    centers = edges[:-1]/2 + edges[1:]/2
    plt.plot(centers, density)

    # Evaluate the reference pdf over the observed range of the samples.
    grid = np.linspace(min(samples), max(samples), 1000)
    normal_pdf = norm.pdf(grid, loc=0, scale=1)
    plt.plot(grid, normal_pdf)
    plt.show()

    print(np.mean(samples), np.std(samples))

    facet = sns.displot(data=samples, kind='hist', stat='density')
    facet.set(xlim=(-4, 4), ylim=(0.0, 0.45))
    plt.plot(grid, normal_pdf, color='r', label=r'$N(0, 1)$')
    plt.legend(fontsize = 17)
    plt.ylabel('Density', fontsize = 18)
    plt.tight_layout()
    plt.savefig(file_name + '.eps')
    plt.show()

    reference_draws = scipy.stats.norm.rvs(loc=0, size=100000)
    sns.ecdfplot(data=reference_draws)
    plt.show()

    #print(scipy.stats.wasserstein_distance(samples, x))

def KS_test(samples):
    '''
        One-sample Kolmogorov-Smirnov test of `samples` against the standard normal.

        Prints the statistic and p-value for `samples`, then prints the test result for
        10000 fresh standard-normal draws as a reference point.

        Returns:
            the K-S statistic of `samples`.
    '''
    std_normal_cdf = scipy.stats.norm.cdf

    outcome = scipy.stats.ks_1samp(samples, std_normal_cdf)
    print(outcome.statistic, outcome.pvalue)

    reference_draws = scipy.stats.norm.rvs(loc=0, size=10000)
    print(scipy.stats.ks_1samp(reference_draws, std_normal_cdf))
    return outcome.statistic

# Plot the distribution of the `samples` array currently held in the kernel.
# NOTE(review): `samples` and `r` are whatever earlier cells left behind, while the
# figure is saved under a 'block' prefix — confirm that the samples in memory were
# really produced with the block pattern before using the saved figure.
file_path = 'results/plot_results/block'
    # file_name = file_path+'_distribution_before_KS_test_samples_r{}.p'.format(r)
    # samples = pickle.load(open(file_name, 'rb'))
synthetic_experiment_distribution_plot_distribution_results(samples, file_path + '_distribution_r{}'.format(r))
# KS_test(samples)
# print('0.95 portion', np.sum(np.abs(samples)<=1.96), len(samples))


#for r in [2, 4, 6, 8, 10]:



In [None]:

# Empirical coverage of the nominal 95% interval: for every (n1, n2 / n1)
# configuration, the fraction of studentized errors with absolute value <= 1.96.
# Relies on the global `r` to locate the pickle files.
n1_list = [50, 100, 150, 200]
ratio_list = [0.5, 1, 2, 4]
df = pd.DataFrame()
# Sized from the two lists so the table follows any change to the sweep.
data = np.zeros((len(n1_list), len(ratio_list)))
for i,n1 in enumerate(n1_list):
    for j,ratio in enumerate(ratio_list):
        n2 = int(n1 * ratio)
        file_path = 'results/plot_results/stagger'
        file_name = 'results/plot_results/stagger_estimateT_distribution_samples_r{}_n1_{}_n2_{}.p'.format(r, n1, n2)
        # A context manager closes each file right after it is read.
        with open(file_name, 'rb') as sample_file:
            samples = pickle.load(sample_file)
        #synthetic_experiment_distribution_plot_distribution_results(samples, file_path + '_distribution_r{}'.format(r))
        #KS_test(samples)
        covered = np.sum(np.abs(samples)<=1.96)
        print('0.95 portion', n1, n2, covered, len(samples))
        data[i, j] = covered/len(samples)

row_headers = [r'$n_1 = 50$', '100', '150', '200']
column_headers = [r'$\frac{n_2}{n_1} = 0.5$', '1', '2', '4']
cell_text = data
# Light uniform background colour for the header cells.
rcolors = plt.cm.BuPu(np.full(len(row_headers), 0.1))
ccolors = plt.cm.BuPu(np.full(len(column_headers), 0.1))
 
fig, ax = plt.subplots()
ax.set_axis_off()
table = plt.table(cellText=cell_text,
                      rowLabels=row_headers,
                      rowColours=rcolors,
                      rowLoc='right',
                      colColours=ccolors,
                      colLabels=column_headers,
                      cellLoc='center')
table.set_fontsize(14)
table.scale(1, 6)
plt.savefig('tmp.eps')
#scipy.__version__

In [None]:
# K-S distance between the studentized errors and N(0, 1) as a function of the rank
# r, one curve per treatment pattern. The two patterns differ only in the file
# prefix and the legend label, so they share one loop.
# NOTE: the inner loop overwrites the global `r` (it ends at the last entry of array_r).
array_r = [4, 10, 15, 20, 25]
for pattern_name, curve_label in (('block', 'Block Pattern'), ('stagger', 'Stagger Pattern')):
    KS = []
    for r in array_r:
        file_name = 'results/plot_results/{}_distribution_before_KS_test_samples_r{}.p'.format(pattern_name, r)
        # A context manager closes each file right after it is read.
        with open(file_name, 'rb') as sample_file:
            samples = pickle.load(sample_file)
        KS.append(KS_test(samples))
    plt.plot(array_r, KS, label = curve_label, linestyle='-', marker='o')

plt.xlabel(r'Rank $r$', fontsize = 15)
plt.ylabel('KS distance', fontsize = 15)
plt.legend(fontsize = 14)
plt.savefig('results/plot_results/KS-rank-plot.eps')
plt.show()

### Distribution plot for K-S statistics

In [None]:
def ks_critical_value(n_trials, alpha):
    '''
        Critical value of the two-sided K-S statistic for a sample of size n_trials:
        the (1 - alpha) quantile of scipy.stats.kstwo with parameter n_trials.
    '''
    upper_quantile = 1-alpha
    return scipy.stats.kstwo.ppf(upper_quantile, n_trials)

def synthetic_experiment_distribution_plot_statistic_results(samples):
    '''
        Plot the theoretical K-S statistic density with the 5% and 10% critical values
        and report how often the empirical statistics in `samples` pass the test.

        Parameters:
            samples: array of K-S statistics (presumably each computed from 100 draws,
                     as the axis label and the critical values assume — TODO confirm)

        The figure is saved to 'results/plot_results/block_distribution_samples.eps'.
    '''
    #plt.hist(samples, bins = 10, range = (0.03, 0.2), density=True, label = 'empirical K-S statistics')

    # NOTE(review): the theoretical density is drawn for n = 10000 while the critical
    # values and the axis label use n = 100 — confirm which sample size is intended.
    x = np.linspace(ks_critical_value(10000, 0.01), ks_critical_value(10000, 0.99), 100)
    plt.plot(x, scipy.stats.kstwo.pdf(x, 10000), 'r-', lw=2, label = 'theoretical distribution')

    critical_05 = ks_critical_value(100, 0.05)
    critical_10 = ks_critical_value(100, 0.10)
    plt.axvline(critical_05, color='r', lw=2, ls='--', alpha=0.3, label='p-value 0.05')
    plt.axvline(critical_10, color='b', lw=2, ls='--', alpha=0.3, label='p-value 0.1')
    plt.xlabel('K-S statistic over 100 samples')
    plt.legend()
    plt.savefig('results/plot_results/block_distribution_samples.eps')
    plt.show()

    # A K-S statistic at or below the level-alpha critical value has p-value >= alpha.
    # The comparison used to be `>=`, which counted the opposite event (p-value <=
    # alpha) under these messages.
    print('empirical portion with p-value >= 0.05 is ', np.sum(samples <= critical_05) / len(samples))
    print('empirical portion with p-value >= 0.1 is ', np.sum(samples <= critical_10) / len(samples))

# Load the stored K-S statistics and plot them against the theoretical distribution.
file_name = 'results/plot_results/block_distribution_samples.p'
# A context manager closes the file right after it is read.
with open(file_name, 'rb') as sample_file:
    samples = pickle.load(sample_file)
synthetic_experiment_distribution_plot_statistic_results(samples)

## Invariance to changes in $\tau^{*}$

In [None]:
%run ../src/algorithms/SDID.py

In [None]:
def synthetic_experiment_performance_run_results(n1 = 50, n2 = 50, mean_M = 1, num_experiment=1, sigma = 0.1, pattern = 'block'):
    '''
        Measure how the estimation error of each algorithm changes with the true tau.

        A single instance (M0, E, Z) is generated once (the NumPy seed is fixed to 10
        before M0 is drawn); then, for every tau_star in the global `tau_list`, the
        observation O = M0 + Z * tau_star + E is rebuilt and every algorithm in the
        global `algo_list` is run on it through run_algo.

        NOTE: `tau_list` and `algo_list` are not parameters; they are read from the
        notebook's global namespace and must be defined before this function is
        called. `num_experiment` only sizes the unused `samples` array.

        Parameters:
            n1, n2:   dimensions of M0 and E
            mean_M:   target mean of M0
            sigma:    standard deviation of the noise E
            pattern:  'block', 'stagger' or 'adaptive' (selects how Z is generated)

        Returns:
            pandas DataFrame with one row per entry of tau_list and one column per
            algorithm, holding the 'tau_diff' metric returned by metric_compute.
    '''
    samples = np.zeros(num_experiment)  # NOTE: allocated but never used below
    t1 = time.time()


    r = 10

    # Fixed seed: every call generates the same M0, E and pattern parameters.
    np.random.seed(10)
    M0 = synthetic_M0(n1, n2, mean_M, r, type='Gamma')
    

    E = np.random.normal(loc=0, scale=sigma, size=(n1, n2))

    s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)

    if (pattern == 'stagger'):
        m1 = np.random.randint(low=1, high=n1)
        m2 = np.random.randint(low=1, high=n2)
        Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)
        treat_units = []

    if (pattern == 'block'):
        m1 = np.random.randint(low=1, high=int(n1/10))
        m2 = 20#np.random.randint(low=1, high=int(n2/3))
        #m1 = 1
        Z, treat_units = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)

    if (pattern == 'adaptive'):
        # Retry with fresh parameters until generate_Z stops reporting 'fail'.
        while True:
            a = np.random.randint(40)+5
            b = np.random.randint(40)+5
            #b = 2
            Z, info = generate_Z(pattern_tuple = ['adaptive', (a, b)], M0=M0)
            if (info == 'fail'):
                continue
            break
        treat_units = []
        m2 = 0
    
    print('***sparsity****', np.sum(Z) / np.size(Z))

    # `tau_list` and `algo_list` are globals (see the docstring).
    datas = np.zeros((len(tau_list), len(algo_list)))

    for (T, tau_star) in enumerate(tau_list):

        def test():
            # Same M0, Z and E for every tau_star; only the treatment effect changes.
            O = M0 + Z * tau_star + E
            suggest_l = s[r-1]/1.1

            # NOTE: the value is never used, but the draw still advances NumPy's
            # global random state before run_algo is called.
            random_tau_star = np.random.rand()*tau_star*10
            results = run_algo(algo_list, O, Z, suggest_r = r, suggest_l = suggest_l, eps = 1e-6, de_mean_O=False, treat_units=treat_units, tau_star = tau_star, m2 = m2, M0 = M0, suggest_random_tau=0)
            
            error_metric = {}
            for algo in algo_list:
                (M, tau) = results[algo]
                error_metric[algo] = metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']
            return error_metric

        error_metric = test()
        print(error_metric)
        for index, algo in enumerate(algo_list):
                datas[T, index] = error_metric[algo]
        print('tau_star {}, time elapses '.format(tau_star), time.time() - t1)
    datas = pd.DataFrame(datas, columns = algo_list)
    return datas

# Configuration for the invariance experiment. `tau_list` and `algo_list` are read as
# globals inside synthetic_experiment_performance_run_results, so they must be
# defined before the call below.
tau_list = np.arange(-200, 201, 50)   # true treatment effects to sweep over
mean_M = 10
n1 = 100
n2 = 100
sigma = 1
pattern = 'block'
algo_list = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'SDID']


datas = synthetic_experiment_performance_run_results(n1 = n1, n2 = n2, mean_M = mean_M, sigma = sigma, num_experiment = 1, pattern = pattern)

# Display names used in the legend.
legend_dic = {'convex_debias': 'Debias Convex', 'missing': 'MC-NNM', 'OLS':'OLS', 'robust_synthetic_control':'RSC', 'SDID': 'SDID'}

# One curve per algorithm: absolute error of the estimated tau against the true tau.
for algo in algo_list:
    plt.plot(tau_list, np.abs(datas[algo]), label = legend_dic[algo])
plt.legend()
plt.xlabel(r'$\tau^{*}$', fontsize = 15)
plt.ylabel(r'$|\tau-\tau^{*}|$', fontsize = 15)
plt.savefig('invariance-tau.eps')
#file_name = 'results/plot_results/synthetic_{}_m1_m2_performance_results.p'.format(pattern)
#file_name = 'tmp.p'
#pickle.dump(datas, open(file_name, 'wb'))

## Landscape of the non-convex method

In [None]:
def synthetic_experiment_performance_run_results(n1 = 50, n2 = 50, mean_M = 1, num_experiment=1, sigma = 0.1, pattern = 'block'):
    '''
        Plot, as functions of tau, the objective of the rank-constrained (non-convex)
        and of the nuclear-norm (convex) formulation on a hand-built deterministic
        instance.

        Instance (n = n1):
            - M0 is (2n x 2n), equal to -1 on its top-left (n x n) block, 0 elsewhere
            - Z treats the top-left (int(n/1.1) x int(n/1.1)) block and the
              bottom-right (n x n) block
            - O = M0 (tau_star = 0, no noise)

        For every tau on the grid np.arange(-2, 1, 0.01):
            S(tau)     = ||O - tau*Z - M||_F^2 with M the rank-r (r = 1) truncated SVD
                         of O - tau*Z
            S_cvx(tau) = ||O - tau*Z - M||_F^2 + 2*lam*sum(s_soft) with M obtained by
                         soft-thresholding the singular values of O - tau*Z at lam = 8

        The function also prints u^T Z v for the top singular vectors of M0, the
        singular values of projection_T_orthogonal(Z, M0), and the 'tau_diff' error
        of 'convex_debias' on this instance. The figure is saved to
        'results/plot_results/instability_non_convex_optimization_Bai09.eps'.

        Only n1 is used to build the instance; the remaining parameters are kept so the
        signature matches the other experiment runners. Returns None.

        NOTE: this function name is defined by several cells of the notebook; the
        definition that is active depends on which cell was executed last.
    '''
    n = n1

    M0 = np.zeros((2*n, 2*n))
    M0[0:n, 0:n] = -1

    Z = np.zeros((2*n, 2*n))
    Z[0:int(n/1.1), 0:int(n/1.1)] = 1
    Z[n:, n:] = 1

    r = 1

    # Diagnostics on the instance: alignment of Z with the top singular space of M0,
    # and the spectrum of the projected treatment pattern.
    u, _, vh = np.linalg.svd(M0, full_matrices=False)
    u = u[:,:r]
    vh = vh[:r, :]
    print(u.T.dot(Z).dot(vh.T))
    s1 = np.linalg.svd(projection_T_orthogonal(Z, M0), full_matrices=False, compute_uv=False)
    print(s1)

    tau_star = 0
    O = M0 + Z * tau_star
    tau_list = np.arange(-2, 1, 0.01)
    lam = 8
    datas = np.zeros((len(tau_list)))
    datas_convex = np.zeros((len(tau_list)))
    for (T, tau) in enumerate(tau_list):
        residual = O - tau*Z
        # One SVD serves both objectives. The hard-thresholded spectrum is built on a
        # copy so the original singular values stay available for soft-thresholding.
        u_t, s, vh_t = np.linalg.svd(residual, full_matrices=False)

        s_hard = s.copy()
        s_hard[r:] = 0
        M = (u_t*s_hard).dot(vh_t)
        datas[T] = np.sum((residual-M)**2)

        s_soft = np.maximum(s-lam, 0)
        M = (u_t*s_soft).dot(vh_t)
        datas_convex[T] = np.sum((residual-M)**2) + 2*lam*np.sum(s_soft)

    results = run_algo(['convex_debias'], O, Z, suggest_r = r, suggest_l = 4, eps = 1e-6, de_mean_O=False, treat_units=[], tau_star = 0, m2 = 0, M0 = M0, suggest_random_tau=0)
    (M, tau) = results['convex_debias']
    error_metric = metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']
    print(error_metric)
    
    plt.plot(tau_list, datas, label = r'$S(\tau)$')
    plt.plot(tau_list, datas_convex, label = r'$S_{\mathrm{cvx}}(\tau)$')
    plt.xlabel(r'$\tau$', fontsize=15)
    #plt.ylabel(r'$S(\tau)$', fontsize=15)
    plt.legend(fontsize = 15)
    plt.savefig('results/plot_results/instability_non_convex_optimization_Bai09.eps')


# Driver for the landscape experiment defined in this cell.
# NOTE: the function builds its instance from n1 alone and sweeps its own local tau
# grid, so the global `tau_list` set here is not what gets plotted. The function has
# no return statement, so `datas` is None after this call.
tau_list = np.arange(-200, 201, 50)
mean_M = 30
n1 = 100
n2 = 100
sigma = 0
pattern = 'block'
datas = synthetic_experiment_performance_run_results(n1 = n1, n2 = n2, mean_M = mean_M, sigma = sigma, num_experiment = 1, pattern = pattern)

# algo_list = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA', 'non_convex']
# legend_dic = {'convex_debias': 'Debias Convex', 'missing': 'MC-NNM', 'OLS':'OLS', 'PCA':'W-PCA', 'robust_synthetic_control':'RSC', 'non_convex': 'Non Convex'}

# for algo in algo_list:
#     plt.plot(tau_list, np.abs(datas[algo]), label = legend_dic[algo])
# plt.legend()
# plt.xlabel(r'$\tau^{*}$', fontsize = 15)
# plt.ylabel(r'$|\tau-\tau^{*}|$', fontsize = 15)
# plt.savefig('invariance-tau.eps')
#file_name = 'results/plot_results/synthetic_{}_m1_m2_performance_results.p'.format(pattern)
#file_name = 'tmp.p'
#pickle.dump(datas, open(file_name, 'wb'))

In [None]:
def synthetic_experiment_performance_run_results(n1 = 50, n2 = 50, mean_M = 1, num_experiment=1, sigma = 0.1, pattern = 'block'):
    '''
        Plot, as functions of tau, the objective of the rank-constrained (non-convex)
        and of the nuclear-norm (convex) formulation on a hand-built deterministic
        instance with two opposite-sign blocks.

        Instance (n = n1, N = 2n + int(1.5n)):
            - M0 is (N x N), equal to +1 on the block [0:n, 0:n], -1 on the block
              [n:2n, n:2n] and 0 elsewhere
            - Z treats the (int(n/1.1) x int(n/1.1)) top-left corner of each of these
              two blocks and the whole trailing block [2n:, 2n:]
            - O = M0 (tau_star = 0, no noise)

        For every tau on the grid np.arange(-2, 2, 0.01):
            first curve  = ||O - tau*Z - M||_F^2 with M the rank-r (r = 2) truncated
                           SVD of O - tau*Z
            second curve = ||O - tau*Z - M||_F^2 + 2*lam*sum(s_soft) with M obtained by
                           soft-thresholding the singular values of O - tau*Z at lam = 4

        The function also prints u^T Z v for the top singular vectors of M0, the
        singular values of projection_T_orthogonal(Z, M0), and the 'tau_diff' error
        of 'convex_debias' on this instance. The figure is saved to
        'results/plot_results/instability_non_convex_optimization.eps'.

        Only n1 is used to build the instance; the remaining parameters are kept so the
        signature matches the other experiment runners. Returns None.

        NOTE: this function name is defined by several cells of the notebook; the
        definition that is active depends on which cell was executed last.
    '''
    n = n1
    size = 2*n+int(1.5*n)

    M0 = np.zeros((size, size))
    M0[0:n, 0:n] = 1
    M0[n:2*n, n:2*n] = - 1

    Z = np.zeros((size, size))
    Z[0:int(n/1.1), 0:int(n/1.1)] = 1
    Z[n:int(n/1.1)+n, n:int(n/1.1)+n] = 1
    Z[2*n:, 2*n:] = 1

    r = 2

    # Diagnostics on the instance: alignment of Z with the top singular space of M0,
    # and the spectrum of the projected treatment pattern.
    u, _, vh = np.linalg.svd(M0, full_matrices=False)
    u = u[:,:r]
    vh = vh[:r, :]
    print(u.T.dot(Z).dot(vh.T))
    s1 = np.linalg.svd(projection_T_orthogonal(Z, M0), full_matrices=False, compute_uv=False)
    print(s1)

    tau_star = 0
    O = M0 + Z * tau_star
    tau_list = np.arange(-2, 2, 0.01)
    lam = 4
    datas = np.zeros((len(tau_list)))
    datas_convex = np.zeros((len(tau_list)))
    for (T, tau) in enumerate(tau_list):
        residual = O - tau*Z
        # One SVD serves both objectives. The hard-thresholded spectrum is built on a
        # copy so the original singular values stay available for soft-thresholding.
        u_t, s, vh_t = np.linalg.svd(residual, full_matrices=False)

        s_hard = s.copy()
        s_hard[r:] = 0
        M = (u_t*s_hard).dot(vh_t)
        datas[T] = np.sum((residual-M)**2)

        s_soft = np.maximum(s-lam, 0)
        M = (u_t*s_soft).dot(vh_t)
        datas_convex[T] = np.sum((residual-M)**2) + 2*lam*np.sum(s_soft)

    results = run_algo(['convex_debias'], O, Z, suggest_r = r, suggest_l = 4, eps = 1e-6, de_mean_O=False, treat_units=[], tau_star = 0, m2 = 0, M0 = M0, suggest_random_tau=0)
    (M, tau) = results['convex_debias']
    error_metric = metric_compute(M, tau, M0, tau_star, Z, ['tau_diff'])['tau_diff']
    print(error_metric)
    
    plt.plot(tau_list, datas)
    plt.plot(tau_list, datas_convex)
    plt.xlabel(r'$\tau$', fontsize=15)
    plt.ylabel(r'$\tilde{S}(\tau)$', fontsize=15)
    plt.savefig('results/plot_results/instability_non_convex_optimization.eps')


# Driver for the landscape experiment defined in this cell.
# NOTE: the function builds its instance from n1 alone and sweeps its own local tau
# grid, so the global `tau_list` set here is not what gets plotted. The function has
# no return statement, so `datas` is None after this call.
tau_list = np.arange(-200, 201, 50)
mean_M = 30
n1 = 50
n2 = 50
sigma = 0
pattern = 'block'
datas = synthetic_experiment_performance_run_results(n1 = n1, n2 = n2, mean_M = mean_M, sigma = sigma, num_experiment = 1, pattern = pattern)

# algo_list = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA', 'non_convex']
# legend_dic = {'convex_debias': 'Debias Convex', 'missing': 'MC-NNM', 'OLS':'OLS', 'PCA':'W-PCA', 'robust_synthetic_control':'RSC', 'non_convex': 'Non Convex'}

# for algo in algo_list:
#     plt.plot(tau_list, np.abs(datas[algo]), label = legend_dic[algo])
# plt.legend()
# plt.xlabel(r'$\tau^{*}$', fontsize = 15)
# plt.ylabel(r'$|\tau-\tau^{*}|$', fontsize = 15)
# plt.savefig('invariance-tau.eps')
#file_name = 'results/plot_results/synthetic_{}_m1_m2_performance_results.p'.format(pattern)
#file_name = 'tmp.p'
#pickle.dump(datas, open(file_name, 'wb'))

In [None]:
def synthetic_experiment_performance_plot_results(datas, label = 'min', file_name = '', xlim = (0, 0.8)):
    '''
        Summarise and plot the per-algorithm errors stored in datas.

        Displays describe() of datas and of datas.abs(), displays a pairwise
        "win rate" table, draws an ECDF of the absolute errors (saved to
        file_name + '_cdf.eps') and a bar plot of the signed errors.

        Parameters
        ----------
        datas : pandas.DataFrame
            One column per algorithm, one row per experiment. Only the columns
            named 'convex_debias', 'robust_synthetic_control', 'missing', 'OLS'
            and 'PCA' are plotted; other columns are ignored by the plots and
            by the comparison table. Entries are presumably the signed errors
            tau - tau* (the axis labels say so) -- TODO confirm against the
            code that builds datas.
        label : str
            'min'  -> each row of absolute errors is divided by the smallest
                      absolute error in that row (taken over ALL columns of
                      datas) and the ratio is capped at 10.
            other  -> the raw absolute errors are plotted.
        file_name : str
            Prefix of the saved ECDF figure.
        xlim : tuple or None
            x-axis limits of the ECDF plot. The default reproduces the limits
            that used to be hard-coded; pass None to keep matplotlib's limits
            (useful with label='min', where the ratios are >= 1).

        Raises
        ------
        ValueError
            If datas contains none of the known algorithm columns.
    '''
    display(datas.describe())
    display(datas.abs().describe())

    signed_values = datas.values
    abs_values = np.abs(signed_values)
    if (label == 'min'):
        # A row whose best error is exactly 0 yields inf (capped to 10 below)
        # or nan (0/0, left as nan), exactly as before.
        plotted_values = abs_values / np.min(abs_values, axis=1, keepdims=True)
        plotted_values = np.where(plotted_values > 10, 10, plotted_values)
    else:
        plotted_values = abs_values

    known_algos = ['convex_debias', 'robust_synthetic_control', 'missing', 'OLS', 'PCA']
    legend_dic = {'convex_debias': 'Convex Debias', 'missing': 'MC-NNM', 'OLS':'OLS', 'PCA':'W-PCA', 'robust_synthetic_control':'RSC'}
    color_dic = {'convex_debias': 'blue', 'missing': 'tab:red', 'OLS':'tab:green', 'PCA':'tab:purple', 'robust_synthetic_control':'gold'}

    # Build a filtered copy instead of calling remove() on the list being
    # iterated: removing during iteration skips the element that follows each
    # removed one, so consecutive missing algorithms were not all dropped.
    columns = list(datas.columns)
    algo_list = [algo for algo in known_algos if algo in columns]
    if not algo_list:
        raise ValueError('datas has none of the expected algorithm columns: {}'.format(known_algos))
    legend_list = [legend_dic[algo] for algo in algo_list]
    col_idx = [columns.index(algo) for algo in algo_list]

    # comparison_table[i][j] = fraction of experiments in which algorithm i has
    # a strictly smaller absolute error than algorithm j. The table is sized by
    # the algorithms actually present, not by len(datas.columns), so it always
    # matches legend_list.
    abs_by_algo = abs_values[:, col_idx]
    comparison_table = (abs_by_algo[:, :, None] < abs_by_algo[:, None, :]).mean(axis=0)
    df_compare = pd.DataFrame(data=comparison_table, index=legend_list, columns=legend_list)
    display(df_compare)

    # Long-form frames: one row per (experiment, algorithm), algorithm varying
    # fastest, which matches the row-major ravel of the value matrices.
    abs_label = r'$|\tau - \tau^{*}|$'
    signed_label = r'$\tau - \tau^{*}$'
    algos_long = algo_list * plotted_values.shape[0]
    df = pd.DataFrame({abs_label: plotted_values[:, col_idx].ravel(), 'algos': algos_long})
    df_original = pd.DataFrame({signed_label: signed_values[:, col_idx].ravel(), 'algos': algos_long})

    # Fix the hue order and colours explicitly so the legend can be built from
    # the same mapping, instead of scraping line colours back from the axes in
    # an order that depends on how seaborn happens to draw them.
    palette = {algo: color_dic[algo] for algo in algo_list}
    sns.displot(data = df, x = abs_label, hue = 'algos', hue_order = algo_list, palette = palette, kind = 'ecdf', legend = False)

    legend_patches = [matplotlib.patches.Patch(color = palette[algo], label = legend_dic[algo])
                      for algo in algo_list]
    plt.legend(handles = legend_patches, fontsize = 13)
    plt.ylabel('CDF', fontsize = 14)
    plt.xlabel(abs_label, fontsize = 14)
    if xlim is not None:
        plt.xlim(xlim)
    plt.savefig(file_name + '_cdf.eps')
    plt.show()

    # Bar plot of the signed values, hence the signed axis label.
    sns.catplot(data = df_original, x = 'algos', y = signed_label, order = algo_list, kind = 'bar')
    plt.show()

# Plot the results of the run above; label='max' selects the branch that plots the raw
# (un-normalised) absolute errors.
# NOTE(review): in the cell above, file_name is only assigned in commented-out lines, so
# this call relies on a value left over from an earlier cell -- confirm it is defined on a
# fresh Restart & Run All.
synthetic_experiment_performance_plot_results(datas, label = 'max', file_name = file_name)