In [9]:
import numpy as np
import pandas as pd
import random
import utilities as est
from a3mV2 import OptVariance
from synthetic_generate import generate_synthetic_data
np.set_printoptions(precision=3)

# ---------------------------------------------------------------------------
# Experiment setup: fix the key parameters, build the bin centres, draw the
# original samples and dump the inputs of the experiment to CSV files.
# ---------------------------------------------------------------------------

# set key parameters
d = 20  # number of bins covering [0, beta]
beta = 1  # upper end of the binned range
data_type = "exp"  # label of the data distribution (used in messages/filenames)
est_type = "sw"  # estimator selector: "sw" or "grr"
N = d*10000  # number of data samples to draw
total_repeat = 10  # number of estimation rounds accumulated per epsilon

# set index of bins
bin_width = beta/d
# One centre per bin: d points from bin_width/2 to beta - bin_width/2, spaced
# exactly bin_width apart.  Using d-1 points here gave a wrong spacing and a
# column one entry shorter than x_q, which breaks the DataFrame built below.
bin_idxs = np.linspace(bin_width/2, beta-bin_width/2, d)
print("indices of bins are:", bin_idxs)

# NOTE(review): x_grid and x_q are not defined in this part of the file; they
# are presumably produced earlier (e.g. by generate_synthetic_data) -- confirm
# they exist before this point.  x_q is used as the weight of each of the d
# bin indices, so it must have d entries.
print('generating %d data samples with %s distribution.\n' %(N, data_type))
idx_original = random.choices(np.arange(d), x_q, k=N)
sample_original = x_grid[idx_original]

# save data
temp = {'bin_idxs': bin_idxs, 'x_q': x_q}
filename = "data/%s_q_%d.csv" %(data_type, d)
pd.DataFrame(temp).to_csv(filename)

# set epsilon: 0.5, 1.0, ..., 2.5
eps_grid = 0.5*np.arange(1, 6)
filename = 'data/eps_grid.csv'
temp = {'eps_grid': eps_grid}
pd.DataFrame(temp).to_csv(filename)

# ---------------------------------------------------------------------------
# For every epsilon in eps_grid:
#   1. build the output grid a_grid and matrix M of the chosen estimator,
#   2. save both to CSV,
#   3. pre-draw a pool of perturbed outputs for every input bin,
#   4. estimate the data distribution total_repeat times with est.EM and
#      normalise the accumulated estimate,
#   5. pass the estimate to OptVariance and save the resulting matrix.
# ---------------------------------------------------------------------------
for eps in eps_grid:
    print('eps=%.2f'%(eps))

    if est_type == 'sw':
        a_grid, M = est.SquareWave(eps, x_grid)
    elif est_type == 'grr':
        a_grid, M = est.GenRandResp(eps, x_grid)
    else:
        # Fail loudly: continuing would hit undefined (or stale) a_grid / M.
        raise ValueError(
            'invalid estimator type %r (expected "sw" or "grr")' % (est_type,))
    print('Found matrix M for %s estimator!' %(est_type))

    filename = 'data/%s_M_%.2f_%d.csv'%(est_type,eps,d)
    pd.DataFrame(M).to_csv(filename)
    filename = 'data/%s_a_%.2f_%d.csv'%(est_type,eps,d)
    temp = {'a_grid':a_grid}
    pd.DataFrame(temp).to_csv(filename)

    print("generating random num pools.")
    # Column j holds N outputs drawn from a_grid with weights M[:, j].
    rand_num = np.zeros((N,d))
    for j in range(d):
        rand_num[:,j] = random.choices(a_grid, M[:,j], k=N)

    print('estimating data distribution with %s estimator, repeat %d times.'
           %(est_type, total_repeat))
    x_q_noisy = np.zeros((1,d))
    for k in range(total_repeat):
        # Pair every original sample (column = its bin index) with a randomly
        # chosen row of the pool to obtain its perturbed value.
        idx_noise = random.choices(range(N), k=N)
        sample_perturbed = rand_num[idx_noise, idx_original]
        # NOTE(review): np.unique only reports values that actually occur; if
        # an entry of a_grid is never drawn, counts has fewer entries than M
        # has rows -- confirm est.EM tolerates that.
        _, counts = np.unique(sample_perturbed, return_counts=True)
        x_q_est = est.EM(M, counts, eps)
        x_q_noisy += x_q_est
    # Rescale the accumulated estimates so that they sum to one.
    x_q_noisy = x_q_noisy/np.sum(x_q_noisy)
    print('q_est(x)=', x_q_noisy)

    # Find the AAA transition matrix
    _, M = OptVariance(eps, x_grid, x_q_noisy)
    filename = 'data/%s_%s_M_%.2f_%d.csv'%(est_type,data_type,eps,d)
    pd.DataFrame(M).to_csv(filename)
    print("AAA Solution Found!\n")
print('taks complete!')

x= [0.   0.05 0.1  0.15 0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65
 0.7  0.75 0.8  0.85 0.9  0.95]
q(x)= [0.147 0.126 0.109 0.093 0.08  0.069 0.06  0.051 0.044 0.038 0.033 0.028
 0.024 0.021 0.018 0.015 0.013 0.011 0.01  0.008]
generating 200000 data samples with exp distribution.

eps=0.50
Found matrix M for sw estimator!
generating random num pools.
estimating data distribution with sw estimator, repeat 10 times.
q_est(x)= [[0.141 0.127 0.108 0.104 0.077 0.063 0.059 0.055 0.043 0.038 0.03  0.027
  0.024 0.021 0.018 0.016 0.014 0.013 0.011 0.011]]
AAA Solution Found!

eps=1.00
Found matrix M for sw estimator!
generating random num pools.
