In [2]:
import numpy as np
import scipy as sp
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt 
import seaborn as sns
from lib.get_density import *

## Data simulation for figure 3

In [5]:
# Simulation sweep for figure 3: for every batch count N and every Sinkhorn
# regulariser value ve, draw a fresh random generator, call get_L2_estimator,
# and record the returned L2 value together with all parameters and the seed
# entropy so each run can be re-created later.
M = 20  # Number of point pairs in one batch
# N (number of batches) is swept in the inner loop below
S = 300  # Number of subsamples, should be smaller than M*N
# NOTE(review): with M = 20 the smallest swept N values (10, 12, 15) give
# M*N <= S, which contradicts the constraint stated above -- confirm that
# get_L2_estimator handles this case as intended.
std = 0.05  # Standard deviation for distance
jump = 0.3  # Distance of shift
jump_prob = 0.5  # Probability of shifting
# ve (Sinkhorn regulariser, suggested order is O(M*N^{-1/dim})) is swept below
subsample = True
EMML_itr = 5000  # Iterations for minimisation algo (make it large when M is large)
E = 100  # Resolution for output image

DF3_columns = ["L2","seed","M","N","S","std","jump","jump_prob","epsi","subsample","EMML_itr","E"]
DF3 = pd.DataFrame(columns = DF3_columns)
# Rows are collected in a plain list and turned into a DataFrame only when a
# checkpoint is written: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every call.
DF3_records = []

ss = 1 if subsample else 0  # subsample flag stored as 0/1 in the CSV

num_sim = 100  # Number of simulations
for _ in tqdm(range(num_sim), desc=" outer", position=0):
    cnt = 0
    for N in tqdm(np.logspace(1, 2.7, 20, dtype=int), desc=" inner loop", position=1, leave=False):
        cnt += 1
        for ve in [0.001, 0.0025, 0.01]:
            # Fresh OS-entropy seed per run; its entropy is saved with the row.
            seed = np.random.SeedSequence()
            gen = np.random.Generator(np.random.MT19937(seed))
            L2 = get_L2_estimator(gen, M, N, S, std, jump, jump_prob, ve, subsample, EMML_itr, E)
            DF3_records.append({
                "L2": L2,
                "seed": str(seed.entropy),
                "M": M,
                "N": N,
                "S": S,
                "std": std,
                "jump": jump,
                "jump_prob": jump_prob,
                "epsi": ve,
                "subsample": ss,
                "EMML_itr": EMML_itr,
                "E": E,
            })
        # Checkpoint every 5 values of N so a crash loses little work.
        if cnt % 5 == 0:
            DF3 = pd.DataFrame(DF3_records, columns=DF3_columns)
            DF3.to_csv('data_F3.csv')
    # Always write the complete results at the end of each simulation round.
    DF3 = pd.DataFrame(DF3_records, columns=DF3_columns)
    DF3.to_csv('data_F3.csv')

## Data simulation for figure 4

In [None]:

# Simulation sweep for figure 4: for every Sinkhorn regulariser value ve and
# every distance standard deviation std, draw a fresh random generator, call
# get_L2_estimator, and record the returned L2 value together with all
# parameters and the seed entropy so each run can be re-created later.
M = 20  # Number of point pairs in one batch
N = 20  # Number of batches
S = 300  # Number of subsamples, should be smaller than M*N
# std (standard deviation for distance) is swept in the loop below
jump = 0.3  # Distance of shift
jump_prob = 0.5  # Probability of shifting
# ve (Sinkhorn regulariser, suggested order is O(M*N^{-1/dim})) is swept below
subsample = True
EMML_itr = 5000  # Max iterations for minimisation algo (make it large when M is large)
E = 100  # Resolution for output image

DF4_columns = ["L2","seed","M","N","S","std","jump","jump_prob","epsi","subsample","EMML_itr","E"]
DF4 = pd.DataFrame(columns = DF4_columns)
# Rows are collected in a plain list and turned into a DataFrame only when a
# checkpoint is written: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every call.
DF4_records = []

ss = 1 if subsample else 0  # subsample flag stored as 0/1 in the CSV

num_sim = 100  # Number of simulations
for _ in tqdm(range(num_sim), desc=" outer", position=0):
    cnt = 0
    for ve in tqdm(np.logspace(-0.9, -3.1, 50), desc=" inner loop", position=1, leave=False):
        cnt += 1
        for std in [0.01, 0.025, 0.05]:
            # Fresh OS-entropy seed per run; its entropy is saved with the row.
            seed = np.random.SeedSequence()
            gen = np.random.Generator(np.random.MT19937(seed))
            L2 = get_L2_estimator(gen, M, N, S, std, jump, jump_prob, ve, subsample, EMML_itr, E)
            DF4_records.append({
                "L2": L2,
                "seed": str(seed.entropy),
                "M": M,
                "N": N,
                "S": S,
                "std": std,
                "jump": jump,
                "jump_prob": jump_prob,
                "epsi": ve,
                "subsample": ss,
                "EMML_itr": EMML_itr,
                "E": E,
            })
        # Checkpoint every 5 regulariser values so a crash loses little work.
        if cnt % 5 == 0:
            DF4 = pd.DataFrame(DF4_records, columns=DF4_columns)
            DF4.to_csv('data_F4.csv')
    # Always write the complete results at the end of each simulation round.
    DF4 = pd.DataFrame(DF4_records, columns=DF4_columns)
    DF4.to_csv('data_F4.csv')

## Data simulation for figure 5

In [None]:
# Simulation sweep for figure 5: for every batch size M and every distance
# standard deviation std, draw a fresh random generator, call
# get_L2_estimator, and record the returned L2 value together with all
# parameters and the seed entropy so each run can be re-created later.
# M (number of point pairs in one batch) is swept in the inner loop below
N = 20  # Number of batches
S = 300  # Number of subsamples, should be smaller than M*N
# NOTE(review): the sweep starts at M = 1, so M*N = 20 < S for the small M
# values, which contradicts the constraint stated above -- confirm that
# get_L2_estimator handles this case as intended.
# std (standard deviation for distance) is swept in the loop below
jump = 0.3  # Distance of shift
jump_prob = 0.5  # Probability of shifting
ve = 0.01  # Sinkhorn regulariser, suggested order is O(M*N^{-1/dim})
subsample = True
EMML_itr = 40000  # Iterations for minimisation algo (make it large when M is large)
E = 100  # Resolution for output image

DF5_columns = ["L2","seed","M","N","S","std","jump","jump_prob","epsi","subsample","EMML_itr","E"]
DF5 = pd.DataFrame(columns = DF5_columns)
# Rows are collected in a plain list and turned into a DataFrame only when a
# checkpoint is written: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every call.
DF5_records = []

ss = 1 if subsample else 0  # subsample flag stored as 0/1 in the CSV

num_sim = 100  # Number of simulations
for _ in tqdm(range(num_sim), desc=" outer", position=0):
    cnt = 0
    # np.unique drops the duplicate integers that the int-cast logspace produces.
    for M in tqdm(np.unique(np.logspace(0, 3.5, 32, dtype=int)), desc=" inner loop", position=1, leave=False):
        cnt += 1
        for std in [0.01, 0.025, 0.05]:
            # Fresh OS-entropy seed per run; its entropy is saved with the row.
            seed = np.random.SeedSequence()
            gen = np.random.Generator(np.random.MT19937(seed))
            L2 = get_L2_estimator(gen, M, N, S, std, jump, jump_prob, ve, subsample, EMML_itr, E)
            DF5_records.append({
                "L2": L2,
                "seed": str(seed.entropy),
                "M": M,
                "N": N,
                "S": S,
                "std": std,
                "jump": jump,
                "jump_prob": jump_prob,
                "epsi": ve,
                "subsample": ss,
                "EMML_itr": EMML_itr,
                "E": E,
            })
        # Checkpoint every 5 values of M so a crash loses little work.
        if cnt % 5 == 0:
            DF5 = pd.DataFrame(DF5_records, columns=DF5_columns)
            DF5.to_csv('data_F5.csv')
    # Always write the complete results at the end of each simulation round.
    DF5 = pd.DataFrame(DF5_records, columns=DF5_columns)
    DF5.to_csv('data_F5.csv')