In [1]:
import numpy as np
import arviz as az
import math
# import pymc3 as pm
import pickle
import matplotlib.pyplot as plt
from google.protobuf import text_format
from scipy.stats import multivariate_normal as mvn
from scipy.stats import skewnorm
from scipy.stats import norm
from scipy.interpolate import griddata
import pp_mix.protos.py.params_pb2 as params_pb2

from pp_mix.interface import ConditionalMCMC, cluster_estimate
from pp_mix.utils import loadChains, to_numpy, to_proto
from pp_mix.protos.py.state_pb2 import MultivariateMixtureState, EigenVector, EigenMatrix
from pp_mix.protos.py.params_pb2 import Params

# Generate data

Assumption: the covariance $\Delta$ of the latent factors is the identity matrix, $\Delta = I_d$, and is the same for all clusters.

In [3]:
def generate_etas(mus, deltas_cov, cluster_alloc):
    """Draw one latent vector per observation from its cluster's Gaussian.

    Parameters
    ----------
    mus : ndarray of shape (M, d)
        Row ``k`` is used as the mean for every observation allocated to
        cluster ``k``.
    deltas_cov : array_like of shape (d, d)
        Covariance matrix, shared by all clusters.
    cluster_alloc : iterable of int
        Cluster index of each observation; each entry selects a row of ``mus``.

    Returns
    -------
    ndarray of shape (n, d)
        One sampled vector per entry of ``cluster_alloc``, in the same order.
    """
    # Re-seed the global NumPy RNG so every call reproduces the same draws.
    np.random.seed(seed=233423)
    draws = [mvn.rvs(mean=mus[i, :], cov=deltas_cov) for i in cluster_alloc]
    # mvn.rvs squeezes its result, so for d == 1 each draw is a scalar and
    # stacking would give shape (1, n). Reshape explicitly so the result is
    # always (n, d), including the d == 1 and n == 0 cases.
    return np.asarray(draws, dtype=float).reshape(len(draws), mus.shape[1])

def generate_data(Lambda, etas, sigma_bar_cov):
    """Sample observations around the projected latent vectors.

    For every row ``etas[i]`` a Gaussian vector is drawn with mean
    ``Lambda @ etas[i]`` and diagonal covariance ``diag(sigma_bar_cov)``.

    Parameters
    ----------
    Lambda : array_like
        Matrix multiplied with ``etas.T`` to obtain the means.
    etas : ndarray
        One latent vector per row.
    sigma_bar_cov : array_like
        Diagonal entries (variances) of the noise covariance.

    Returns
    -------
    ndarray
        The sampled observations stacked row by row, in the order of ``etas``.
    """
    # Re-seed the global NumPy RNG so every call reproduces the same draws.
    np.random.seed(seed=233423)
    noise_cov = np.diag(sigma_bar_cov)
    # Column ``j`` of ``loc`` is the mean of observation ``j``.
    loc = np.matmul(Lambda, etas.T)
    samples = []
    for obs in range(etas.shape[0]):
        samples.append(mvn.rvs(mean=loc[:, obs], cov=noise_cov))
    return np.vstack([samples])

def create_lambda(p,d):
    """Build a ``(p, d)`` block-indicator matrix.

    The ``p`` rows are split into ``d`` consecutive groups of ``p / d`` rows;
    rows of group ``j`` hold a 1 in column ``j`` and 0 elsewhere.

    Raises
    ------
    ValueError
        If ``p`` is not a multiple of ``d``.
    """
    if p % d:
        raise ValueError("Non compatible dimensions p and d: p={0}, d={1}".format(p,d))

    rows_per_column = p // d
    # Repeating each row of the identity gives the stacked blocks of ones.
    return np.repeat(np.eye(d), rows_per_column, axis=0)

def create_mus(d,M,dist):
    """Return ``M`` equally spaced cluster means as an ``(M, d)`` array.

    Row ``i`` has every coordinate equal to ``(M - 1) * dist / 2 - i * dist``,
    so the means are centred on zero and consecutive rows differ by ``dist``
    in each coordinate.
    """
    half_span = (M - 1) * dist / 2
    # One scalar level per cluster, from the largest value down.
    levels = half_span - np.arange(M) * dist
    # Copy each level across all d coordinates.
    return np.repeat(levels[:, np.newaxis], d, axis=1)

def create_cluster_alloc(n_pc,M):
    """Return cluster labels ``0..M-1`` with each label repeated ``n_pc`` times.

    Labels come out grouped: all 0s first, then all 1s, and so on.
    """
    labels = np.asarray(range(M))
    return labels.repeat(n_pc)

In [4]:
import csv

# Simulation grid: one CSV file is written per (p, d, M, n_percluster) combination.
dist=5                      # spacing between consecutive cluster means (see create_mus)
p_s = [50, 100]             # observed dimensions
d_s = [2, 5]                # latent dimensions
M_s = [4]                   # numbers of clusters
n_percluster_s = [50, 200]  # observations per cluster

for p in p_s:
    # Noise precision 2 on every coordinate, i.e. variance 0.5.
    sigma_bar_prec = np.repeat(2, p)
    sigma_bar_cov = 1/sigma_bar_prec
    for d in d_s:
        lamb = create_lambda(p,d)
        # Identity covariance for the latent factors, shared by all clusters.
        delta_cov = np.eye(d)
        for M in M_s:
            mus = create_mus(d,M,dist)
            for n_percluster in n_percluster_s:
                cluster_alloc = create_cluster_alloc(n_percluster,M)
                etas = generate_etas(mus, delta_cov, cluster_alloc)
                data = generate_data(lamb, etas, sigma_bar_cov)
                # The `with` line had been commented out while its indented body
                # was left in place, which made the whole cell an IndentationError.
                # newline="" is what the csv module requires for files it writes.
                out_path = "data/data_script_sim/p_{0}_d_{1}_M_{2}_nperclus_{3}_data.csv".format(p,d,M,n_percluster)
                with open(out_path, "w", newline="") as my_csv:
                    csvWriter = csv.writer(my_csv, delimiter=',')
                    csvWriter.writerows(data)




In [2]:
import pandas as pd

# Load one simulated dataset back as a plain NumPy array.
csv_path = "data/data_script_sim/p_50_d_2_M_4_nperclus_50_data.csv"
with open(csv_path, newline='') as my_csv:
    # The file has no header row; comma is read_csv's default separator.
    frame = pd.read_csv(my_csv, header=None)
data = frame.values

print(data.shape)

(200, 50)
