In [1]:
from itertools import product
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.metrics import adjusted_rand_score

### Producing a single table with all Simulation A results

In [None]:
p_s = [500,100]
d_s = [2,4,8]
list_all = []

# Walk every (p, d) combination in the same order as a nested p-then-d loop
# and collect the APPLAM frame followed by the Lamb frame for each one.
for p, d in product(p_s, d_s):
    path_applam = "df_performance_applam_p{0}_d{1}.csv".format(p,d)
    path_lamb = "df_performance_lamb_p{0}_d{1}.csv".format(p,d)

    with open(path_applam, newline='') as my_csv_applam:
        df_applam = pd.read_csv(my_csv_applam)

    with open(path_lamb, newline='') as my_csv_lamb:
        df_lamb = pd.read_csv(my_csv_lamb)

    list_all.extend([df_applam, df_lamb])

# Stack all per-scenario frames and persist them as one CSV that the
# later cells read back.
df_all = pd.concat(list_all)
df_all.to_csv("df_all_results.csv")

# Producing Tables

In [None]:
# Values of p and d used to select rows / files in the cells below.
p_s = [500, 100]
d_s = [2, 4, 8]

# Parameter grids: conc_dir_s is paired with the "Lamb" model and
# rho_s with the "APPLAM" model when the summary tables are labelled.
conc_dir_s = [0.1, 0.5, 1]
rho_s = [0.5, 1, 5]

### Produce one csv for each p

In [None]:
# Reload the combined results written by the concatenation cell.
with open("df_all_results.csv", newline='') as my_csv_all:
    df_all = pd.read_csv(my_csv_all)

# For every p, average the performance metrics per
# (latent dimension, model, parameter) and write one summary CSV.
for p in p_s:

    data = df_all.loc[df_all['p'] == p]

    # Single "Parameter" column: the 'intensity' value on APPLAM rows and the
    # 'conc_dir' value on every other (Lamb) row.
    # NOTE(review): presumably 'intensity' is the APPLAM parameter (rho) and
    # 'conc_dir' the Lamb one, as in the boxplot cell -- confirm.
    parameter = data['intensity'].where(data['model'] == 'APPLAM',
                                        data['conc_dir'])

    # Group on the parameter as well, so the row labels come from the data
    # itself instead of being attached by position to a sorted groupby
    # result (which orders "APPLAM" before "Lamb").
    data_agg = (
        data.assign(Parameter=parameter)
        .groupby(['d', 'model', 'Parameter'])
        .agg(
            avg_avg_nclus=pd.NamedAgg(column='avg_nclus', aggfunc='mean'),
            avg_mode_nclus=pd.NamedAgg(column='mode_nclus', aggfunc='mean'),
            avg_ari_best=pd.NamedAgg(column='ari_best_clus', aggfunc='mean'),
        )
    )

    # Three index levels, read back later with index_col=[0,1,2].
    df_final = data_agg.rename_axis(index=["Latent dim", "Model", "Parameter"])

    df_final.to_csv("summarizing_dataframe_p_{0}.csv".format(p))



### Produce the table in LaTeX format

In [None]:
# Print each per-p summary table as LaTeX, with metrics rounded to 2 decimals.
for p in p_s:
    summary_path = "summarizing_dataframe_p_{0}.csv".format(p)
    with open(summary_path, newline='') as my_csv:
        # The first three columns are restored as the row MultiIndex.
        df = pd.read_csv(my_csv, index_col=[0,1,2])

    for metric in ("avg_avg_nclus", "avg_mode_nclus", "avg_ari_best"):
        df[metric] = df[metric].round(2)

    print("p = ",p)
    print(df.to_latex())

# Producing boxplots

### Plot the boxplots

In [None]:
# Load the combined results again so the plotting cell can run without
# relying on df_all from the table section.
with open("df_all_results.csv", newline='') as results_file:
    df_all = pd.read_csv(results_file)

In [None]:
# One row of panels per model, one column per latent dimension d.
# Each entry is (value in df_all['model'], column used for the hue).
panel_rows = [('APPLAM', 'intensity'), ('Lamb', 'conc_dir')]

# squeeze=False keeps axs two-dimensional even if d_s has a single entry.
fig, axs = plt.subplots(len(panel_rows), len(d_s), squeeze=False)

for idx_d, d in enumerate(d_s):
    for idx_row, (model, hue_col) in enumerate(panel_rows):
        # Each comparison must be parenthesised: & binds tighter than ==,
        # so the unparenthesised form does not build the intended mask.
        data = df_all.loc[(df_all['model'] == model) & (df_all['d'] == d)]

        ax = axs[idx_row, idx_d]
        sns.boxplot(ax=ax,
                    x=data['p'],
                    y=data['avg_nclus'],
                    hue=data[hue_col],
                    palette='husl')
        ax.set_title("{0}, d = {1}".format(model, d))