# Attention vs ARI Comparison
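In this notebook we aim to compare the SAS trajectory patterns with the vowel clustering results. The cells below load the saved evaluation outputs of the reconstruction models and compute, tabulate, and plot their reconstruction loss.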

In [32]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# from C_0B_eval import *
from C_0X_defs import *
from C_0Y_evaldefs import *
from scipy.stats import sem, ttest_ind
from misc_recorder import ListRecorder
import pandas as pd

In [33]:
# Identify the training script and the timestamp of the runs analysed below.
train_name = "C_0Tg"
ts = "0915163757"  # shared timestamp; it does not contain the run number

# Evaluation outputs for the "swapped" and the regular test sets are kept in
# sibling directories under the model save root (`model_save_`).
swapped_res_save_dir = os.path.join(model_save_, f"evalswapped-{train_name}-{ts}")
res_save_dir = os.path.join(model_save_, f"eval-{train_name}-{ts}")

In [15]:
# Collect the mean squared reconstruction error for every combination of
# test set, model size, run and epoch into `results` (one dict per row).

# Hidden dimension recorded for each evaluated model. Iterating this mapping
# (instead of a list plus an if/elif chain) means a model can never end up in
# the table with a silent placeholder dimension of 0.
model_dimensions = {
    'recon4-phi': 4,
    'recon8-phi': 8,
    'recon16-phi': 16,
    'recon32-phi': 32,
    'recon64-phi': 64,
    'recon96-phi': 96,
}

# Root directory of the saved evaluation results for each test set.
eval_dirs = {
    'normal': res_save_dir,
    'swapped': swapped_res_save_dir,
}

results = []
for test_type, eval_dir in eval_dirs.items():
    for model_type, dimension in model_dimensions.items():
        for model_condition in ['b']:
            for run in range(1, 6):
                res_read_dir = os.path.join(eval_dir, f"{model_type}", f"{model_condition}", f"{run}")

                for epoch in range(0, 101):
                    # One saved result file per epoch, addressed by its "all-<epoch>" prefix.
                    hidrep_handler = DictResHandler(whole_res_dir=res_read_dir,
                                                    file_prefix=f"all-{epoch}")
                    hidrep_handler.read()
                    hidrep = hidrep_handler.res

                    # Presumably sequences of per-item arrays with matching
                    # shapes (they are stacked row-wise below) -- TODO confirm.
                    ori = hidrep['ori']
                    recon = hidrep['recon']

                    # These two names are deliberately left at notebook level:
                    # the cosine-similarity shape check further down reads the
                    # arrays produced by the last iteration.
                    stacked_ori = np.vstack(ori)
                    stacked_recon = np.vstack(recon)

                    # Mean squared error over every element of the stacked arrays.
                    data = np.mean(np.square(stacked_ori - stacked_recon))

                    datadict = {'dimension': dimension, 'model_condition': model_condition, 'run': run, "epoch": epoch, "test_type": test_type, 'recon_loss': data}
                    results.append(datadict)

In [20]:
# Turn the list of per-epoch result dicts into a table (one row per dict).
resultsdf = pd.DataFrame(results)

In [21]:
# Persist the table next to the models; index=False keeps the row index out of the file.
resultsdf.to_csv(f"{model_save_}/recon_loss_{ts}.csv", index=False)

In [22]:
# Reload the saved table, so the cells below can run from the CSV written above
# without repeating the loading loop.
resultsdf = pd.read_csv(f"{model_save_}/recon_loss_{ts}.csv")

In [23]:
def cosine_similarity_axis(A: np.ndarray, B: np.ndarray, axis: int = 0) -> np.ndarray:
    """Cosine similarity between corresponding vectors of ``A`` and ``B``.

    The vectors are the slices of the two arrays taken along ``axis``; that
    axis is reduced away in the result.

    Args:
        A: First array.
        B: Second array, multiplied element-wise with ``A``.
        axis: Axis along which the dot product and the norms are taken.

    Returns:
        The dot product of each vector pair divided by the product of their
        norms. There is no guard against zero-norm vectors, so such pairs go
        through a division by zero.
    """
    # Product of the two vector magnitudes, one value per vector pair.
    magnitudes = np.linalg.norm(A, axis=axis) * np.linalg.norm(B, axis=axis)
    # Dot product along the same axis, scaled by the magnitudes.
    return (A * B).sum(axis=axis) / magnitudes

In [31]:
# Shape check of the row-wise cosine similarity. `stacked_ori` and
# `stacked_recon` are the arrays left over from the last iteration of the
# loading loop above, so this cell only works after that loop has run.
cosine_similarity_axis(stacked_ori, stacked_recon, axis=1).shape

(19376,)

In [24]:
def df_to_np_array(df, run_col, epoch_col, score_col):
    """Reshape long-format scores into a 2-D array of runs by epochs.

    Args:
        df: Long-format table holding one score per (run, epoch) pair.
        run_col: Name of the column whose values become the rows.
        epoch_col: Name of the column whose values become the columns.
        score_col: Name of the column supplying the cell values.

    Returns:
        NumPy array with one row per run and one column per epoch.
    """
    runs_by_epochs = df.pivot(index=run_col, columns=epoch_col, values=score_col)
    return runs_by_epochs.to_numpy()

In [26]:
from C_0Tf_n_integrate_abx_pph import plot_many

# One figure per model size, each comparing the 'normal' and 'swapped' test
# sets; every plotted series is a (runs x epochs) array of reconstruction loss.
mk(f"{model_save_}/recon_loss_{ts}/")
for dimension in [4, 8, 16, 32, 64, 96]:
    of_this_size = resultsdf[resultsdf['dimension'] == dimension]
    col = [
        df_to_np_array(of_this_size[of_this_size['test_type'] == test_type],
                       'run', 'epoch', 'recon_loss')
        for test_type in ['normal', 'swapped']
    ]
    plot_many(col, ['normal', 'swapped'], f"{model_save_}/recon_loss_{ts}/{dimension}_recon_loss.png")

In [18]:
# Show the per-epoch results table.
# NOTE(review): the saved output below lists dimension 0 in its last rows,
# which the loading loop as written cannot produce for the listed models --
# the output looks stale; re-run to confirm.
resultsdf

Unnamed: 0,dimension,model_condition,run,epoch,test_type,recon_loss
0,4,b,1,0,normal,1.172213
1,4,b,1,1,normal,0.981734
2,4,b,1,2,normal,0.784990
3,4,b,1,3,normal,0.688995
4,4,b,1,4,normal,0.643922
...,...,...,...,...,...,...
6055,0,b,5,96,swapped,0.021762
6056,0,b,5,97,swapped,0.021320
6057,0,b,5,98,swapped,0.021936
6058,0,b,5,99,swapped,0.021412


In [None]:
# Rebuild the table from the in-memory `results` list; this overwrites the
# frame that was loaded from CSV earlier.
# NOTE(review): the saved outputs from here on show no epoch/test_type columns
# and include a 'u' model condition, so they appear to come from an earlier
# version of `results` -- confirm before relying on them.
resultsdf = pd.DataFrame(results)

In [None]:
# Show the rebuilt results table.
resultsdf

Unnamed: 0,dimension,model_condition,run,recon_loss
0,4,u,1,0.360552
1,4,u,2,0.364517
2,4,u,3,0.363051
3,4,u,4,0.399515
4,4,u,5,0.361524
5,4,b,1,0.393435
6,4,b,2,0.329338
7,4,b,3,0.385332
8,4,b,4,0.46426
9,4,b,5,0.435483


In [None]:
# Perform the groupby and calculate the mean for the remaining column
# Average the reconstruction loss within each (dimension, model_condition)
# group, then turn the group keys back into ordinary columns.
by_size_and_condition = resultsdf.groupby(["dimension", "model_condition"])
mean_recon_loss = by_size_and_condition["recon_loss"].mean()
grouped_df = mean_recon_loss.reset_index()

In [None]:
# Show the mean reconstruction loss per dimension and model condition.
grouped_df

Unnamed: 0,dimension,model_condition,recon_loss
0,4,b,0.401569
1,4,u,0.369832
2,8,b,0.222804
3,8,u,0.21629
4,16,b,0.125158
5,16,u,0.119112
6,32,b,0.078389
7,32,u,0.075468


In [None]:
# Split the grouped means by model condition ('b' and 'u').
grouped_condition = grouped_df['model_condition']
gures = grouped_df[grouped_condition == 'u']
gbres = grouped_df[grouped_condition == 'b']

In [None]:
# Build a comma-separated run of LaTeX terms, one per dimension, from the mean
# reconstruction loss of the 'b' models.
# The template is a raw string: "\o" and "\e" are not valid Python escape
# sequences, so a plain literal triggers an invalid-escape warning. The
# resulting text is unchanged.
term_template = r"\overline{\epsilon}_{%d} = %.4f, "
outstr = ""
for dim in [4, 8, 16, 32]:
    # First (expected: only) grouped row for this dimension.
    r = gbres[gbres['dimension'] == dim]['recon_loss'].values[0]
    outstr += term_template % (dim, r)

In [None]:
# print() rather than a bare expression, so the LaTeX backslashes are shown
# as-is instead of in escaped repr form.
print(outstr)

\overline{\epsilon}_{4} = 0.4016, \overline{\epsilon}_{8} = 0.2228, \overline{\epsilon}_{16} = 0.1252, \overline{\epsilon}_{32} = 0.0784, 


In [None]:
# Build a comma-separated run of LaTeX terms, one per dimension, from the mean
# reconstruction loss of the 'u' models.
# The template is a raw string: "\o" and "\e" are not valid Python escape
# sequences, so a plain literal triggers an invalid-escape warning. The
# resulting text is unchanged.
term_template = r"\overline{\epsilon}_{%d} = %.4f, "
outstr = ""
for dim in [4, 8, 16, 32]:
    # First (expected: only) grouped row for this dimension; this raises
    # IndexError if `gures` has no row for it.
    r = gures[gures['dimension'] == dim]['recon_loss'].values[0]
    outstr += term_template % (dim, r)

In [None]:
# print() rather than a bare expression, so the LaTeX backslashes are shown
# as-is instead of in escaped repr form.
print(outstr)

\overline{\epsilon}_{4} = 0.3698, \overline{\epsilon}_{8} = 0.2163, \overline{\epsilon}_{16} = 0.1191, \overline{\epsilon}_{32} = 0.0755, 


In [None]:
# Split the ungrouped results by model condition ('b' and 'u').
row_condition = resultsdf['model_condition']
ures = resultsdf[row_condition == 'u']
bres = resultsdf[row_condition == 'b']

In [None]:
# Correlation between reconstruction loss and model dimension, computed
# separately for the 'b' and 'u' conditions (Series.corr with its default method).
bres["recon_loss"].corr(bres["dimension"]), ures["recon_loss"].corr(ures["dimension"])

(-0.8299008887209766, -0.8565714128555049)