## Import libraries

In [None]:
import os
import json
import jsonlines

import numpy as np
from scipy.spatial.distance import euclidean, hamming
from scipy.stats import spearmanr, pearsonr

In [None]:
def read_memmap(filepath: str) -> np.memmap:
    """Open a read-only memory map described by a JSON sidecar config.

    The sidecar lives next to ``filepath`` and has the same name with the
    file extension replaced by ``.conf``. It must be a JSON object providing
    the ``"shape"`` and ``"dtype"`` entries used to interpret the raw data.

    Args:
        filepath: Path to the raw data file (conventionally ``*.dat``).

    Returns:
        A read-only ``np.memmap`` over ``filepath`` with the configured
        shape and dtype.

    Raises:
        OSError: If the sidecar config or the data file cannot be opened.
        KeyError: If the config lacks ``"shape"`` or ``"dtype"``.
    """
    # Swap only the final extension. A plain str.replace would also rewrite
    # any ".dat" substring occurring in a directory name along the path.
    root, _ = os.path.splitext(filepath)
    config_path = root + ".conf"

    with open(config_path, "r") as fin_config:
        memmap_configs = json.load(fin_config)

    return np.memmap(
        filepath,
        mode="r",
        shape=tuple(memmap_configs["shape"]),
        dtype=memmap_configs["dtype"],
    )

In [None]:
def format_distance_3bits(a, b):
    """Count the positions at which the 3-bit binary forms of a and b differ.

    Both integers are rendered as zero-padded 3-character binary strings.
    ``hamming`` yields the fraction of differing positions, which is scaled
    by the string length (3) to give a count.
    """
    bits_a, bits_b = (list(format(value, "03b")) for value in (a, b))
    differing_fraction = hamming(bits_a, bits_b)
    return 3 * differing_fraction

In [None]:
# Root of the results tree. The analysis cells read their inputs from
# "<result_dir>/<dataset_name>/<model_name>/".
result_dir = "../../../results"

## Correlation analysis

### Correlation with consistency (last layer)

In [None]:
# Correlate last-layer embedding distance with pairwise consistency for every
# (model, dataset, prompting strategy) combination. Results are stored in
# corr_matrix / p_matrix, indexed as [model, dataset, prompting strategy].
os.makedirs("correlation_analysis_table", exist_ok=True)

dataset_names = ["CommonsenseQA", "QASC", "100TFQA", "GSM8K"]
model_names = ["Phi-3.5-mini-instruct", "Llama-3.1-8B-Instruct"]
prompting_strategies = ["zero-shot", "zero-shot-cot", "few-shot", "few-shot-cot"]

result_shape = (len(model_names), len(dataset_names), len(prompting_strategies))
corr_matrix, p_matrix = np.zeros(result_shape), np.zeros(result_shape)
for i, model_name in enumerate(model_names):
    for j, dataset_name in enumerate(dataset_names):
        for k, prompting_strategy in enumerate(prompting_strategies):
            output_dir = f"{result_dir}/{dataset_name}/{model_name}"
            layer_wise_path = os.path.join(output_dir, f"{prompting_strategy}_layer_wise_hidden_states.dat")

            try:
                layer_wise_hidden_states = read_memmap(layer_wise_path)
            except Exception:
                # Deliberate best effort: a combination whose hidden states
                # cannot be loaded is flagged with -2, a value outside the
                # valid correlation range [-1, 1].
                corr_matrix[i, j, k] = -2
                p_matrix[i, j, k] = -2
                continue
            # Axes are treated as (samples, formats, layers, hidden size).
            _, num_formats, num_layers, _ = layer_wise_hidden_states.shape

            # Prepare pairwise consistency matrix (macro)
            score_path = os.path.join(output_dir, f"{prompting_strategy}_score.json")
            with open(score_path, "r") as fin:
                score = json.load(fin)
            # Pairs that do not appear in the score file keep the default 1.
            pairwise_consistency_matrix = np.ones((num_formats, num_formats))
            for key, value in score["consistency"].items():
                if key == "mean":
                    continue
                # The first and last "_"-separated tokens are the two format indices.
                split_key = key.split("_")
                ii, jj = int(split_key[0]), int(split_key[-1])
                pairwise_consistency_matrix[ii, jj] = value
                pairwise_consistency_matrix[jj, ii] = value

            # Step 1: Prepare input (only the last layer is analysed here)
            layer_idx = num_layers - 1
            X = layer_wise_hidden_states[:, :, layer_idx, :]
            X_mean = X.mean(axis=0)  # one mean embedding per format

            # Step 2: Macro analysis over every unordered format pair (a < b)
            format_pairs = [(a, b) for a in range(num_formats) for b in range(a + 1, num_formats)]
            pairwise_distance_list = [euclidean(X_mean[a], X_mean[b]) for a, b in format_pairs]
            pairwise_consistency_list = [pairwise_consistency_matrix[a, b] for a, b in format_pairs]

            pearson_corr, pearson_p = pearsonr(pairwise_distance_list, pairwise_consistency_list)
            spearman_corr, spearman_p = spearmanr(pairwise_distance_list, pairwise_consistency_list)

            # Print correlation results
            print(f"{dataset_name} / {model_name} / {prompting_strategy}")
            print(f"Layer {layer_idx}")
            print("Macro analysis")
            print(f"Pearson correlation: {pearson_corr:.2f} (p-value: {pearson_p:.3e})")
            print(f"Spearman correlation: {spearman_corr:.2f} (p-value: {spearman_p:.3e})")
            print()

            corr_matrix[i, j, k] = spearman_corr
            p_matrix[i, j, k] = spearman_p

# Render corr_matrix / p_matrix (indexed [model, task, strategy]) as a LaTeX
# table, print it, and save it under correlation_analysis_table/.
prompting_strategies = ["Zero-shot", "Zero-shot CoT", "Few-shot", "Few-shot CoT"]
column_spec = "c|c|" + "c" * len(prompting_strategies)

# Create LaTeX table string
latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\caption{Correlation analysis between last layer embedding distance and pairwise consistency. Each cell represents Spearman correlation coefficient and p-value (in parenthesis).}\n"
latex_code += "\\label{tab:last_layer_embedding_distance_correlation}\n"
latex_code += "\\begin{tabular}{" + column_spec + "}\n"
latex_code += "\\toprule\n"
latex_code += "Model & Task & " + " & ".join(prompting_strategies) + " \\\\\n"
latex_code += "\\midrule\\midrule\n"

# Fill the table with data: one section per model, one row per task
model_sections = []
for model_idx, model in enumerate(model_names):
    section = f"\\multirow{{{len(dataset_names)}}}{{*}}{{{model}}}\n"
    for task_idx, task in enumerate(dataset_names):
        row = "      " + f"& {task} "
        for strat_idx in range(len(prompting_strategies)):
            corr = corr_matrix[model_idx, task_idx, strat_idx]
            p = p_matrix[model_idx, task_idx, strat_idx]

            # Values below -1.5 cannot be real correlations; they are the
            # -2 placeholder stored for combinations without loadable data.
            if corr < -1.5:
                row += "& - "
                continue

            corr_str = f"{corr:.2f}"
            # Very small p-values would round to 0.000, so use scientific notation.
            p_str = f"{p:.3e}" if p < 0.001 else f"{p:.3f}"

            row += f"& {corr_str} ({p_str}) "
        section += row + "\\\\\n"
    model_sections.append(section)

# Joining places a \midrule only *between* model sections. This replaces the
# earlier str.rstrip("\\midrule\n") call, which strips a set of characters
# rather than a suffix and therefore also removed the last row terminator.
latex_code += "\\midrule\n".join(model_sections)

# Close LaTeX table
latex_code += "\\bottomrule\n"
latex_code += "\\end{tabular}\n"
latex_code += "\\end{table}"

# Display the generated LaTeX table code
print(latex_code)

with open("correlation_analysis_table/last_layer_embedding_distance_correlation_with_consistency_table.txt", "w") as fout:
    fout.write(latex_code)

### Correlation with consistency (best layer)

In [None]:
# Correlate embedding distance with pairwise consistency at every layer and
# keep, per (model, dataset, prompting strategy), the layer with the lowest
# (most negative) Spearman coefficient. Results are stored in
# corr_matrix / p_matrix, indexed as [model, dataset, prompting strategy].
os.makedirs("correlation_analysis_table", exist_ok=True)

dataset_names = ["CommonsenseQA", "QASC", "100TFQA", "GSM8K"]
model_names = ["Phi-3.5-mini-instruct", "Llama-3.1-8B-Instruct"]
prompting_strategies = ["zero-shot", "zero-shot-cot", "few-shot", "few-shot-cot"]

result_shape = (len(model_names), len(dataset_names), len(prompting_strategies))
corr_matrix, p_matrix = np.zeros(result_shape), np.zeros(result_shape)
for i, model_name in enumerate(model_names):
    for j, dataset_name in enumerate(dataset_names):
        for k, prompting_strategy in enumerate(prompting_strategies):
            output_dir = f"{result_dir}/{dataset_name}/{model_name}"
            layer_wise_path = os.path.join(output_dir, f"{prompting_strategy}_layer_wise_hidden_states.dat")

            try:
                layer_wise_hidden_states = read_memmap(layer_wise_path)
            except Exception:
                # Deliberate best effort: a combination whose hidden states
                # cannot be loaded is flagged with -2, a value outside the
                # valid correlation range [-1, 1].
                corr_matrix[i, j, k] = -2
                p_matrix[i, j, k] = -2
                continue
            # Axes are treated as (samples, formats, layers, hidden size).
            _, num_formats, num_layers, _ = layer_wise_hidden_states.shape

            # Prepare pairwise consistency matrix (macro)
            score_path = os.path.join(output_dir, f"{prompting_strategy}_score.json")
            with open(score_path, "r") as fin:
                score = json.load(fin)
            # Pairs that do not appear in the score file keep the default 1.
            pairwise_consistency_matrix = np.ones((num_formats, num_formats))
            for key, value in score["consistency"].items():
                if key == "mean":
                    continue
                # The first and last "_"-separated tokens are the two format indices.
                split_key = key.split("_")
                ii, jj = int(split_key[0]), int(split_key[-1])
                pairwise_consistency_matrix[ii, jj] = value
                pairwise_consistency_matrix[jj, ii] = value

            # The pair order and the consistency values do not depend on the layer.
            format_pairs = [(a, b) for a in range(num_formats) for b in range(a + 1, num_formats)]
            pairwise_consistency_list = [pairwise_consistency_matrix[a, b] for a, b in format_pairs]

            # Track the best layer explicitly. Seeding the running minimum
            # with the 0 left by np.zeros would report a fabricated
            # "0.00 (p = 0)" whenever no layer has a negative coefficient.
            best_corr, best_p = None, None
            for layer_idx in range(num_layers):
                # Step 1: Prepare input
                X = layer_wise_hidden_states[:, :, layer_idx, :]
                X_mean = X.mean(axis=0)  # one mean embedding per format

                # Step 2: Macro analysis
                pairwise_distance_list = [euclidean(X_mean[a], X_mean[b]) for a, b in format_pairs]

                pearson_corr, pearson_p = pearsonr(pairwise_distance_list, pairwise_consistency_list)
                spearman_corr, spearman_p = spearmanr(pairwise_distance_list, pairwise_consistency_list)

                # Print correlation results
                print(f"{dataset_name} / {model_name} / {prompting_strategy}")
                print(f"Layer {layer_idx}")
                print("Macro analysis")
                print(f"Pearson correlation: {pearson_corr:.2f} (p-value: {pearson_p:.3e})")
                print(f"Spearman correlation: {spearman_corr:.2f} (p-value: {spearman_p:.3e})")
                print()

                # Undefined (NaN) coefficients never qualify as the best layer.
                if np.isnan(spearman_corr):
                    continue
                if best_corr is None or spearman_corr < best_corr:
                    best_corr, best_p = spearman_corr, spearman_p

            if best_corr is None:
                # No layer produced a defined coefficient; report that honestly.
                corr_matrix[i, j, k] = np.nan
                p_matrix[i, j, k] = np.nan
            else:
                corr_matrix[i, j, k] = best_corr
                p_matrix[i, j, k] = best_p

# Render corr_matrix / p_matrix (indexed [model, task, strategy]) as a LaTeX
# table, print it, and save it under correlation_analysis_table/.
prompting_strategies = ["Zero-shot", "Zero-shot CoT", "Few-shot", "Few-shot CoT"]
column_spec = "c|c|" + "c" * len(prompting_strategies)

# Create LaTeX table string
# NOTE(review): the caption says the correlation is "maximized", while the
# computation for this table keeps the lowest Spearman coefficient across
# layers - confirm the intended wording.
latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\caption{Correlation analysis between best layer embedding distance and pairwise consistency. A layer in which the correlation is maximized is selected as the best layer. Each cell represents Spearman correlation coefficient and p-value (in parenthesis).}\n"
latex_code += "\\label{tab:best_layer_embedding_distance_correlation}\n"
latex_code += "\\begin{tabular}{" + column_spec + "}\n"
latex_code += "\\toprule\n"
latex_code += "Model & Task & " + " & ".join(prompting_strategies) + " \\\\\n"
latex_code += "\\midrule\\midrule\n"

# Fill the table with data: one section per model, one row per task
model_sections = []
for model_idx, model in enumerate(model_names):
    section = f"\\multirow{{{len(dataset_names)}}}{{*}}{{{model}}}\n"
    for task_idx, task in enumerate(dataset_names):
        row = "      " + f"& {task} "
        for strat_idx in range(len(prompting_strategies)):
            corr = corr_matrix[model_idx, task_idx, strat_idx]
            p = p_matrix[model_idx, task_idx, strat_idx]

            # Values below -1.5 cannot be real correlations; they are the
            # -2 placeholder stored for combinations without loadable data.
            if corr < -1.5:
                row += "& - "
                continue

            corr_str = f"{corr:.2f}"
            # Very small p-values would round to 0.000, so use scientific notation.
            p_str = f"{p:.3e}" if p < 0.001 else f"{p:.3f}"

            row += f"& {corr_str} ({p_str}) "
        section += row + "\\\\\n"
    model_sections.append(section)

# Joining places a \midrule only *between* model sections. This replaces the
# earlier str.rstrip("\\midrule\n") call, which strips a set of characters
# rather than a suffix and therefore also removed the last row terminator.
latex_code += "\\midrule\n".join(model_sections)

# Close LaTeX table
latex_code += "\\bottomrule\n"
latex_code += "\\end{tabular}\n"
latex_code += "\\end{table}"

# Display the generated LaTeX table code
print(latex_code)

with open("correlation_analysis_table/best_layer_embedding_distance_correlation_with_consistency_table.txt", "w") as fout:
    fout.write(latex_code)

### Correlation with format distance (last layer)

In [None]:
# Correlate last-layer embedding distance with format distance for every
# (model, dataset, prompting strategy) combination. Results are stored in
# corr_matrix / p_matrix, indexed as [model, dataset, prompting strategy].
os.makedirs("correlation_analysis_table", exist_ok=True)

dataset_names = ["CommonsenseQA", "QASC", "100TFQA", "GSM8K"]
model_names = ["Phi-3.5-mini-instruct", "Llama-3.1-8B-Instruct"]
prompting_strategies = ["zero-shot", "zero-shot-cot", "few-shot", "few-shot-cot"]

result_shape = (len(model_names), len(dataset_names), len(prompting_strategies))
corr_matrix, p_matrix = np.zeros(result_shape), np.zeros(result_shape)
for i, model_name in enumerate(model_names):
    for j, dataset_name in enumerate(dataset_names):
        for k, prompting_strategy in enumerate(prompting_strategies):
            output_dir = f"{result_dir}/{dataset_name}/{model_name}"
            layer_wise_path = os.path.join(output_dir, f"{prompting_strategy}_layer_wise_hidden_states.dat")

            try:
                layer_wise_hidden_states = read_memmap(layer_wise_path)
            except Exception:
                # Deliberate best effort: a combination whose hidden states
                # cannot be loaded is flagged with -2, a value outside the
                # valid correlation range [-1, 1].
                corr_matrix[i, j, k] = -2
                p_matrix[i, j, k] = -2
                continue
            # Axes are treated as (samples, formats, layers, hidden size).
            _, num_formats, num_layers, _ = layer_wise_hidden_states.shape

            # Prepare format distance matrix (macro)
            score_path = os.path.join(output_dir, f"{prompting_strategy}_score.json")
            with open(score_path, "r") as fin:
                score = json.load(fin)
            # Only the pair keys of the score file are used here, to decide
            # which format pairs receive a distance. Others keep the default 1.
            format_distance_matrix = np.ones((num_formats, num_formats))
            for key in score["consistency"]:
                if key == "mean":
                    continue
                # The first and last "_"-separated tokens are the two format indices.
                split_key = key.split("_")
                ii, jj = int(split_key[0]), int(split_key[-1])
                distance = format_distance_3bits(ii, jj)
                format_distance_matrix[ii, jj] = distance
                format_distance_matrix[jj, ii] = distance

            # Step 1: Prepare input (only the last layer is analysed here)
            layer_idx = num_layers - 1
            X = layer_wise_hidden_states[:, :, layer_idx, :]
            X_mean = X.mean(axis=0)  # one mean embedding per format

            # Step 2: Macro analysis over every unordered format pair (a < b)
            format_pairs = [(a, b) for a in range(num_formats) for b in range(a + 1, num_formats)]
            pairwise_distance_list = [euclidean(X_mean[a], X_mean[b]) for a, b in format_pairs]
            format_distance_list = [format_distance_matrix[a, b] for a, b in format_pairs]

            pearson_corr, pearson_p = pearsonr(pairwise_distance_list, format_distance_list)
            spearman_corr, spearman_p = spearmanr(pairwise_distance_list, format_distance_list)

            # Print correlation results
            print(f"{dataset_name} / {model_name} / {prompting_strategy}")
            print(f"Layer {layer_idx}")
            print("Macro analysis")
            print(f"Pearson correlation: {pearson_corr:.2f} (p-value: {pearson_p:.3e})")
            print(f"Spearman correlation: {spearman_corr:.2f} (p-value: {spearman_p:.3e})")
            print()

            corr_matrix[i, j, k] = spearman_corr
            p_matrix[i, j, k] = spearman_p

# Render corr_matrix / p_matrix (indexed [model, task, strategy]) as a LaTeX
# table, print it, and save it under correlation_analysis_table/.
prompting_strategies = ["Zero-shot", "Zero-shot CoT", "Few-shot", "Few-shot CoT"]
column_spec = "c|c|" + "c" * len(prompting_strategies)

# Create LaTeX table string
latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\caption{Correlation analysis between format distance and last layer embedding distance. Each cell represents Spearman correlation coefficient and p-value (in parenthesis).}\n"
latex_code += "\\label{tab:last_layer_embedding_distance_correlation_with_format_distance}\n"
latex_code += "\\begin{tabular}{" + column_spec + "}\n"
latex_code += "\\toprule\n"
latex_code += "Model & Task & " + " & ".join(prompting_strategies) + " \\\\\n"
latex_code += "\\midrule\\midrule\n"

# Fill the table with data: one section per model, one row per task
model_sections = []
for model_idx, model in enumerate(model_names):
    section = f"\\multirow{{{len(dataset_names)}}}{{*}}{{{model}}}\n"
    for task_idx, task in enumerate(dataset_names):
        row = "      " + f"& {task} "
        for strat_idx in range(len(prompting_strategies)):
            corr = corr_matrix[model_idx, task_idx, strat_idx]
            p = p_matrix[model_idx, task_idx, strat_idx]

            # Values below -1.5 cannot be real correlations; they are the
            # -2 placeholder stored for combinations without loadable data.
            if corr < -1.5:
                row += "& - "
                continue

            corr_str = f"{corr:.2f}"
            # Very small p-values would round to 0.000, so use scientific notation.
            p_str = f"{p:.3e}" if p < 0.001 else f"{p:.3f}"

            row += f"& {corr_str} ({p_str}) "
        section += row + "\\\\\n"
    model_sections.append(section)

# Joining places a \midrule only *between* model sections. This replaces the
# earlier str.rstrip("\\midrule\n") call, which strips a set of characters
# rather than a suffix and therefore also removed the last row terminator.
latex_code += "\\midrule\n".join(model_sections)

# Close LaTeX table
latex_code += "\\bottomrule\n"
latex_code += "\\end{tabular}\n"
latex_code += "\\end{table}"

# Display the generated LaTeX table code
print(latex_code)

with open("correlation_analysis_table/last_layer_embedding_distance_correlation_with_format_distance_table.txt", "w") as fout:
    fout.write(latex_code)

### Correlation with format distance (best layer)

In [None]:
# Correlate embedding distance with format distance at every layer and keep,
# per (model, dataset, prompting strategy), the layer with the highest
# Spearman coefficient. Results are stored in corr_matrix / p_matrix,
# indexed as [model, dataset, prompting strategy].
os.makedirs("correlation_analysis_table", exist_ok=True)

dataset_names = ["CommonsenseQA", "QASC", "100TFQA", "GSM8K"]
model_names = ["Phi-3.5-mini-instruct", "Llama-3.1-8B-Instruct"]
prompting_strategies = ["zero-shot", "zero-shot-cot", "few-shot", "few-shot-cot"]

result_shape = (len(model_names), len(dataset_names), len(prompting_strategies))
corr_matrix, p_matrix = np.zeros(result_shape), np.zeros(result_shape)
for i, model_name in enumerate(model_names):
    for j, dataset_name in enumerate(dataset_names):
        for k, prompting_strategy in enumerate(prompting_strategies):
            output_dir = f"{result_dir}/{dataset_name}/{model_name}"
            layer_wise_path = os.path.join(output_dir, f"{prompting_strategy}_layer_wise_hidden_states.dat")

            try:
                layer_wise_hidden_states = read_memmap(layer_wise_path)
            except Exception:
                # Deliberate best effort: a combination whose hidden states
                # cannot be loaded is flagged with -2, a value outside the
                # valid correlation range [-1, 1].
                corr_matrix[i, j, k] = -2
                p_matrix[i, j, k] = -2
                continue
            # Axes are treated as (samples, formats, layers, hidden size).
            _, num_formats, num_layers, _ = layer_wise_hidden_states.shape

            # Prepare format distance matrix (macro)
            score_path = os.path.join(output_dir, f"{prompting_strategy}_score.json")
            with open(score_path, "r") as fin:
                score = json.load(fin)
            # Only the pair keys of the score file are used here, to decide
            # which format pairs receive a distance. Others keep the default 1.
            format_distance_matrix = np.ones((num_formats, num_formats))
            for key in score["consistency"]:
                if key == "mean":
                    continue
                # The first and last "_"-separated tokens are the two format indices.
                split_key = key.split("_")
                ii, jj = int(split_key[0]), int(split_key[-1])
                distance = format_distance_3bits(ii, jj)
                format_distance_matrix[ii, jj] = distance
                format_distance_matrix[jj, ii] = distance

            # The pair order and the format distances do not depend on the layer.
            format_pairs = [(a, b) for a in range(num_formats) for b in range(a + 1, num_formats)]
            format_distance_list = [format_distance_matrix[a, b] for a, b in format_pairs]

            # Track the best layer explicitly. Seeding the running maximum
            # with the 0 left by np.zeros would report a fabricated
            # "0.00 (p = 0)" whenever every layer has a negative coefficient.
            best_corr, best_p = None, None
            for layer_idx in range(num_layers):
                # Step 1: Prepare input
                X = layer_wise_hidden_states[:, :, layer_idx, :]
                X_mean = X.mean(axis=0)  # one mean embedding per format

                # Step 2: Macro analysis
                pairwise_distance_list = [euclidean(X_mean[a], X_mean[b]) for a, b in format_pairs]

                pearson_corr, pearson_p = pearsonr(pairwise_distance_list, format_distance_list)
                spearman_corr, spearman_p = spearmanr(pairwise_distance_list, format_distance_list)

                # Print correlation results
                print(f"{dataset_name} / {model_name} / {prompting_strategy}")
                print(f"Layer {layer_idx}")
                print("Macro analysis")
                print(f"Pearson correlation: {pearson_corr:.2f} (p-value: {pearson_p:.3e})")
                print(f"Spearman correlation: {spearman_corr:.2f} (p-value: {spearman_p:.3e})")
                print()

                # Undefined (NaN) coefficients never qualify as the best layer.
                if np.isnan(spearman_corr):
                    continue
                if best_corr is None or spearman_corr > best_corr:
                    best_corr, best_p = spearman_corr, spearman_p

            if best_corr is None:
                # No layer produced a defined coefficient; report that honestly.
                corr_matrix[i, j, k] = np.nan
                p_matrix[i, j, k] = np.nan
            else:
                corr_matrix[i, j, k] = best_corr
                p_matrix[i, j, k] = best_p

# Render corr_matrix / p_matrix (indexed [model, task, strategy]) as a LaTeX
# table, print it, and save it under correlation_analysis_table/.
prompting_strategies = ["Zero-shot", "Zero-shot CoT", "Few-shot", "Few-shot CoT"]
column_spec = "c|c|" + "c" * len(prompting_strategies)

# Create LaTeX table string
latex_code = "\\begin{table}[ht]\n"
latex_code += "\\centering\n"
latex_code += "\\caption{Correlation analysis between format distance and best layer embedding distance. A layer in which the correlation is maximized is selected as the best layer. Each cell represents Spearman correlation coefficient and p-value (in parenthesis).}\n"
latex_code += "\\label{tab:best_layer_embedding_distance_correlation_with_format_distance}\n"
latex_code += "\\begin{tabular}{" + column_spec + "}\n"
latex_code += "\\toprule\n"
latex_code += "Model & Task & " + " & ".join(prompting_strategies) + " \\\\\n"
latex_code += "\\midrule\\midrule\n"

# Fill the table with data: one section per model, one row per task
model_sections = []
for model_idx, model in enumerate(model_names):
    section = f"\\multirow{{{len(dataset_names)}}}{{*}}{{{model}}}\n"
    for task_idx, task in enumerate(dataset_names):
        row = "      " + f"& {task} "
        for strat_idx in range(len(prompting_strategies)):
            corr = corr_matrix[model_idx, task_idx, strat_idx]
            p = p_matrix[model_idx, task_idx, strat_idx]

            # Values below -1.5 cannot be real correlations; they are the
            # -2 placeholder stored for combinations without loadable data.
            if corr < -1.5:
                row += "& - "
                continue

            corr_str = f"{corr:.2f}"
            # Very small p-values would round to 0.000, so use scientific notation.
            p_str = f"{p:.3e}" if p < 0.001 else f"{p:.3f}"

            row += f"& {corr_str} ({p_str}) "
        section += row + "\\\\\n"
    model_sections.append(section)

# Joining places a \midrule only *between* model sections. This replaces the
# earlier str.rstrip("\\midrule\n") call, which strips a set of characters
# rather than a suffix and therefore also removed the last row terminator.
latex_code += "\\midrule\n".join(model_sections)

# Close LaTeX table
latex_code += "\\bottomrule\n"
latex_code += "\\end{tabular}\n"
latex_code += "\\end{table}"

# Display the generated LaTeX table code
print(latex_code)

with open("correlation_analysis_table/best_layer_embedding_distance_correlation_with_format_distance_table.txt", "w") as fout:
    fout.write(latex_code)