In [4]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval


# Load the per-run statistics table.
data = pd.read_csv("data/distribution_statistics_with_random.csv")

# These columns are stored in the CSV as Python-literal strings and are parsed
# back with `literal_eval`. A bare `nan` token is not a valid Python literal,
# so it is replaced by 0 before parsing.
# The word boundaries (\b) make sure only the standalone token `nan` is
# rewritten; a plain substring replace would also mangle any text that merely
# contains those letters (e.g. "finance" -> "fi0ce") in `input_question`.
LIST_COLUMNS = ['input_question', 'layers', 'heterogenity', 'eigen_values_adj', 'eigen_values_lap']
for list_cols in LIST_COLUMNS:
    data[list_cols] = (
        data[list_cols]
        .str.replace(r"\bnan\b", "0", regex=True)
        .apply(literal_eval)
    )

# Distinct values of the grouping columns, used to iterate over every
# (model, few_shot, threshold) combination.
model_list = data["model"].unique()
few_shot_list = data["few_shot"].unique()
threshold_list = data["threshold"].unique()

# For every inner eigenvalue list: take absolute values and sort ascending.
# Assumes each cell of `eigen_values_adj` is a list of eigenvalue lists
# (presumably one per layer) -- TODO confirm against the producer of the CSV.
data["sorted_eigen_values_adj"] = data['eigen_values_adj'].apply(
    lambda eigen_lists: [sorted(abs(e) for e in list_eigen) for list_eigen in eigen_lists]
)

# Spectral gap = largest minus second-largest absolute eigenvalue.
# NOTE: requires at least two eigenvalues per inner list (IndexError otherwise).
data["spectral_gap_adj"] = data['sorted_eigen_values_adj'].apply(
    lambda sorted_lists: [list_eigen[-1] - list_eigen[-2] for list_eigen in sorted_lists]
)



In [5]:
# Set to True to draw one diagnostic figure (heterogeneity and spectral gap
# against layers) for every row of `data`. Off by default.
SHOW_LAYER_PLOTS = False

# One record per row of `data`, visited grouped by (model, few_shot, threshold).
analyze_plot = []
for model in model_list:
    for few_shot in few_shot_list:
        for threshold in threshold_list:
            filtered_data = data[
                (data["model"] == model)
                & (data["few_shot"] == few_shot)
                & (data["threshold"] == threshold)
            ]
            rows = zip(
                filtered_data["input_question"].tolist(),
                filtered_data["layers"].tolist(),
                filtered_data["heterogenity"].tolist(),
                filtered_data["spectral_gap_adj"].tolist(),
                filtered_data["random"].tolist(),
            )
            for questions, layers, heterogenity_value, spectral_gap, random_flag in rows:
                # Standard deviation of the first-order differences between
                # consecutive entries of each series (computed once and reused
                # for both the optional figure titles and the summary record).
                heterogeneity_std_dev = np.std(np.diff(heterogenity_value))
                spectral_gap_std_dev = np.std(np.diff(spectral_gap))

                if SHOW_LAYER_PLOTS:
                    fig, (hetero, eigen) = plt.subplots(figsize=(12, 4), ncols=2)

                    hetero.plot(layers, heterogenity_value)
                    hetero.set_ylabel("Heterogeneity")
                    hetero.set_xlabel("Layers")
                    hetero.set_title(heterogeneity_std_dev)

                    eigen.plot(layers, spectral_gap)
                    eigen.set_ylabel("Spectral Gap")
                    eigen.set_xlabel("Layers")
                    eigen.set_title(spectral_gap_std_dev)

                    fig.suptitle(f"{model} | {few_shot}| {threshold} | {questions}| {random_flag}")
                    plt.show()
                    # Release the figure so a full run does not accumulate
                    # one open figure per row.
                    plt.close(fig)

                analyze_plot.append({
                    "model": model,
                    "few_shot": few_shot,
                    "threshold": threshold,
                    "random": random_flag,
                    "questions": questions,
                    "std_heterogenity": heterogeneity_std_dev,
                    "std_spectral_gap": spectral_gap_std_dev,
                })

# Summary frame consumed by the scatter plots.
df = pd.DataFrame(analyze_plot)

In [None]:
def _scatter_std_pair(frame, hue, suptitle, figsize=(10, 4)):
    """Draw the two summary scatter plots side by side and show the figure.

    Left panel: ``std_heterogenity`` vs ``few_shot``; right panel:
    ``std_spectral_gap`` vs ``few_shot``. Points are coloured by ``hue``.
    Only the right-hand legend is kept and it is placed outside the axes.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to plot; must contain ``few_shot``, ``std_heterogenity``,
        ``std_spectral_gap`` and the ``hue`` column.
    hue : str
        Name of the column used to colour the points.
    suptitle : str
        Title written above both panels.
    figsize : tuple, optional
        Figure size passed to ``plt.subplots``.
    """
    fig, (ax0, ax1) = plt.subplots(figsize=figsize, ncols=2)
    sns.scatterplot(data=frame, x="few_shot", y="std_heterogenity", hue=hue, ax=ax0)
    sns.scatterplot(data=frame, x="few_shot", y="std_spectral_gap", hue=hue, ax=ax1)
    ax0.set_title("Heterogenity")
    ax1.set_title("Spectral Gap")

    # Both panels share the same hue, so the left legend is redundant.
    # get_legend() returns None when no legend was created (e.g. an empty
    # frame), so guard before removing it.
    left_legend = ax0.get_legend()
    if left_legend is not None:
        left_legend.remove()

    fig.suptitle(suptitle)
    # Place the remaining legend just outside the right-hand axes.
    ax1.legend(loc=(1.04, 0))
    plt.show()


# Overall views of the summary frame, coloured by one column at a time.
_scatter_std_pair(df, hue="model", suptitle="Model")
_scatter_std_pair(df, hue="random", suptitle="Random")
_scatter_std_pair(df, hue="questions", suptitle="Question")

# Per-model view restricted to the non-random rows.
# Both panels use the same model-filtered frame, so the right-hand panel
# matches the model named in the figure title.
for model in model_list:
    model_actual_rows = df[(df["random"] == False) & (df["model"] == model)]
    _scatter_std_pair(
        model_actual_rows,
        hue="questions",
        suptitle=f"Few Shot Only Actual Input for {model}",
        figsize=(14, 4),
    )
