## Preliminaries
Run ParameterCountPerModelPerComponent.ipynb to generate the data for this notebook.

In [None]:
import pandas as pd
import numpy as np
import os
import json
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import pprint

In [None]:
# Global matplotlib defaults for every figure in this notebook:
# 16 pt text everywhere, a slightly smaller 14 pt legend, 300 dpi figures.
# (No font family is configured here; matplotlib's default family is used.)
_rc_settings = (
    ("font", {"size": 16}),
    ("axes", {"titlesize": 16}),
    ("axes", {"labelsize": 16}),
    ("xtick", {"labelsize": 16}),
    ("ytick", {"labelsize": 16}),
    ("legend", {"fontsize": 14}),
    ("figure", {"dpi": 300}),
)
for _group, _options in _rc_settings:
    mpl.rc(_group, **_options)

### Get data from Experiment 1

In [None]:
# Load the benchmark results of the original (unquantized) models.
# Result: {model_name: <contents of "original_model_benchmarks">}.
folder_path_original_benchmarks = "logs/original_model_benchmarks"
original_benchmarks = {}
for filename in os.listdir(folder_path_original_benchmarks):
    # Only parse JSON logs, mirroring generate_data(); stray entries such as
    # .DS_Store or an .ipynb_checkpoints directory would otherwise crash here.
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(folder_path_original_benchmarks, filename)
    # JSON is UTF-8 by specification, so do not rely on the locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        raw_data = json.load(f)
    model_name = raw_data["model_name"]
    original_benchmarks[model_name] = raw_data["original_model_benchmarks"]

In [None]:
def generate_data(folder_path: str = "logs/first_experiment") -> dict:
    """Load the quantization experiment logs into a nested dictionary.

    Every ``*.json`` file directly inside *folder_path* is parsed; other
    directory entries are ignored.

    Args:
        folder_path: Directory containing one JSON log per quantization run.
            Defaults to the first experiment's log folder.

    Returns:
        ``{model_name: {include_component: {weight_bits: run}}}`` where each
        ``run`` is a dict with the keys ``"include_component"``,
        ``"weight_bits"``, ``"quantized_model_benchmarks"`` and
        ``"quantized_model_accuracy"`` (the run's ``"wikitext_accuracy"``).
        If several logs share the same model, component and bit width, the
        one read last wins.

    Raises:
        KeyError: If a log is missing one of the fields read below.
    """
    data = {}
    for filename in os.listdir(folder_path):
        if not filename.endswith(".json"):
            continue

        # JSON is UTF-8 by specification, so do not rely on the locale encoding.
        with open(os.path.join(folder_path, filename), "r", encoding="utf-8") as file:
            model_data = json.load(file)

        model_name = model_data["model_name"]
        include_component = model_data["include_component"]
        weight_bits = model_data["weight_bits"]
        quantized_model_benchmarks = model_data["quantized_model_benchmarks"]

        quantization_data = {
            "include_component": include_component,
            "weight_bits": weight_bits,
            "quantized_model_benchmarks": quantized_model_benchmarks,
            "quantized_model_accuracy": quantized_model_benchmarks["wikitext_accuracy"],
        }

        # Create the intermediate levels on first sight of a model/component.
        runs_per_bits = data.setdefault(model_name, {}).setdefault(include_component, {})
        runs_per_bits[weight_bits] = quantization_data
    return data


# Load every run from the default log folder and dump the nested
# {model_name: {include_component: {weight_bits: run}}} structure for inspection.
exp_data = generate_data()
pprint.pprint(exp_data)

### Get parameter counts and memory per component from model_memory_per_component.csv

In [None]:
# Per-component table; the following cell reads its "Model", "Component"
# and "Parameters" columns.
params_per_component = pd.read_csv("model_memory_per_component.csv")
params_per_component  # bare expression: rendered as the cell output in a notebook

In [None]:
# Pair the accuracy of every quantized run with the parameter count of the
# component that run targeted, grouped per model:
#   {model_name: [(accuracy, parameter_count), ...]}
correlation_per_model_wikitext = {}
correlation_per_model_mmlu = {}

for model_name, model_data in exp_data.items():
    for include_component, component_data in model_data.items():
        if not component_data:
            # No runs recorded for this component, so there is nothing to pair.
            continue

        # The parameter count depends only on (model, component), so look it up
        # once per component instead of once per bit width. A boolean mask
        # selects just the matching rows; unlike .where(...).dropna() it does
        # not discard a match because some unrelated column is NaN, and it
        # keeps the column's original dtype.
        matching_parameters = params_per_component.loc[
            (params_per_component["Model"] == model_name)
            & (params_per_component["Component"] == include_component),
            "Parameters",
        ]
        if matching_parameters.empty:
            raise LookupError(
                f"No parameter count for model {model_name!r}, "
                f"component {include_component!r} in params_per_component"
            )
        # If several rows match, the first one is used.
        parameter_count = matching_parameters.iloc[0]

        for data in component_data.values():
            benchmarks = data["quantized_model_benchmarks"]
            wikitext_accuracy = benchmarks["wikitext_accuracy"]
            mmlu_overall_accuracy = benchmarks["mmlu_results"]["overall_score"]

            correlation_per_model_wikitext.setdefault(model_name, []).append(
                (wikitext_accuracy, parameter_count)
            )
            correlation_per_model_mmlu.setdefault(model_name, []).append(
                (mmlu_overall_accuracy, parameter_count)
            )

print(correlation_per_model_wikitext)
print(correlation_per_model_mmlu)

## Show Pearson correlation between parameter count and accuracy per model

In [None]:
# Pearson correlation between accuracy and parameter count, per model and
# benchmark: {model_name: {"wikitext": r, "mmlu": r}}.
correlations = {}

# Wikitext results are processed (and printed) for all models first, then MMLU.
pairs_by_benchmark = (
    ("wikitext", correlation_per_model_wikitext),
    ("mmlu", correlation_per_model_mmlu),
)
for benchmark, pairs_per_model in pairs_by_benchmark:
    for model_name, pairs in pairs_per_model.items():
        accuracies = pd.Series([pair[0] for pair in pairs])
        parameter_counts = pd.Series([pair[1] for pair in pairs])
        pearson_r = accuracies.corr(parameter_counts)
        correlations.setdefault(model_name, {})[benchmark] = pearson_r
        print(f"{model_name} {benchmark}_accuracy: {pearson_r}")

# Emit the same numbers as a LaTeX table, rounded to two decimals.
table_header = (
    "Pearson correlation between parameter count and accuracy for each model",
    "\\begin{table}[H]",
    "\\centering",
    "\\begin{tabular}{|c|c|c|}",
    "\\hline",
    "Model & Pearson Correlation Parameter vs. Wikitext Accuracy & Pearson Correlation Parameter vs. MMLU Accuracy \\\\",
    "\\hline",
)
table_footer = (
    "\\hline",
    "\\end{tabular}",
    "\\end{table}",
)

for header_line in table_header:
    print(header_line)
for model_name, scores in correlations.items():
    print(f"{model_name} & {scores['wikitext']:.2f} & {scores['mmlu']:.2f} \\\\")
for footer_line in table_footer:
    print(footer_line)


## Plot the data

In [None]:
# Two stacked scatter plots (Wikitext on top, MMLU below) of accuracy against
# parameter count, one colour per model, each with a dashed linear fit.
colors = [
    "tab:blue",
    "tab:orange",
    "tab:green",
]


def _plot_accuracy_vs_parameters(axis, pairs_per_model, benchmark_label):
    """Draw one benchmark's accuracy-vs-parameter-count panel on *axis*.

    Args:
        axis: Matplotlib axes to draw on.
        pairs_per_model: ``{model_name: [(accuracy, parameter_count), ...]}``.
        benchmark_label: Benchmark name used in the y-axis label and title.
    """
    for idx, (model_name, pairs) in enumerate(pairs_per_model.items()):
        accuracies = [pair[0] for pair in pairs]
        parameter_counts = [pair[1] for pair in pairs]
        # Colours repeat if there are more models than entries in `colors`.
        color = colors[idx % len(colors)]

        axis.scatter(parameter_counts, accuracies, label=model_name, color=color)

        # Degree-1 least-squares fit, drawn as a dashed trend line and labelled
        # with the Pearson correlation of the same points.
        trend = np.poly1d(np.polyfit(parameter_counts, accuracies, 1))
        pearson_r = pd.Series(parameter_counts).corr(pd.Series(accuracies))
        axis.plot(
            parameter_counts,
            trend(parameter_counts),
            "--",
            color=color,
            label=f"{model_name} (correlation: {pearson_r:.2f})",
        )

    # Axis decoration is independent of the model, so apply it once.
    axis.set_ylabel(f"{benchmark_label} Accuracy")
    axis.set_xlabel("Parameter Count")
    axis.set_title(f"{benchmark_label} Accuracy vs. Parameter Count", fontweight="bold")
    if pairs_per_model:
        # Only build a legend when something was actually drawn.
        axis.legend(loc="upper right")


fig = plt.figure(figsize=(16, 16))
spec = GridSpec(2, 1, height_ratios=[1, 1], figure=fig)

ax = [fig.add_subplot(spec[i, :]) for i in range(2)]

_plot_accuracy_vs_parameters(ax[0], correlation_per_model_wikitext, "Wikitext")
_plot_accuracy_vs_parameters(ax[1], correlation_per_model_mmlu, "MMLU")

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("visualizations", exist_ok=True)
plt.savefig("visualizations/parameter_count_vs_accuracy.png")
plt.show()