In [None]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
from pprint import pprint

In [None]:
# Load the benchmark results of the original models.
# Each JSON file in the folder contributes one entry:
#   original_benchmarks[<"model_name">] = <"original_model_benchmarks">
folder_path_original_benchmarks = "logs/original_model_benchmarks"
original_benchmarks = {}
# sorted() makes the load order deterministic; os.listdir returns entries in
# arbitrary order, which matters if two files share the same model name.
for filename in sorted(os.listdir(folder_path_original_benchmarks)):
    # Skip everything that is not a JSON log (e.g. .DS_Store or the
    # .ipynb_checkpoints directory), matching the filter used for the experiment logs.
    if not filename.endswith(".json"):
        continue
    with open(os.path.join(folder_path_original_benchmarks, filename), "r") as f:
        raw_data = json.load(f)
    model_name = raw_data["model_name"]
    original_benchmarks[model_name] = raw_data["original_model_benchmarks"]


In [None]:
# Gather all runs of the second experiment, grouped by model name:
#   data[model_name] = {
#       "original_model_accuracy": "wikitext_accuracy" of the original model,
#       "quantization_data": [one dict per run],
#   }
data = {}
folder_path = "logs/second_experiment"
for filename in os.listdir(folder_path):
    # Anything that is not a JSON file is ignored
    if not filename.endswith(".json"):
        continue

    # Read and parse the run log
    file_path = os.path.join(folder_path, filename)
    with open(file_path, "r") as file:
        model_data = json.load(file)

    # Runs are keyed by the model name stored inside the log (not by the filename)
    model_name = model_data["model_name"]
    original_model_benchmark = original_benchmarks[model_name]
    original_model_accuracy = original_model_benchmark["wikitext_accuracy"]

    # Copy the per-run fields that are kept for the analysis
    quantization_data = {
        key: model_data[key]
        for key in (
            "error_threshold",
            "quantized_model_benchmarks",
            "average_bit_width",
            "min_quantile",
            "max_quantile",
            "layerwise_quantization_info",
        )
    }

    # Create the entry for a model on its first run, then record the run
    model_entry = data.setdefault(
        model_name,
        {
            "original_model_accuracy": original_model_accuracy,
            "quantization_data": [],
        },
    )
    model_entry["quantization_data"].append(quantization_data)

In [None]:
# One subplot per model: WikiText accuracy of the quantized model over the SQNR
# error threshold, with one curve per clipping quantile range.
# x-axis: "error_threshold" of each run (labelled as SQNR in dB)
# y-axis: "wikitext_accuracy" of the quantized model
# The dashed red line marks the accuracy of the original model.

models_in_correct_order = [
    "HuggingFaceTB/SmolLM-135M-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]
# Put the models in display order. Models without loaded results are skipped
# instead of raising a KeyError.
data = {k: data[k] for k in models_in_correct_order if k in data}

# Create the figure only after reordering so that there is exactly one subplot per
# plotted model. squeeze=False keeps the axes indexable when only one model is left.
fig, axes_grid = plt.subplots(1, len(data), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]

for i, (model_name, model_data) in enumerate(data.items()):
    original_model_accuracy = model_data["original_model_accuracy"]
    quantization_data = model_data["quantization_data"]
    lines = []

    for quantile_range in [(0.1, 0.9), (0.05, 0.95), (0.01, 0.99), (0.0, 1.0)]:
        # (threshold, accuracy) pairs of the runs that used this quantile range,
        # sorted by threshold so the curve is drawn from left to right
        points = sorted(
            (
                quantization_info["error_threshold"],
                quantization_info["quantized_model_benchmarks"]["wikitext_accuracy"],
            )
            for quantization_info in quantization_data
            if quantization_info["min_quantile"] == quantile_range[0]
            and quantization_info["max_quantile"] == quantile_range[1]
        )
        if not points:
            # No run for this quantile range: unpacking zip(*...) of an empty
            # sequence would raise a ValueError, so the curve is skipped.
            continue
        x, y = zip(*points)

        # plot with markers
        (line,) = ax[i].plot(
            x,
            y,
            marker="o",
            label=f"{quantile_range} clipping range %",
        )
        lines.append(line)

    # Reference line for the original model; its handle is added to the legend
    lines.append(
        ax[i].axhline(
            y=original_model_accuracy,
            color="r",
            linestyle="--",
            label="Original Model Accuracy",
        )
    )

    # use 0 to .60 on y axis in steps of 0.05
    ax[i].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.05))
    ax[i].set_ylim(0, 0.6)

    ax[i].set_title(model_name)
    ax[i].set_xlabel("SQNR (dB)")
    ax[i].set_ylabel("Quantized Model Accuracy on WikiText")
    # set location of legend down right
    ax[i].legend(handles=lines, loc="lower right")

plt.tight_layout()
plt.show()

In [None]:
# One subplot per model: MMLU overall score of the quantized model over the SQNR
# error threshold, with one curve per clipping quantile range.
# x-axis: "error_threshold" of each run (labelled as SQNR in dB)
# y-axis: "mmlu_results" -> "overall_score" of the quantized model
# The dashed red line marks the score of the original model, the dashed violet
# line a fixed baseline of 0.25.

models_in_correct_order = [
    "HuggingFaceTB/SmolLM-135M-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]
# Put the models in display order. Models without loaded results are skipped
# instead of raising a KeyError.
data = {k: data[k] for k in models_in_correct_order if k in data}

# Create the figure only after reordering so that there is exactly one subplot per
# plotted model. squeeze=False keeps the axes indexable when only one model is left.
fig, axes_grid = plt.subplots(1, len(data), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]

for i, (model_name, model_data) in enumerate(data.items()):
    original_model_accuracy = original_benchmarks[model_name]["mmlu_results"][
        "overall_score"
    ]
    quantization_data = model_data["quantization_data"]
    lines = []

    for quantile_range in [(0.1, 0.9), (0.05, 0.95), (0.01, 0.99), (0.0, 1.0)]:
        # (threshold, score) pairs of the runs that used this quantile range,
        # sorted by threshold so the curve is drawn from left to right
        points = sorted(
            (
                quantization_info["error_threshold"],
                quantization_info["quantized_model_benchmarks"]["mmlu_results"][
                    "overall_score"
                ],
            )
            for quantization_info in quantization_data
            if quantization_info["min_quantile"] == quantile_range[0]
            and quantization_info["max_quantile"] == quantile_range[1]
        )
        if not points:
            # No run for this quantile range: unpacking zip(*...) of an empty
            # sequence would raise a ValueError, so the curve is skipped.
            continue
        x, y = zip(*points)

        # plot with markers
        (line,) = ax[i].plot(
            x,
            y,
            marker="o",
            label=f"{quantile_range} clipping range %",
        )
        lines.append(line)

    # Reference lines; their handles are added to the legend
    lines.append(
        ax[i].axhline(
            y=original_model_accuracy,
            color="r",
            linestyle="--",
            label="Original Model Accuracy",
        )
    )
    # NOTE(review): 0.25 is presumably the chance level for four-choice
    # questions - confirm.
    lines.append(
        ax[i].axhline(y=0.25, color="violet", linestyle="--", label="Baseline")
    )

    # use 0 to .70 on y axis in steps of 0.05
    ax[i].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.05))
    ax[i].set_ylim(0, 0.7)

    ax[i].set_title(model_name)
    ax[i].set_xlabel("SQNR (dB)")
    ax[i].set_ylabel("Quantized Model Accuracy on MMLU tasks")
    # set location of legend down right
    ax[i].legend(handles=lines, loc="lower right")

plt.tight_layout()
plt.show()

In [None]:
# One subplot per model: WikiText accuracy of the quantized model over the average
# bit width, with one curve per clipping quantile range.
# x-axis: "average_bit_width" of each run
# y-axis: "wikitext_accuracy" of the quantized model
# The dashed red line marks the accuracy of the original model.

models_in_correct_order = [
    "HuggingFaceTB/SmolLM-135M-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]
# Put the models in display order. Models without loaded results are skipped
# instead of raising a KeyError.
data = {k: data[k] for k in models_in_correct_order if k in data}

# Create the figure only after reordering so that there is exactly one subplot per
# plotted model. squeeze=False keeps the axes indexable when only one model is left.
fig, axes_grid = plt.subplots(1, len(data), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]

for i, (model_name, model_data) in enumerate(data.items()):
    original_model_accuracy = model_data["original_model_accuracy"]
    quantization_data = model_data["quantization_data"]
    lines = []

    for quantile_range in [(0.1, 0.9), (0.05, 0.95), (0.01, 0.99), (0.0, 1.0)]:
        # (bit width, accuracy) pairs of the runs that used this quantile range,
        # sorted by bit width so the curve is drawn from left to right
        points = sorted(
            (
                quantization_info["average_bit_width"],
                quantization_info["quantized_model_benchmarks"]["wikitext_accuracy"],
            )
            for quantization_info in quantization_data
            if quantization_info["min_quantile"] == quantile_range[0]
            and quantization_info["max_quantile"] == quantile_range[1]
        )
        if not points:
            # No run for this quantile range: unpacking zip(*...) of an empty
            # sequence would raise a ValueError, so the curve is skipped.
            continue
        x, y = zip(*points)

        # plot with markers
        (line,) = ax[i].plot(
            x,
            y,
            marker="o",
            label=f"{quantile_range} clipping range %",
        )
        lines.append(line)

    # Reference line for the original model; its handle is added to the legend
    lines.append(
        ax[i].axhline(
            y=original_model_accuracy,
            color="r",
            linestyle="--",
            label="Original Model Accuracy",
        )
    )

    # use 0 to .60 on y axis in steps of 0.05
    ax[i].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.05))
    ax[i].set_ylim(0, 0.6)

    ax[i].set_title(model_name)
    ax[i].set_xlabel("Average Bit Width")
    ax[i].set_ylabel("Quantized Model Accuracy  on WikiText")
    ax[i].legend(handles=lines, loc="lower right")

plt.tight_layout()
plt.show()

In [None]:
# One subplot per model: MMLU overall score of the quantized model over the average
# bit width, with one curve per clipping quantile range.
# x-axis: "average_bit_width" of each run
# y-axis: "mmlu_results" -> "overall_score" of the quantized model
# The dashed red line marks the score of the original model, the dashed violet
# line a fixed baseline of 0.25.

models_in_correct_order = [
    "HuggingFaceTB/SmolLM-135M-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]
# Put the models in display order. Models without loaded results are skipped
# instead of raising a KeyError.
data = {k: data[k] for k in models_in_correct_order if k in data}

# Create the figure only after reordering so that there is exactly one subplot per
# plotted model. squeeze=False keeps the axes indexable when only one model is left.
fig, axes_grid = plt.subplots(1, len(data), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]

for i, (model_name, model_data) in enumerate(data.items()):
    original_model_accuracy = original_benchmarks[model_name]["mmlu_results"][
        "overall_score"
    ]
    quantization_data = model_data["quantization_data"]
    lines = []

    for quantile_range in [(0.1, 0.9), (0.05, 0.95), (0.01, 0.99), (0.0, 1.0)]:
        # (bit width, score) pairs of the runs that used this quantile range,
        # sorted by bit width so the curve is drawn from left to right
        points = sorted(
            (
                quantization_info["average_bit_width"],
                quantization_info["quantized_model_benchmarks"]["mmlu_results"][
                    "overall_score"
                ],
            )
            for quantization_info in quantization_data
            if quantization_info["min_quantile"] == quantile_range[0]
            and quantization_info["max_quantile"] == quantile_range[1]
        )
        if not points:
            # No run for this quantile range: unpacking zip(*...) of an empty
            # sequence would raise a ValueError, so the curve is skipped.
            continue
        x, y = zip(*points)

        # plot with markers
        (line,) = ax[i].plot(
            x,
            y,
            marker="o",
            label=f"{quantile_range} clipping range %",
        )
        lines.append(line)

    # Reference lines; their handles are added to the legend.
    # The red line is the MMLU overall score of the original model, so it is
    # labelled as MMLU (it was mislabelled as WikiText before).
    lines.append(
        ax[i].axhline(
            y=original_model_accuracy,
            color="r",
            linestyle="--",
            label="Original Model Accuracy on MMLU",
        )
    )
    # NOTE(review): 0.25 is presumably the chance level for four-choice
    # questions - confirm.
    lines.append(
        ax[i].axhline(y=0.25, color="violet", linestyle="--", label="MMLU Baseline")
    )

    # use 0 to .70 on y axis in steps of 0.05
    ax[i].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.05))
    ax[i].set_ylim(0, 0.7)

    ax[i].set_title(model_name)
    ax[i].set_xlabel("Average Bit Width")
    ax[i].set_ylabel("Quantized Model Accuracy on MMLU tasks")
    ax[i].legend(handles=lines, loc="lower right")

plt.tight_layout()
plt.show()

# Per Layer Analysis

## Bit-Width Distribution over All Layers per Model

In [None]:
# One subplot per model: distribution of the per-layer bit widths.
# Every run with min_quantile == 0 is drawn as its own step histogram.
# x-axis: "bit_width" of the layers in "layerwise_quantization_info"
# y-axis: number of layers with that bit width

# NOTE(review): seaborn is not used by the code in this cell. The import is left in
# place in case other cells rely on `sns`; consider moving it to the import cell.
import seaborn as sns

models_in_correct_order = [
    "HuggingFaceTB/SmolLM-135M-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]
# Put the models in display order. Models without loaded results are skipped
# instead of raising a KeyError.
data = {k: data[k] for k in models_in_correct_order if k in data}

# Create the figure only after reordering so that there is exactly one subplot per
# plotted model. squeeze=False keeps the axes indexable when only one model is left.
fig, axes_grid = plt.subplots(1, len(data), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]

possible_x_values = [2, 3, 4, 5, 6, 8, 10, 12, 16]
# `bins` is interpreted as bin edges and only the last bin includes its right edge,
# so with the raw values as edges the widths 12 and 16 would be counted together.
# One extra edge gives each listed value its own bin.
# NOTE(review): assumes bit widths only take the values listed above - confirm.
bin_edges = possible_x_values + [possible_x_values[-1] + 1]

for i, (model_name, model_data) in enumerate(data.items()):
    ax[i].set_title(model_name)
    ax[i].set_xlabel("Bit Width")
    ax[i].set_ylabel("Number of Layers")

    for quantization_info in model_data["quantization_data"]:
        if quantization_info["min_quantile"] != 0:
            continue

        layerwise_quantization_info = quantization_info["layerwise_quantization_info"]
        distributions = [
            layer["bit_width"] for layer in layerwise_quantization_info.values()
        ]

        # Label each run with its error threshold so the overlaid histograms
        # can be told apart in the legend.
        error_threshold = quantization_info["error_threshold"]
        ax[i].hist(
            distributions,
            bins=bin_edges,
            histtype="step",
            label=f"SQNR {error_threshold} dB",
        )

    # Only add a legend when at least one histogram was drawn
    if ax[i].get_legend_handles_labels()[0]:
        ax[i].legend(loc="upper right")

plt.tight_layout()
plt.show()

## Average Bitwidth per Component

In [None]:
# Grid of subplots (one row per component, one column per model) showing the average
# bit width of the layers belonging to each component.
# A layer belongs to a component when the component name occurs in the layer name.
# Only runs with min_quantile == 0 and error_threshold == 10 are considered.
components = [
    "self_attn",
    "mlp",
    "lm_head",
]

models_in_correct_order = [
    "HuggingFaceTB/SmolLM-135M-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]

# Put the models in display order. Models without loaded results are skipped
# instead of raising a KeyError.
data = {k: data[k] for k in models_in_correct_order if k in data}

# squeeze=False keeps `ax` two-dimensional for any number of rows and columns
fig, ax = plt.subplots(len(components), len(data), figsize=(20, 5), squeeze=False)

possible_x_values = [2, 3, 4, 5, 6, 8, 10, 12, 16]

for i, (model_name, model_data) in enumerate(data.items()):
    # Label every subplot up front so rows and columns can be told apart
    for j, component in enumerate(components):
        ax[j, i].set_title(model_name)
        ax[j, i].set_ylabel(component)

    for quantization_info in model_data["quantization_data"]:
        if (
            quantization_info["min_quantile"] != 0
            or quantization_info["error_threshold"] != 10
        ):
            continue

        # Accumulate the bit widths per component once per run
        plot_data = {k: {"sum": 0, "count": 0, "average": 0} for k in components}
        layerwise_quantization_info = quantization_info["layerwise_quantization_info"]
        for layer_name, layer in layerwise_quantization_info.items():
            for component in components:
                if component in layer_name:
                    plot_data[component]["sum"] += layer["bit_width"]
                    plot_data[component]["count"] += 1

        # Each row shows its own component. Previously an inner loop reused the
        # name `component`, so every row showed the value of the last component.
        for j, component in enumerate(components):
            component_stats = plot_data[component]
            if component_stats["count"] == 0:
                # No layer matched this component: nothing to plot, and the
                # average would be a division by zero.
                continue
            component_stats["average"] = (
                component_stats["sum"] / component_stats["count"]
            )
            distributions = [component_stats["average"]]

            _, _, bars = ax[j, i].hist(distributions, bins=possible_x_values)
            # show numbers on top of the bars that were just drawn
            for rect in bars:
                height = rect.get_height()
                ax[j, i].annotate(
                    f"{height}",
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha="center",
                    va="bottom",
                )
            # Show the exact value in the figure (replaces the debug print)
            ax[j, i].set_xlabel(
                f"Average Bit Width: {component_stats['average']:.2f}"
            )

plt.tight_layout()
plt.show()