# Loading the SmolLM-135M-Instruct Model

In [None]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.gridspec import GridSpec
from scipy.stats import norm
from transformers import AutoModelForCausalLM

In [None]:
# Global matplotlib defaults for every figure below: 16 pt for base text, axes
# titles, axes labels and tick labels, 14 pt for legend text, 300 dpi figures.
# NOTE: no font family is set here, so matplotlib's default family is used.
mpl.rcParams.update(
    {
        "font.size": 16,
        "axes.titlesize": 16,
        "axes.labelsize": 16,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "legend.fontsize": 14,
        "figure.dpi": 300,
    }
)

In [None]:
# Checkpoint name handed to from_pretrained (presumably a Hugging Face Hub id).
model_name = "HuggingFaceTB/SmolLM-135M-Instruct"
# Load the causal-LM checkpoint; device_map="auto" leaves device placement to
# the library rather than pinning the model to a specific device here.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

In [None]:
# Grab the model's lm_head module and export its weight matrix to NumPy so it
# can be analysed and plotted below.
head_weights = model.lm_head
np_head_weights = (
    head_weights.weight
    .detach()  # drop autograd tracking
    .cpu()  # make sure the data lives in host memory
    .numpy()
)

In [None]:
# Histogram of every lm_head weight, with the 1st/99th percentiles drawn as
# dashed lines and the smallest/largest values labelled on the x-axis.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot()

# ravel() returns a 1-D view where possible, so the matrix is flattened once
# instead of being copied by flatten() for each hist() call.
flat_weights = np_head_weights.ravel()

# Filled histogram; keep the counts and edges so the outline can reuse them.
counts, bin_edges, _ = ax.hist(flat_weights, bins=100, color="tab:blue")
# Red outline of the same histogram, drawn from the bins computed above (one
# sample per bin, weighted by that bin's count) rather than re-binning every
# weight a second time.
ax.hist(
    bin_edges[:-1],
    bins=bin_edges,
    weights=counts,
    color="tab:red",
    histtype="step",
    linewidth=5,
)

# Both percentiles in a single pass over the data.
values_at_1, values_at_99 = np.percentile(flat_weights, [1, 99])
ax.axvline(
    values_at_99,
    color="tab:green",
    linestyle="--",
    label="99th percentile",
    linewidth=2,
)
ax.axvline(
    values_at_1, color="tab:orange", linestyle="--", label="1st percentile", linewidth=2
)

# Label the extreme weight values at the bottom of the plot.
ax.text(flat_weights.min(), 0, "min", ha="center", va="bottom")
ax.text(flat_weights.max(), 0, "max", ha="center", va="bottom")

ax.set_xlabel("Weight value")
ax.set_ylabel("Frequency")
ax.legend()
ax.set_title("Distribution of weights in lm_head for SmolLM-135M-Instruct")
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
output_path = Path("visualizations/weight_distribution.png")
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path)
plt.show()

In [None]:
# Horizontal box plot of the same lm_head weights, annotated with the median
# and the -1.5 / +1.5 IQR fences.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot()

# ravel() avoids the extra copy that flatten() would make of the weight matrix.
flat_weights = np_head_weights.ravel()
ax.boxplot(flat_weights, vert=False)

# Compute the quartiles once instead of re-running np.percentile over the full
# weight array for every label position.
q1, median_value, q3 = np.percentile(flat_weights, [25, 50, 75])
iqr = q3 - q1

# (x position in data units, y position, label text)
annotations = [
    (median_value, 0.85, "median"),
    (q1 - 1.5 * iqr, 1.05, "-1.5 IQR"),
    (q3 + 1.5 * iqr, 1.05, "+1.5 IQR"),
]
for x_pos, y_pos, label in annotations:
    ax.text(x_pos, y_pos, label, ha="center", va="bottom", fontsize=10)

ax.set_xlabel("Weight value")
ax.set_title("Box plot of weights in lm_head for SmolLM-135M-Instruct")
# No ax.legend() here: nothing in this plot carries a label, so the call would
# only emit matplotlib's "no artists with labels" warning.
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
output_path = Path("visualizations/weight_boxplot.png")
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path)
plt.show()