In [2]:
from transformers import AutoConfig
from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXLayer
import numpy as np
import torch as th


# Checkpoint suffixes; each one is substituted into the Hugging Face id
# "EleutherAI/pythia-{model}" and into the results path below.
MODELS = [
    "19m-deduped", "125m-deduped", "350m-deduped", "1.3b-deduped",
    "2.7b-deduped", "6.7b-deduped", "13b-deduped",
]
# Labels listed in the same order as MODELS.
# NOTE(review): presumably display names for the same checkpoints — confirm the pairing.
NAMES = [
    "70M", "160M", "410M", "1.4B", "2.8B", "6.9B", "12B"
]

# Per-model values, appended in MODELS order by the loop below.
DEPTHS = []
WIDTHS = []
# Per-model lists of values, keyed by the MODELS entry.
baselines = {}
metrics = {}
# NOTE(review): `slopes` is not written or read anywhere in the visible part of the notebook.
slopes = {}
# Absolute, machine-specific directory holding one sub-directory per model.
root = "/mnt/ssd-1/nora/real-lenses/pythia/"

for model in MODELS:
    # Only the config is fetched here; depth and width are read from it.
    config = AutoConfig.from_pretrained(f"EleutherAI/pythia-{model}")
    DEPTHS.append(config.num_hidden_layers)
    WIDTHS.append(config.hidden_size)

    # NOTE(review): th.load deserializes pickled data — only load files from a trusted source.
    results = th.load(
        root + f"{model}/affine/eval/aggregate_metrics.pt",
        map_location="cpu"
    )
    # Keep only the values of each mapping, in the mapping's own iteration order.
    baselines[model] = list(results['baseline_ce'].values())
    metrics[model] = list(results['lens_ce'].values())

# GPT-NeoX-20B is handled separately: its results are read from a "classic"
# directory instead of "affine", and it is appended after the Pythia models.
neox_results = th.load(
    "/mnt/ssd-1/nora/real-lenses/gpt-neox-20b/classic/eval/aggregate_metrics.pt",
    map_location="cpu"
)
config = AutoConfig.from_pretrained("EleutherAI/gpt-neox-20b")
# NOTE(review): `layer` is not used in the visible part of the notebook; building it
# instantiates a full GPTNeoXLayer for the 20B config — confirm it is still needed.
layer = GPTNeoXLayer(config)
DEPTHS.append(config.num_hidden_layers)
# MODELS and NAMES are extended in place so they stay aligned with DEPTHS / WIDTHS.
MODELS.append("20b")
NAMES.append("20B<br>(NeoX)")
WIDTHS.append(config.hidden_size)
baselines["20b"] = list(neox_results['baseline_ce'].values())
metrics["20b"] = list(neox_results['lens_ce'].values())

In [48]:
from transformers import AutoConfig, AutoModel

# Load the "EleutherAI/pythia-19m-deduped" checkpoint so its parameters can be counted.
tiny = AutoModel.from_pretrained("EleutherAI/pythia-19m-deduped")
# Number of trainable parameters owned by the input-embedding module.
embed_params = sum(p.numel() for p in tiny.get_input_embeddings().parameters() if p.requires_grad)
# Cell output: total trainable parameters minus the input-embedding parameters.
sum(p.numel() for p in tiny.parameters() if p.requires_grad) - embed_params

Some weights of the model checkpoint at EleutherAI/pythia-19m-deduped were not used when initializing GPTNeoXModel: ['embed_out.weight']
- This IS expected if you are initializing GPTNeoXModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPTNeoXModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


18915328

# GPT-2

In [56]:
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

# Figure width passed to plotly when the figure is built.
# NOTE(review): presumably 6.75 in x 96 px/in, doubled — confirm the intended units.
PAGE_WIDTH = 6.75 * 96 * 2

from transformers import AutoConfig, AutoModel
import numpy as np
import torch as th


# Hugging Face model ids for the GPT-2 family.
MODELS = [
    "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl",
]
# Short labels, in the same order as MODELS; they name the results
# sub-directories and key the `baselines` / `metrics` dictionaries.
NAMES = [
    "small", "medium", "large", "xl"
]

# Per-model values, appended in NAMES / MODELS order by the loop below.
DEPTHS = []
PARAMS = []
WIDTHS = []
# Per-model lists of values, keyed by the NAMES entry.
baselines = {}
metrics = {}
# Absolute, machine-specific directory holding one sub-directory per label.
root = "/mnt/ssd-1/nora/real-lenses/gpt2/"

for name, model in zip(NAMES, MODELS):
    config = AutoConfig.from_pretrained(model)
    # The model is built from the config alone and is used here only to count parameters.
    # NOTE(review): this instantiates every model in full, including gpt2-xl.
    m = AutoModel.from_config(config)
    DEPTHS.append(config.num_hidden_layers)
    PARAMS.append(
        sum(p.numel() for p in m.parameters() if p.requires_grad)
    )
    WIDTHS.append(config.hidden_size)

    # NOTE(review): th.load deserializes pickled data — only load files from a trusted source.
    results = th.load(
        root + f"{name}/affine/eval/aggregate_metrics.pt",
        map_location="cpu"
    )
    # Keep only the values of each mapping, in the mapping's own iteration order.
    baselines[name] = list(results['baseline_ce'].values())
    metrics[name] = list(results['lens_ce'].values())


# Two-panel figure for the GPT-2 family: the left panel plots the curves stored
# in `baselines`, the right panel plots the curves stored in `metrics`.
# Reads the notebook globals NAMES, DEPTHS, baselines, metrics and PAGE_WIDTH.
# NOTE(review): the y-axis title says "bits per byte" while the data comes from
# the `baseline_ce` / `lens_ce` entries — confirm the stored values use that unit.
master = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Logit lens (baseline)", "Tuned lens (ours)"),
    horizontal_spacing=0.05,
    vertical_spacing=0.1,
    x_title="Layer",
    y_title="bits per byte",
).add_traces([
    go.Scatter(
        x=np.arange(depth),
        # The final baseline entry is dropped; presumably the list holds one
        # more value than there are layers — TODO confirm.
        y=baselines[name][:-1],
        marker_color=color,
        mode="lines+markers",
        name=name,
        # Only the right-hand traces contribute legend entries.
        showlegend=False,
    ) for color, name, depth in zip(
        px.colors.sequential.Plasma[::2],
        NAMES,
        DEPTHS
    )
], rows=1, cols=1
).add_traces([
    go.Scatter(
        x=np.arange(depth),
        y=metrics[name],
        marker_color=color,
        mode="lines+markers",
        name=name,
    ) for color, name, depth in zip(
        px.colors.sequential.Plasma[::2],
        NAMES,
        DEPTHS
    )
], rows=1, cols=2
).update_xaxes(
    dtick=5,
).update_yaxes(
    range=[0, 5],
).update_annotations(
    font=dict(size=20, color="black"),
).update_layout(
    font=dict(size=16, color="black"),
    height=PAGE_WIDTH / 2.5,
    hovermode="x unified",
    legend=dict(
        title_text="Model size",
        x=0.9,
        y=0.98,
    ),
    # Ticks every five layers, with position 0 labelled "input". Exactly one
    # label is generated per tick value so `tickvals` and `ticktext` always
    # have the same length.
    xaxis1=dict(
        tickangle=-20,
        tickvals=list(range(0, 50, 5)),
        ticktext=["input"] + [str(v) for v in range(5, 50, 5)],
    ),
    xaxis2=dict(
        tickangle=-20,
        tickvals=list(range(0, 50, 5)),
        ticktext=["input"] + [str(v) for v in range(5, 50, 5)],
    ),
    margin_l=70,
    margin_r=30,
    margin_t=50,
    margin_b=70,
    width=PAGE_WIDTH,
)
# Last expression of the cell: renders the figure inline.
master

In [58]:
# Export the figure currently bound to `master` as a PDF (absolute, machine-specific path).
master.write_image("/mnt/ssd-1/nora/gpt2-perplexity.pdf")

In [60]:
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

# Figure width passed to plotly when the figure is built.
# NOTE(review): presumably 6.75 in x 96 px/in, doubled — confirm the intended units.
PAGE_WIDTH = 6.75 * 96 * 2

from transformers import AutoConfig, AutoModel
import numpy as np
import torch as th


# Hugging Face model ids for the GPT-Neo family.
MODELS = [
    "EleutherAI/gpt-neo-125M", "EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B",
]
# Short labels, in the same order as MODELS; they name the results
# sub-directories and key the `baselines` / `metrics` dictionaries.
NAMES = [
    "125M", "1.3B", "2.7B"
]

# Per-model values, appended in NAMES / MODELS order by the loop below.
DEPTHS = []
PARAMS = []
WIDTHS = []
# Per-model lists of values, keyed by the NAMES entry.
baselines = {}
metrics = {}
# Absolute, machine-specific directory holding one sub-directory per label.
root = "/mnt/ssd-1/nora/real-lenses/gpt-neo/"

for name, model in zip(NAMES, MODELS):
    config = AutoConfig.from_pretrained(model)
    # The model is built from the config alone and is used here only to count parameters.
    # NOTE(review): this instantiates every model in full, including the 2.7B one.
    m = AutoModel.from_config(config)
    DEPTHS.append(config.num_hidden_layers)
    PARAMS.append(
        sum(p.numel() for p in m.parameters() if p.requires_grad)
    )
    WIDTHS.append(config.hidden_size)

    # Results for this family are read from an "extra-layer" directory.
    # NOTE(review): th.load deserializes pickled data — only load files from a trusted source.
    results = th.load(
        root + f"{name}/extra-layer/eval/aggregate_metrics.pt",
        map_location="cpu"
    )
    # Keep only the values of each mapping, in the mapping's own iteration order.
    baselines[name] = list(results['baseline_ce'].values())
    metrics[name] = list(results['lens_ce'].values())

In [64]:
# Two-panel figure for the models loaded by the preceding loading cell: the
# left panel plots the curves stored in `baselines`, the right panel plots the
# curves stored in `metrics`.
# Reads the notebook globals NAMES, DEPTHS, baselines, metrics and PAGE_WIDTH.
# NOTE(review): the y-axis title says "bits per byte" while the data comes from
# the `baseline_ce` / `lens_ce` entries — confirm the stored values use that unit.
master = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Logit lens (baseline)", "Tuned lens (ours)"),
    horizontal_spacing=0.05,
    vertical_spacing=0.1,
    x_title="Layer",
    y_title="bits per byte",
).add_traces([
    go.Scatter(
        x=np.arange(depth),
        # The final baseline entry is dropped; presumably the list holds one
        # more value than there are layers — TODO confirm.
        y=baselines[name][:-1],
        marker_color=color,
        mode="lines+markers",
        name=name,
        # Only the right-hand traces contribute legend entries.
        showlegend=False,
    ) for color, name, depth in zip(
        px.colors.sequential.Plasma[::3],
        NAMES,
        DEPTHS
    )
], rows=1, cols=1
).add_traces([
    go.Scatter(
        x=np.arange(depth),
        y=metrics[name],
        marker_color=color,
        mode="lines+markers",
        name=name,
    ) for color, name, depth in zip(
        px.colors.sequential.Plasma[::3],
        NAMES,
        DEPTHS
    )
], rows=1, cols=2
).update_xaxes(
    dtick=5,
).update_yaxes(
    range=[0, 6],
).update_annotations(
    font=dict(size=20, color="black"),
).update_layout(
    font=dict(size=16, color="black"),
    height=PAGE_WIDTH / 2.5,
    hovermode="x unified",
    legend=dict(
        title_text="Model size",
        x=0.9,
        y=0.98,
    ),
    # Ticks every five layers, with position 0 labelled "input". Exactly one
    # label is generated per tick value so `tickvals` and `ticktext` always
    # have the same length.
    xaxis1=dict(
        tickangle=-20,
        tickvals=list(range(0, 50, 5)),
        ticktext=["input"] + [str(v) for v in range(5, 50, 5)],
    ),
    xaxis2=dict(
        tickangle=-20,
        tickvals=list(range(0, 50, 5)),
        ticktext=["input"] + [str(v) for v in range(5, 50, 5)],
    ),
    margin_l=70,
    margin_r=30,
    margin_t=50,
    margin_b=70,
    width=PAGE_WIDTH,
)
# Last expression of the cell: renders the figure inline.
master

In [66]:
# Export the figure currently bound to `master` as a PDF (absolute, machine-specific path).
master.write_image("/mnt/ssd-1/nora/neo-perplexity.pdf")

# OPT

In [70]:
from transformers import AutoConfig, AutoModel
import numpy as np
import torch as th


# Hugging Face model ids for the OPT family.
MODELS = [
    "facebook/opt-125m", "facebook/opt-1.3b", "facebook/opt-6.7b",
]
# Short labels, in the same order as MODELS; they name the results
# sub-directories and key the `baselines` / `metrics` dictionaries.
NAMES = [
    "125m", "1.3b", "6.7b"
]

# Per-model values, appended in NAMES / MODELS order by the loop below.
DEPTHS = []
# Parameter counting is disabled in this cell (see the commented-out lines in the loop).
# NOTE(review): a `PARAMS` left in the kernel by an earlier cell would be stale here.
# PARAMS = []
WIDTHS = []
# Per-model lists of values, keyed by the NAMES entry.
baselines = {}
metrics = {}
# Absolute, machine-specific directory holding one sub-directory per label.
root = "/mnt/ssd-1/nora/real-lenses/opt/"

for name, model in zip(NAMES, MODELS):
    # Only the config is fetched; depth and width are read from it.
    config = AutoConfig.from_pretrained(model)
    # m = AutoModel.from_config(config)
    DEPTHS.append(config.num_hidden_layers)
    # PARAMS.append(
    #     sum(p.numel() for p in m.parameters() if p.requires_grad)
    # )
    WIDTHS.append(config.hidden_size)

    # Results for this family are read from an "affine-clean" directory.
    # NOTE(review): th.load deserializes pickled data — only load files from a trusted source.
    results = th.load(
        root + f"{name}/affine-clean/eval/aggregate_metrics.pt",
        map_location="cpu"
    )
    # Keep only the values of each mapping, in the mapping's own iteration order.
    baselines[name] = list(results['baseline_ce'].values())
    metrics[name] = list(results['lens_ce'].values())

In [79]:
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

# Figure width passed to plotly below.
# NOTE(review): presumably 6.75 in x 96 px/in, doubled — confirm the intended units.
PAGE_WIDTH = 6.75 * 96 * 2

# Two-panel figure for the models loaded by the preceding loading cell: the
# left panel plots the curves stored in `baselines`, the right panel plots the
# curves stored in `metrics`. Both panels share one logarithmic y-axis.
# Reads the notebook globals NAMES, DEPTHS, baselines and metrics.
# NOTE(review): the y-axis title says "bits per byte" while the data comes from
# the `baseline_ce` / `lens_ce` entries — confirm the stored values use that unit.
master = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Logit lens (baseline)", "Tuned lens (ours)"),
    horizontal_spacing=0.05,
    vertical_spacing=0.1,
    shared_yaxes=True,
    x_title="Layer",
    y_title="bits per byte",
).add_traces([
    go.Scatter(
        x=np.arange(depth),
        # The final baseline entry is dropped; presumably the list holds one
        # more value than there are layers — TODO confirm.
        y=baselines[name][:-1],
        marker_color=color,
        mode="lines+markers",
        name=name,
        # Only the right-hand traces contribute legend entries.
        showlegend=False,
    ) for color, name, depth in zip(
        px.colors.sequential.Plasma[::3],
        NAMES,
        DEPTHS
    )
], rows=1, cols=1
).add_traces([
    go.Scatter(
        x=np.arange(depth),
        y=metrics[name],
        marker_color=color,
        mode="lines+markers",
        name=name,
    ) for color, name, depth in zip(
        px.colors.sequential.Plasma[::3],
        NAMES,
        DEPTHS
    )
], rows=1, cols=2
).update_xaxes(
    dtick=5,
).update_yaxes(
    # Tick labels are requested explicitly on every y-axis, including the shared one.
    showticklabels=True,
    type="log",
).update_annotations(
    font=dict(size=20, color="black"),
).update_layout(
    font=dict(size=16, color="black"),
    height=PAGE_WIDTH / 2.5,
    hovermode="x unified",
    legend=dict(
        title_text="Model size",
        x=0.9,
        y=0.98,
    ),
    # Ticks every five layers, with position 0 labelled "input". Exactly one
    # label is generated per tick value so `tickvals` and `ticktext` always
    # have the same length.
    xaxis1=dict(
        tickangle=-20,
        tickvals=list(range(0, 50, 5)),
        ticktext=["input"] + [str(v) for v in range(5, 50, 5)],
    ),
    xaxis2=dict(
        tickangle=-20,
        tickvals=list(range(0, 50, 5)),
        ticktext=["input"] + [str(v) for v in range(5, 50, 5)],
    ),
    margin_l=70,
    margin_r=30,
    margin_t=50,
    margin_b=70,
    width=PAGE_WIDTH,
)
# Last expression of the cell: renders the figure inline.
master

In [81]:
# Export the figure currently bound to `master` as a PDF (absolute, machine-specific path).
master.write_image("/mnt/ssd-1/nora/opt-perplexity.pdf")

In [392]:
from itertools import product
from scipy.optimize import curve_fit
from typing import NamedTuple, Sequence


class Break(NamedTuple):
    """Parameters of a single smooth break in a ``BNSL`` curve.

    A break contributes the multiplicative factor
    ``(1 + (x / d) ** (1 / f)) ** (-c * f)`` when the curve is evaluated.
    """

    # Scales the (negated) outer exponent of the break factor.
    c: float
    # Value that ``x`` is divided by inside the break factor.
    d: float
    # Appears as ``1 / f`` in the inner exponent and as ``f`` in the outer one.
    f: float


class BNSL(NamedTuple):
    """Offset power law multiplied by zero or more smooth break factors.

    Evaluating the model at ``x`` gives::

        a + b * x ** -c0 * prod_i (1 + (x / d_i) ** (1 / f_i)) ** (-c_i * f_i)

    The flat parameter vector used by ``fit``, ``from_params`` and
    ``to_params`` is ``(a, log b, c0, c_1, log d_1, f_1, ...)``; ``b`` and
    every ``d_i`` are stored on the instance in linear space.
    """

    # Additive offset.
    a: float
    # Multiplicative scale of the power-law term.
    b: float
    # Exponent of the leading power-law term (applied as ``x ** -c0``).
    c0: float

    # Break factors, applied in order.
    breaks: Sequence[Break]

    @classmethod
    def fit(cls, x, y, num_breaks: int = 1):
        """Fit a curve with ``num_breaks`` breaks to strictly positive data.

        The fit has two stages. A grid of initial parameter vectors is scored
        with ``loss`` and the best one is kept; it is then refined with
        ``scipy.optimize.curve_fit`` on ``log(y)`` under box bounds.

        Args:
            x: Positive abscissae; any array-like convertible to float.
            y: Positive ordinates; any array-like convertible to float.
            num_breaks: Number of break factors in the fitted model.

        Returns:
            A new instance built from the optimized parameter vector.

        Raises:
            AssertionError: if any element of ``x`` or ``y`` is not > 0.
            ValueError: if no grid candidate yields a finite loss.
        """
        # Accept lists and integer arrays as well as float arrays.
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
        assert np.all(x > 0) and np.all(y > 0), "x and y must be strictly positive"

        # Candidate break locations: log of the inner quartiles of x.
        q = np.linspace(0, 1, 5)[1:-1]
        x_quantiles = np.log(np.quantile(x, q))
        # Candidate offsets: the five-number summary of y.
        y_quantiles = np.quantile(y, (0.0, 0.25, 0.5, 0.75, 1.0))

        # Test grid of initializations: 250 * 75 ** num_breaks candidates.
        best_loss = np.inf
        best_p = None
        exp_grid = np.linspace(0.1, 0.99, 5)
        log_grid = np.linspace(1, 10, 10)
        break_grid = (exp_grid, x_quantiles, exp_grid) * num_breaks

        for params in product(y_quantiles, log_grid, exp_grid, *break_grid):
            loss = cls.from_params(params).loss(x, y)
            # Skip non-finite losses so one bad candidate (in particular the
            # very first one) cannot block every later, valid candidate.
            if np.isfinite(loss) and loss < best_loss:
                best_loss = loss
                best_p = params

        if best_p is None:
            raise ValueError("no grid initialization produced a finite loss")

        def fn(x, *p):
            # curve_fit is run in log space, matching `loss`.
            y_pred = cls.from_params(p)(x)
            return np.log(y_pred)

        # Per-break bounds for (c_i, log d_i, f_i): the break location is
        # confined to the observed range of x.
        break_lb = [0, np.log(x.min()), 0] * num_breaks
        break_ub = [1, np.log(x.max()), np.inf] * num_breaks

        p_star, *_ = curve_fit(
            fn, x, np.log(y), best_p,
            bounds=(
                # (a, log b, c0): only c0 is restricted, to [0, 1].
                np.array([-np.inf, -np.inf, 0] + break_lb),
                np.array([np.inf, np.inf, 1] + break_ub)
            ),
            maxfev=None,
        )
        return cls.from_params(p_star)

    @classmethod
    def from_params(cls, params):
        """Build an instance from a flat ``(a, log b, c0, c_1, log d_1, f_1, ...)`` vector."""
        a, log_b, c, *break_params = params

        # Break parameters arrive in consecutive groups of three.
        breaks = []
        for i in range(0, len(break_params), 3):
            c_i, log_d_i, f_i = break_params[i:i+3]
            breaks.append(
                Break(c_i, np.exp(log_d_i), f_i)
            )

        return cls(a, np.exp(log_b), c, breaks)

    def to_params(self):
        """Return the flat parameter tuple; the inverse of ``from_params``."""
        break_params = []
        for break_ in self.breaks:
            break_params.extend([
                break_.c,
                np.log(break_.d),
                break_.f,
            ])

        return (
            self.a,
            np.log(self.b),
            self.c0,
            *break_params
        )

    def __call__(self, x):
        """Evaluate the curve at ``x`` (scalar or array-like)."""
        # Work in floating point: integer arrays cannot be raised to negative
        # integer powers, and plain lists do not support ``**`` at all.
        x = np.asarray(x, dtype=float)

        y = self.b * x ** -self.c0
        for c_i, d_i, f_i in self.breaks:
            # Not in-place, so the result dtype can never conflict with `y`.
            y = y * (1.0 + (x / d_i) ** (1.0 / f_i)) ** (-c_i * f_i)

        return self.a + y

    def loss(self, x, y):
        """Mean squared log error"""
        log_diff = np.log(self(x)) - np.log(y)
        return np.mean(log_diff ** 2)



In [None]:
import numpy as np

def find_break(y, log_x=False, log_y=False):
    """Locate the best two-segment linear split of a sequence and return its y value.

    The points are placed at x = 1..len(y). For every split index that leaves
    at least two points on the left and three on the right, a straight line is
    least-squares fitted to each side and the squared residuals of both fits
    are summed. The split with the smallest total wins.

    Args:
        y: Sequence of more than four values.
        log_x: If truthy, fit against log(x) instead of x.
        log_y: If truthy, fit log(y) instead of y.

    Returns:
        The element of ``y`` at the winning split index, i.e. the first point
        of the right-hand segment. When ``log_y`` is set this is the
        log-transformed value, not the raw one.

    Raises:
        AssertionError: if ``y`` has four or fewer elements.
    """
    n_points = len(y)
    assert n_points > 4

    xs = np.arange(1, n_points + 1)
    xs = np.log(xs) if log_x else xs
    y = np.log(y) if log_y else y

    def segment_sse(seg_x, seg_y):
        # Sum of squared residuals of a degree-1 least-squares fit.
        slope, intercept = np.polyfit(seg_x, seg_y, 1)
        fitted = slope * seg_x + intercept
        return np.sum((seg_y - fitted) ** 2)

    split, lowest = None, np.inf
    for k in range(2, n_points - 2):
        total = segment_sse(xs[:k], y[:k]) + segment_sse(xs[k:], y[k:])
        # Strict comparison: the earliest split wins ties.
        if total < lowest:
            split, lowest = k, total

    return y[split]