In [1]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt

In [2]:
act_folder = '/home/chengzhang/Multimodal-Quantization/evaluation/activations/llava-7b'
img_folder = '/home/chengzhang/Multimodal-Quantization/LLaVA/snapshot/img/activations'

In [3]:
sample_tokens = 4096

def load_channel(
    path: str,
    comp: str,
    channel: int,
) -> np.ndarray:
    return torch.load(f'{path}/{comp}.pt')[:, channel].cpu().numpy()


bits = 4
bins = 2 ** (bits - 1)
ticks = np.square(np.arange(-1, 1, 1 / bins))
ticks[:bins] *= -1
ticks = ticks[1:]


def plot_channel(ax: plt.Axes, x: np.ndarray, plot_ticks: bool = False, bins: int = 200):
    scale = np.abs(x).max()
    thres = scale * 0.99
    quant_x = np.where(np.abs(x) < thres, x, 0.0)
    quant_scale = np.abs(quant_x).max()
    freqs, _, _ = ax.hist(x, bins=bins)
    if plot_ticks:
        for pos in ticks:
            ax.plot([pos * quant_scale, pos * quant_scale], [0, max(freqs)], color='C1', linewidth=1)

In [4]:
num_cols = 2

def plot_all_comp(layer, channel):
    suptitle = f'Activation Distribution: Layer #{layer}, Channel #{channel}'
    print(suptitle)
    plt.figure(figsize=(15, 16))

    for i, comp in enumerate(['q-proj', 'o-proj', 'up-proj', 'down-proj']):
        title = f'[L{layer}.{comp}] TextVQA Text Tokens'
        ax = plt.subplot(4, num_cols, i * num_cols + 1)
        x = load_channel(f'{act_folder}/textqa', f'{layer:0>2}-{comp}', channel)
        plot_channel(ax, x)
        ax.set_title(title)
        print(title)
        title = f'[L{layer}.{comp}] TextVQA Vision Tokens'
        ax = plt.subplot(4, num_cols, i * num_cols + 2)
        x = load_channel(f'{act_folder}/textv', f'{layer:0>2}-{comp}', channel)
        plot_channel(ax, x)
        ax.set_title(title)
        print(title)

    plt.suptitle(suptitle)
    save_folder = os.path.join(img_folder, 'textvqa-text-vs-vision')
    os.makedirs(save_folder, exist_ok=True)
    plt.savefig(os.path.join(save_folder, f'{layer}.{channel}.png'))

In [5]:
for layer in range(32):
    plot_all_comp(layer, channel=31)
    plt.clf()

Activation Distribution: Layer #0, Channel #31
[L0.q-proj] TextVQA Text Tokens
[L0.q-proj] TextVQA Vision Tokens
[L0.o-proj] TextVQA Text Tokens
[L0.o-proj] TextVQA Vision Tokens
[L0.up-proj] TextVQA Text Tokens
[L0.up-proj] TextVQA Vision Tokens
[L0.down-proj] TextVQA Text Tokens
[L0.down-proj] TextVQA Vision Tokens
Activation Distribution: Layer #1, Channel #31
[L1.q-proj] TextVQA Text Tokens
[L1.q-proj] TextVQA Vision Tokens
[L1.o-proj] TextVQA Text Tokens
[L1.o-proj] TextVQA Vision Tokens
[L1.up-proj] TextVQA Text Tokens
[L1.up-proj] TextVQA Vision Tokens
[L1.down-proj] TextVQA Text Tokens
[L1.down-proj] TextVQA Vision Tokens
Activation Distribution: Layer #2, Channel #31
[L2.q-proj] TextVQA Text Tokens
[L2.q-proj] TextVQA Vision Tokens
[L2.o-proj] TextVQA Text Tokens
[L2.o-proj] TextVQA Vision Tokens
[L2.up-proj] TextVQA Text Tokens
[L2.up-proj] TextVQA Vision Tokens
[L2.down-proj] TextVQA Text Tokens
[L2.down-proj] TextVQA Vision Tokens
Activation Distribution: Layer #3, Channel 

  plt.figure(figsize=(15, 16))


[L20.q-proj] TextVQA Text Tokens
[L20.q-proj] TextVQA Vision Tokens
[L20.o-proj] TextVQA Text Tokens
[L20.o-proj] TextVQA Vision Tokens
[L20.up-proj] TextVQA Text Tokens
[L20.up-proj] TextVQA Vision Tokens
[L20.down-proj] TextVQA Text Tokens
[L20.down-proj] TextVQA Vision Tokens
Activation Distribution: Layer #21, Channel #31
[L21.q-proj] TextVQA Text Tokens
[L21.q-proj] TextVQA Vision Tokens
[L21.o-proj] TextVQA Text Tokens
[L21.o-proj] TextVQA Vision Tokens
[L21.up-proj] TextVQA Text Tokens
[L21.up-proj] TextVQA Vision Tokens
[L21.down-proj] TextVQA Text Tokens
[L21.down-proj] TextVQA Vision Tokens
Activation Distribution: Layer #22, Channel #31
[L22.q-proj] TextVQA Text Tokens
[L22.q-proj] TextVQA Vision Tokens
[L22.o-proj] TextVQA Text Tokens
[L22.o-proj] TextVQA Vision Tokens
[L22.up-proj] TextVQA Text Tokens
[L22.up-proj] TextVQA Vision Tokens
[L22.down-proj] TextVQA Text Tokens
[L22.down-proj] TextVQA Vision Tokens
Activation Distribution: Layer #23, Channel #31
[L23.q-proj] Tex

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

In [6]:
num_cols = 3

def plot_all_comp(layer, channel):
    suptitle = f'Activation Distribution: Layer #{layer}, Channel #{channel}'
    print(suptitle)
    plt.figure(figsize=(15, 16))

    for i, comp in enumerate(['q-proj', 'o-proj', 'up-proj', 'down-proj']):
        title = f'[L{layer}.{comp}] Pure Text Inputs'
        ax = plt.subplot(4, num_cols, i * num_cols + 1)
        x = load_channel(f'{act_folder}/c4-sample', f'{layer:0>2}-{comp}', channel)
        plot_channel(ax, x)
        ax.set_title(title)
        print(title)
        title = f'[L{layer}.{comp}] Pure Vision Inputs'
        ax = plt.subplot(4, num_cols, i * num_cols + 2)
        x = load_channel(f'{act_folder}/coco-sample', f'{layer:0>2}-{comp}', channel)
        plot_channel(ax, x)
        ax.set_title(title)
        print(title)
        title = f'[L{layer}.{comp}] Text + Vision Inputs'
        ax = plt.subplot(4, num_cols, i * num_cols + 3)
        x = np.concatenate([
            load_channel(f'{act_folder}/textqa', f'{layer:0>2}-{comp}', channel),
            load_channel(f'{act_folder}/textv', f'{layer:0>2}-{comp}', channel),
        ], axis=-1)
        plot_channel(ax, x)
        ax.set_title(title)
        print(title)

    plt.suptitle(suptitle)
    save_folder = os.path.join(img_folder, 'text-vs-vision-vs-hybrid')
    os.makedirs(save_folder, exist_ok=True)
    plt.savefig(os.path.join(save_folder, f'{layer}.{channel}.png'))

In [7]:
for layer in range(32):
    plot_all_comp(layer, channel=31)
    plt.clf()

Activation Distribution: Layer #0, Channel #31
[L0.q-proj] Pure Text Inputs
[L0.q-proj] Pure Vision Inputs


[L0.q-proj] Text + Vision Inputs
[L0.o-proj] Pure Text Inputs
[L0.o-proj] Pure Vision Inputs
[L0.o-proj] Text + Vision Inputs
[L0.up-proj] Pure Text Inputs
[L0.up-proj] Pure Vision Inputs
[L0.up-proj] Text + Vision Inputs
[L0.down-proj] Pure Text Inputs
[L0.down-proj] Pure Vision Inputs
[L0.down-proj] Text + Vision Inputs
Activation Distribution: Layer #1, Channel #31
[L1.q-proj] Pure Text Inputs
[L1.q-proj] Pure Vision Inputs
[L1.q-proj] Text + Vision Inputs
[L1.o-proj] Pure Text Inputs
[L1.o-proj] Pure Vision Inputs
[L1.o-proj] Text + Vision Inputs
[L1.up-proj] Pure Text Inputs
[L1.up-proj] Pure Vision Inputs
[L1.up-proj] Text + Vision Inputs
[L1.down-proj] Pure Text Inputs
[L1.down-proj] Pure Vision Inputs
[L1.down-proj] Text + Vision Inputs
Activation Distribution: Layer #2, Channel #31
[L2.q-proj] Pure Text Inputs
[L2.q-proj] Pure Vision Inputs
[L2.q-proj] Text + Vision Inputs
[L2.o-proj] Pure Text Inputs
[L2.o-proj] Pure Vision Inputs
[L2.o-proj] Text + Vision Inputs
[L2.up-proj]

  plt.figure(figsize=(15, 16))


[L20.q-proj] Pure Vision Inputs
[L20.q-proj] Text + Vision Inputs
[L20.o-proj] Pure Text Inputs
[L20.o-proj] Pure Vision Inputs
[L20.o-proj] Text + Vision Inputs
[L20.up-proj] Pure Text Inputs
[L20.up-proj] Pure Vision Inputs
[L20.up-proj] Text + Vision Inputs
[L20.down-proj] Pure Text Inputs
[L20.down-proj] Pure Vision Inputs
[L20.down-proj] Text + Vision Inputs
Activation Distribution: Layer #21, Channel #31
[L21.q-proj] Pure Text Inputs
[L21.q-proj] Pure Vision Inputs
[L21.q-proj] Text + Vision Inputs
[L21.o-proj] Pure Text Inputs
[L21.o-proj] Pure Vision Inputs
[L21.o-proj] Text + Vision Inputs
[L21.up-proj] Pure Text Inputs
[L21.up-proj] Pure Vision Inputs
[L21.up-proj] Text + Vision Inputs
[L21.down-proj] Pure Text Inputs
[L21.down-proj] Pure Vision Inputs
[L21.down-proj] Text + Vision Inputs
Activation Distribution: Layer #22, Channel #31
[L22.q-proj] Pure Text Inputs
[L22.q-proj] Pure Vision Inputs
[L22.q-proj] Text + Vision Inputs
[L22.o-proj] Pure Text Inputs
[L22.o-proj] Pur

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>

<Figure size 1500x1600 with 0 Axes>