In [1]:
import io
from PIL import Image
from pathlib import Path
import os
import torch
from sklearn.manifold import TSNE
import pickle
import numpy as np
import matplotlib.pyplot as plt
os.environ['HF_HOME'] = '/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/.cache' ## THIS HAS TO BE BEFORE YOU IMPORT TRANSFORMERS
from transformers import AutoProcessor, AutoModelForCausalLM, GenerationConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_model() -> tuple:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16

    processor = AutoProcessor.from_pretrained("StanfordAIMI/CheXagent-8b", trust_remote_code=True)
    generation_config = GenerationConfig.from_pretrained("StanfordAIMI/CheXagent-8b")
    model = AutoModelForCausalLM.from_pretrained(
        "StanfordAIMI/CheXagent-8b", torch_dtype=dtype, trust_remote_code=True
    ).to(device)

    return processor, model, device, dtype, generation_config

processor, model, device, dtype, generation_config = setup_model()

Loading checkpoint shards: 100%|██████████| 7/7 [01:41<00:00, 14.56s/it]


In [3]:
patch_embeddings_outputs = []
post_layer_norm_outputs = []
q_former_outputs = []
language_projection_outputs = []
mistral_model_outputs = []

patch_embeddings_output = None
post_layer_norm_output = None
q_former_output = None
language_projection_output = None
mistral_model_output = None

def patch_embedding_hook(module, input, output):
    """Function to be called by the hook for the patch embeddings layer."""
    output = output.cpu().detach()  # Assuming you want to move data to CPU for analysis
    
    global patch_embeddings_output 
    patch_embeddings_output = output
    
    # patch_embeddings_outputs.append(output)  

def post_layer_norm_hook(module, input, output):
    """Function to be called by the hook for the post layer norm layer."""
    output = output.cpu().detach()  # Assuming you want to move data to CPU for analysis
    
    global post_layer_norm_output
    post_layer_norm_output = output
    
    # post_layer_norm_outputs.append(output) 

def language_projection_hook(module, input, output):
    """Function to be called by the hook for the language projection layer."""
    input = input[0].cpu().detach()  # Assuming you want to move data to CPU for analysis
    output = output.cpu().detach()  # Assuming you want to move data to CPU for analysis
    
    global q_former_output
    q_former_output = input

    global language_projection_output
    language_projection_output = output

    # q_former_outputs.append(input)
    # language_projection_outputs.append(output)

def mistral_model_output_hook(module, input, output):
    """Function to be called by the hook for the mistral norm layer."""
    output = output.cpu().detach()  # Assuming you want to move data to CPU for analysis
    
    global mistral_model_output
    mistral_model_output = output

    # mistral_model_outputs.append(output)


In [4]:
embeddings_dict = {}
def generate_with_forward_hooks(images, prompt, processor, model, device, dtype, generation_config):
    image_id = images
    images = Image.open(image_id).convert("RGB")

    # convert image_id to a string
    if isinstance(image_id, Path):
        image_id_string = str(image_id).split("/")[-1].split(".")[0]

    # register hooks
    patch_embeddings = model.vision_model.embeddings.patch_embedding.register_forward_hook(patch_embedding_hook)
    post_layer_norm = model.vision_model.post_layernorm.register_forward_hook(post_layer_norm_hook)
    language_projection = model.language_projection.register_forward_hook(language_projection_hook)
    # mistral_model = model.language_model.model.norm.register_forward_hook(mistral_model_output_hook)

    # complete a forward pass 
    inputs = processor(
        images=images, text=f" USER: <s>{prompt} ASSISTANT: <s>", return_tensors="pt"
    ).to(device=device, dtype=dtype)
    output = model.generate(**inputs, generation_config=generation_config)[0]
    response = processor.tokenizer.decode(output, skip_special_tokens=True)

    global embeddings_dict
    embeddings_dict[image_id_string] = {
        'patch_embeddings': patch_embeddings_output.cpu().numpy(),
        'post_layer_norm': post_layer_norm_output.cpu().numpy(),
        'q_former': q_former_output.cpu().numpy(),
        'language_projection': language_projection_output.cpu().numpy(),
    }

    # remove hooks
    patch_embeddings.remove()
    post_layer_norm.remove()
    language_projection.remove()
    # mistral_model.remove()

    return response

prompt = "Describe the findings"


In [6]:
vindr_dir = Path('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/')
# list all images in the directory
images = list(vindr_dir.glob('*.png'))
# image = Image.open(image).convert("RGB")
print(images[:5])

[PosixPath('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/f3426b51acbf433d03c5f84a5d16c0d3.png'), PosixPath('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/0b877b934aada3284ac13cfa74b5419c.png'), PosixPath('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/235fa5309a20f02f306297fee158a109.png'), PosixPath('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/6039745a7cf89b4f65da84eaa8c1a226.png'), PosixPath('/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/0a8dc3de200bc767169b73a6ab91e948.png')]


In [84]:
for image in images[:3]:
    response = generate_with_forward_hooks(image, prompt, processor, model, device, dtype, generation_config)
    print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


There is no pneumothorax or pleural effusion. There is no focal consolidation or pulmonary edema. The cardiomediastinal silhouette is within normal limits.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The heart size is normal. The mediastinal and hilar contours are normal. The pulmonary vasculature is normal. The lungs are clear. No pleural effusion or pneumothorax is seen.
The lungs are hyperinflated with flattening of the diaphragms. There is no focal consolidation. There is no pleural effusion or pneumothorax. The cardiomediastinal silhouette is within normal limits.
[]


In [118]:
import pickle

In [None]:
# Define a function to save and clear the global lists to manage memory
embeddings_dict = {}
file_path = Path('/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test_paired')
file_path.mkdir(parents=True, exist_ok=True)

def save_embeddings_dict_pickle(embeddings_dict, batch_id, file_path):
    file_path.mkdir(parents=True, exist_ok=True)
    with open(file_path / f'embeddings_batch_{batch_id}.pkl', 'wb') as f:
        pickle.dump(embeddings_dict, f)
    embeddings_dict.clear()  # Clear the dictionary after saving

batch_size = 100  # Choose an appropriate size
num_images = len(images)
num_batches = (num_images + batch_size - 1) // batch_size  # Ceiling division to account for the last batch

for batch_id in range(num_batches):
    batch_start = batch_id * batch_size
    batch_end = min(batch_start + batch_size, num_images)
    batch_images = images[batch_start:batch_end]
    # generate the list of file names coreesponding to the images
    file_names = [image.name for image in batch_images]
    print(file_names)
    for image in batch_images:
        response = generate_with_forward_hooks(image, prompt, processor, model, device, dtype, generation_config)
    
    # After processing a batch, save the hook outputs and clear them from memory
    save_embeddings_dict_pickle(embeddings_dict, batch_id, file_path)
    # save_and_clear_hooks_output(batch_id,file_names)


In [None]:
print(embeddings_dict)
print(type(embeddings_dict['f3426b51acbf433d03c5f84a5d16c0d3']))   

In [None]:
import random
# compare the values of the embeddings in the pickle file and the embeddings_dict
file_path = Path('/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test_paired')
file_path.mkdir(parents=True, exist_ok=True)
for i in range(30):
    print(f'Batch {i}')
    with open(file_path / f'embeddings_batch_{i}.pkl', 'rb') as f:
        embeddings_dict_pickled = pickle.load(f)

        embeddings_dict_pickled_keys = embeddings_dict_pickled.keys()
        # randomly select 10 keys from the embeddings_dict_pickled_keys and compare the values with generated with forward hooks
        n = 25
        random_keys = random.sample(embeddings_dict_pickled_keys, n)

        for key in random_keys:
            image = vindr_dir / f'{key}.png'
            response = generate_with_forward_hooks(image, prompt, processor, model, device, dtype, generation_config)
            if not np.allclose(embeddings_dict_pickled[key]['patch_embeddings'], patch_embeddings_output.cpu().numpy()):
                print(f'Error in patch embeddings for {key}')
            if not np.allclose(embeddings_dict_pickled[key]['post_layer_norm'], post_layer_norm_output.cpu().numpy()):
                print(f'Error in post layer norm embeddings for {key}')
            if not np.allclose(embeddings_dict_pickled[key]['q_former'], q_former_output.cpu().numpy()):
                print(f'Error in q former embeddings for {key}')
            if not np.allclose(embeddings_dict_pickled[key]['language_projection'], language_projection_output.cpu().numpy()):
                print(f'Error in language projection embeddings for {key}')

In [69]:
batch = 0

# open the saved embeddings
batch_patch_embeddings = torch.load(f'/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test_paired/patch_embeddings_batch_{batch}.pt')
batch_post_layer_norm_outputs = torch.load(f'/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test_paired/post_layer_norm_outputs_batch_{batch}.pt')
batch_q_former_outputs = torch.load(f'/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test_paired/q_former_outputs_batch_{batch}.pt')
batch_language_projection_outputs = torch.load(f'/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test_paired/language_projection_outputs_batch_{batch}.pt')

In [71]:
print(len(batch_patch_embeddings))

150


In [33]:
with open(f'/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/embeddings/VinDr_test/file_names_batch_{batch_id}.txt', 'r') as f:
    file_names = f.readlines()
    file_names = [file_name.strip() for file_name in file_names]


for index, file in enumerate(file_names):
    # clear the lists
    patch_embeddings_outputs.clear()
    post_layer_norm_outputs.clear()
    q_former_outputs.clear()
    language_projection_outputs.clear()

    image = Image.open(f'/vol/biodata/data/chest_xray/VinDr-CXR/1.0.0_png_512/raw/test/{file}').convert("RGB")
    response = generate_with_forward_hooks(image, prompt, processor, model, device, dtype, generation_config)
    # check if the embeddings are the same
    if not torch.allclose(patch_embeddings_outputs[0], batch_patch_embeddings[index]):
        print(f"Patch embeddings are not the same for {file}")
    
    if not torch.allclose(post_layer_norm_outputs[0], batch_post_layer_norm_outputs[index]):
        print(f"Post layer norm outputs are not the same for {file}")
        
    if not torch.allclose(q_former_outputs[0], batch_q_former_outputs[index]):
        print(f"Q former outputs are not the same for {file}")
    
    if not torch.allclose(language_projection_outputs[0], batch_language_projection_outputs[index]):
        print(f"Language projection outputs are not the same for {file}")

  [torch.tensor(pixel_values) for pixel_values in encoding_image_processor["pixel_values"]]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for f3426b51acbf433d03c5f84a5d16c0d3.png
Post layer norm outputs are not the same for f3426b51acbf433d03c5f84a5d16c0d3.png
Q former outputs are not the same for f3426b51acbf433d03c5f84a5d16c0d3.png
Language projection outputs are not the same for f3426b51acbf433d03c5f84a5d16c0d3.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0b877b934aada3284ac13cfa74b5419c.png
Post layer norm outputs are not the same for 0b877b934aada3284ac13cfa74b5419c.png
Q former outputs are not the same for 0b877b934aada3284ac13cfa74b5419c.png
Language projection outputs are not the same for 0b877b934aada3284ac13cfa74b5419c.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 235fa5309a20f02f306297fee158a109.png
Post layer norm outputs are not the same for 235fa5309a20f02f306297fee158a109.png
Q former outputs are not the same for 235fa5309a20f02f306297fee158a109.png
Language projection outputs are not the same for 235fa5309a20f02f306297fee158a109.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 6039745a7cf89b4f65da84eaa8c1a226.png
Post layer norm outputs are not the same for 6039745a7cf89b4f65da84eaa8c1a226.png
Q former outputs are not the same for 6039745a7cf89b4f65da84eaa8c1a226.png
Language projection outputs are not the same for 6039745a7cf89b4f65da84eaa8c1a226.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0a8dc3de200bc767169b73a6ab91e948.png
Post layer norm outputs are not the same for 0a8dc3de200bc767169b73a6ab91e948.png
Q former outputs are not the same for 0a8dc3de200bc767169b73a6ab91e948.png
Language projection outputs are not the same for 0a8dc3de200bc767169b73a6ab91e948.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for ceec47e464c3dab60af04cd0a5122cf0.png
Post layer norm outputs are not the same for ceec47e464c3dab60af04cd0a5122cf0.png
Q former outputs are not the same for ceec47e464c3dab60af04cd0a5122cf0.png
Language projection outputs are not the same for ceec47e464c3dab60af04cd0a5122cf0.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for e14840dd62dcaa3f2f429ed69a89059c.png
Post layer norm outputs are not the same for e14840dd62dcaa3f2f429ed69a89059c.png
Q former outputs are not the same for e14840dd62dcaa3f2f429ed69a89059c.png
Language projection outputs are not the same for e14840dd62dcaa3f2f429ed69a89059c.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0aa43b52e98700f8696bf36c10a76cea.png
Post layer norm outputs are not the same for 0aa43b52e98700f8696bf36c10a76cea.png
Q former outputs are not the same for 0aa43b52e98700f8696bf36c10a76cea.png
Language projection outputs are not the same for 0aa43b52e98700f8696bf36c10a76cea.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for dba84e829f62c1272584f758b2204aa3.png
Post layer norm outputs are not the same for dba84e829f62c1272584f758b2204aa3.png
Q former outputs are not the same for dba84e829f62c1272584f758b2204aa3.png
Language projection outputs are not the same for dba84e829f62c1272584f758b2204aa3.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 5ef0d0b605f39b09df42d293e87971e3.png
Post layer norm outputs are not the same for 5ef0d0b605f39b09df42d293e87971e3.png
Q former outputs are not the same for 5ef0d0b605f39b09df42d293e87971e3.png
Language projection outputs are not the same for 5ef0d0b605f39b09df42d293e87971e3.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0d8b5533f40cf4b0c3f2d86b1d61d6bc.png
Post layer norm outputs are not the same for 0d8b5533f40cf4b0c3f2d86b1d61d6bc.png
Q former outputs are not the same for 0d8b5533f40cf4b0c3f2d86b1d61d6bc.png
Language projection outputs are not the same for 0d8b5533f40cf4b0c3f2d86b1d61d6bc.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for ae6434cc392e34dc83ce31ce480c5858.png
Post layer norm outputs are not the same for ae6434cc392e34dc83ce31ce480c5858.png
Q former outputs are not the same for ae6434cc392e34dc83ce31ce480c5858.png
Language projection outputs are not the same for ae6434cc392e34dc83ce31ce480c5858.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for f2385e76586b911c47398381b3ab3dd3.png
Post layer norm outputs are not the same for f2385e76586b911c47398381b3ab3dd3.png
Q former outputs are not the same for f2385e76586b911c47398381b3ab3dd3.png
Language projection outputs are not the same for f2385e76586b911c47398381b3ab3dd3.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for f6a55d8e3471a4cce6e6be9713bf8acc.png
Post layer norm outputs are not the same for f6a55d8e3471a4cce6e6be9713bf8acc.png
Q former outputs are not the same for f6a55d8e3471a4cce6e6be9713bf8acc.png
Language projection outputs are not the same for f6a55d8e3471a4cce6e6be9713bf8acc.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 4924e87f8069a1e4d3bb55f7bbeecdab.png
Post layer norm outputs are not the same for 4924e87f8069a1e4d3bb55f7bbeecdab.png
Q former outputs are not the same for 4924e87f8069a1e4d3bb55f7bbeecdab.png
Language projection outputs are not the same for 4924e87f8069a1e4d3bb55f7bbeecdab.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for b922119aa88305502c4a33b7223b8931.png
Post layer norm outputs are not the same for b922119aa88305502c4a33b7223b8931.png
Q former outputs are not the same for b922119aa88305502c4a33b7223b8931.png
Language projection outputs are not the same for b922119aa88305502c4a33b7223b8931.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for c9c96c529b368b1a26c6cadc143c5aae.png
Post layer norm outputs are not the same for c9c96c529b368b1a26c6cadc143c5aae.png
Q former outputs are not the same for c9c96c529b368b1a26c6cadc143c5aae.png
Language projection outputs are not the same for c9c96c529b368b1a26c6cadc143c5aae.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 529e1b84a553a1eace23e78a49d041aa.png
Post layer norm outputs are not the same for 529e1b84a553a1eace23e78a49d041aa.png
Q former outputs are not the same for 529e1b84a553a1eace23e78a49d041aa.png
Language projection outputs are not the same for 529e1b84a553a1eace23e78a49d041aa.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 3fe12c24bee9c864bf65441b49a53d79.png
Post layer norm outputs are not the same for 3fe12c24bee9c864bf65441b49a53d79.png
Q former outputs are not the same for 3fe12c24bee9c864bf65441b49a53d79.png
Language projection outputs are not the same for 3fe12c24bee9c864bf65441b49a53d79.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0af8628f5cbe0786db483a10934d1be5.png
Post layer norm outputs are not the same for 0af8628f5cbe0786db483a10934d1be5.png
Q former outputs are not the same for 0af8628f5cbe0786db483a10934d1be5.png
Language projection outputs are not the same for 0af8628f5cbe0786db483a10934d1be5.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for a6f83640b41a64326aba96cdef3133dd.png
Post layer norm outputs are not the same for a6f83640b41a64326aba96cdef3133dd.png
Q former outputs are not the same for a6f83640b41a64326aba96cdef3133dd.png
Language projection outputs are not the same for a6f83640b41a64326aba96cdef3133dd.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 925d945d9d6cf5d0f8bb294e8c68fdbc.png
Post layer norm outputs are not the same for 925d945d9d6cf5d0f8bb294e8c68fdbc.png
Q former outputs are not the same for 925d945d9d6cf5d0f8bb294e8c68fdbc.png
Language projection outputs are not the same for 925d945d9d6cf5d0f8bb294e8c68fdbc.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for d025e3d642cb20b297bb70aa06eb3447.png
Post layer norm outputs are not the same for d025e3d642cb20b297bb70aa06eb3447.png
Q former outputs are not the same for d025e3d642cb20b297bb70aa06eb3447.png
Language projection outputs are not the same for d025e3d642cb20b297bb70aa06eb3447.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 20a92721508c6b74dac74143d3221152.png
Post layer norm outputs are not the same for 20a92721508c6b74dac74143d3221152.png
Q former outputs are not the same for 20a92721508c6b74dac74143d3221152.png
Language projection outputs are not the same for 20a92721508c6b74dac74143d3221152.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 884f2b574d9600dd52f4a04c9aa3a230.png
Post layer norm outputs are not the same for 884f2b574d9600dd52f4a04c9aa3a230.png
Q former outputs are not the same for 884f2b574d9600dd52f4a04c9aa3a230.png
Language projection outputs are not the same for 884f2b574d9600dd52f4a04c9aa3a230.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for c4232411727c2821c17c059a260427d9.png
Post layer norm outputs are not the same for c4232411727c2821c17c059a260427d9.png
Q former outputs are not the same for c4232411727c2821c17c059a260427d9.png
Language projection outputs are not the same for c4232411727c2821c17c059a260427d9.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for a7b95bc6823cdde758fc58a57885ca20.png
Post layer norm outputs are not the same for a7b95bc6823cdde758fc58a57885ca20.png
Q former outputs are not the same for a7b95bc6823cdde758fc58a57885ca20.png
Language projection outputs are not the same for a7b95bc6823cdde758fc58a57885ca20.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 9e03f33a5f100880b2d2bdfbc46b7b72.png
Post layer norm outputs are not the same for 9e03f33a5f100880b2d2bdfbc46b7b72.png
Q former outputs are not the same for 9e03f33a5f100880b2d2bdfbc46b7b72.png
Language projection outputs are not the same for 9e03f33a5f100880b2d2bdfbc46b7b72.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 4e685bcf518b823638340e2cb20fbad8.png
Post layer norm outputs are not the same for 4e685bcf518b823638340e2cb20fbad8.png
Q former outputs are not the same for 4e685bcf518b823638340e2cb20fbad8.png
Language projection outputs are not the same for 4e685bcf518b823638340e2cb20fbad8.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 4e60eda03b7df9196e6309009114f54b.png
Post layer norm outputs are not the same for 4e60eda03b7df9196e6309009114f54b.png
Q former outputs are not the same for 4e60eda03b7df9196e6309009114f54b.png
Language projection outputs are not the same for 4e60eda03b7df9196e6309009114f54b.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 4b4c014996678e36fa5aa1d74ff71a1a.png
Post layer norm outputs are not the same for 4b4c014996678e36fa5aa1d74ff71a1a.png
Q former outputs are not the same for 4b4c014996678e36fa5aa1d74ff71a1a.png
Language projection outputs are not the same for 4b4c014996678e36fa5aa1d74ff71a1a.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for ab9dedb9ff4cd9e80dca74505b599105.png
Post layer norm outputs are not the same for ab9dedb9ff4cd9e80dca74505b599105.png
Q former outputs are not the same for ab9dedb9ff4cd9e80dca74505b599105.png
Language projection outputs are not the same for ab9dedb9ff4cd9e80dca74505b599105.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for f599fe9d73fb3cc607a03fb258b7e198.png
Post layer norm outputs are not the same for f599fe9d73fb3cc607a03fb258b7e198.png
Q former outputs are not the same for f599fe9d73fb3cc607a03fb258b7e198.png
Language projection outputs are not the same for f599fe9d73fb3cc607a03fb258b7e198.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for ca06f0a42e10816d50258f5cd663414a.png
Post layer norm outputs are not the same for ca06f0a42e10816d50258f5cd663414a.png
Q former outputs are not the same for ca06f0a42e10816d50258f5cd663414a.png
Language projection outputs are not the same for ca06f0a42e10816d50258f5cd663414a.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for d6678cb7ae39f575d35ab9da6d7cb171.png
Post layer norm outputs are not the same for d6678cb7ae39f575d35ab9da6d7cb171.png
Q former outputs are not the same for d6678cb7ae39f575d35ab9da6d7cb171.png
Language projection outputs are not the same for d6678cb7ae39f575d35ab9da6d7cb171.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 218e819ed8c0fe25c92ee7f1d5b993c7.png
Post layer norm outputs are not the same for 218e819ed8c0fe25c92ee7f1d5b993c7.png
Q former outputs are not the same for 218e819ed8c0fe25c92ee7f1d5b993c7.png
Language projection outputs are not the same for 218e819ed8c0fe25c92ee7f1d5b993c7.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 6fe65e41cb661237e9030f191e3a2a9b.png
Post layer norm outputs are not the same for 6fe65e41cb661237e9030f191e3a2a9b.png
Q former outputs are not the same for 6fe65e41cb661237e9030f191e3a2a9b.png
Language projection outputs are not the same for 6fe65e41cb661237e9030f191e3a2a9b.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 85ffcfa2cb02025abd57e2ed21ec4aa2.png
Post layer norm outputs are not the same for 85ffcfa2cb02025abd57e2ed21ec4aa2.png
Q former outputs are not the same for 85ffcfa2cb02025abd57e2ed21ec4aa2.png
Language projection outputs are not the same for 85ffcfa2cb02025abd57e2ed21ec4aa2.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 5d63f31cc8e21664c69bd7613b1b76e4.png
Post layer norm outputs are not the same for 5d63f31cc8e21664c69bd7613b1b76e4.png
Q former outputs are not the same for 5d63f31cc8e21664c69bd7613b1b76e4.png
Language projection outputs are not the same for 5d63f31cc8e21664c69bd7613b1b76e4.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 5e9b9e927b853f5a95cd487b249e77b2.png
Post layer norm outputs are not the same for 5e9b9e927b853f5a95cd487b249e77b2.png
Q former outputs are not the same for 5e9b9e927b853f5a95cd487b249e77b2.png
Language projection outputs are not the same for 5e9b9e927b853f5a95cd487b249e77b2.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for d2f46e1af0a1d93e1f8ec92633f1df86.png
Post layer norm outputs are not the same for d2f46e1af0a1d93e1f8ec92633f1df86.png
Q former outputs are not the same for d2f46e1af0a1d93e1f8ec92633f1df86.png
Language projection outputs are not the same for d2f46e1af0a1d93e1f8ec92633f1df86.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 3cb7123b676ed1963d77850bfe185482.png
Post layer norm outputs are not the same for 3cb7123b676ed1963d77850bfe185482.png
Q former outputs are not the same for 3cb7123b676ed1963d77850bfe185482.png
Language projection outputs are not the same for 3cb7123b676ed1963d77850bfe185482.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for ac5b9c5e3b65367ddbd1097db80f7a3c.png
Post layer norm outputs are not the same for ac5b9c5e3b65367ddbd1097db80f7a3c.png
Q former outputs are not the same for ac5b9c5e3b65367ddbd1097db80f7a3c.png
Language projection outputs are not the same for ac5b9c5e3b65367ddbd1097db80f7a3c.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for be794e647d0f6f47a4cd21964c6f96e5.png
Post layer norm outputs are not the same for be794e647d0f6f47a4cd21964c6f96e5.png
Q former outputs are not the same for be794e647d0f6f47a4cd21964c6f96e5.png
Language projection outputs are not the same for be794e647d0f6f47a4cd21964c6f96e5.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 96d1504f104b0b4ad88de4a50d413197.png
Post layer norm outputs are not the same for 96d1504f104b0b4ad88de4a50d413197.png
Q former outputs are not the same for 96d1504f104b0b4ad88de4a50d413197.png
Language projection outputs are not the same for 96d1504f104b0b4ad88de4a50d413197.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for e78d3dafa3d35c1ef2beaed26f884b84.png
Post layer norm outputs are not the same for e78d3dafa3d35c1ef2beaed26f884b84.png
Q former outputs are not the same for e78d3dafa3d35c1ef2beaed26f884b84.png
Language projection outputs are not the same for e78d3dafa3d35c1ef2beaed26f884b84.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for e1d0449f1cb674cecbd6c6dd3da50b1b.png
Post layer norm outputs are not the same for e1d0449f1cb674cecbd6c6dd3da50b1b.png
Q former outputs are not the same for e1d0449f1cb674cecbd6c6dd3da50b1b.png
Language projection outputs are not the same for e1d0449f1cb674cecbd6c6dd3da50b1b.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 7d6d84f1373f54e8b4a7c72e4b0be842.png
Post layer norm outputs are not the same for 7d6d84f1373f54e8b4a7c72e4b0be842.png
Q former outputs are not the same for 7d6d84f1373f54e8b4a7c72e4b0be842.png
Language projection outputs are not the same for 7d6d84f1373f54e8b4a7c72e4b0be842.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 75ef4701bba51b04b002e82495d7fc6b.png
Post layer norm outputs are not the same for 75ef4701bba51b04b002e82495d7fc6b.png
Q former outputs are not the same for 75ef4701bba51b04b002e82495d7fc6b.png
Language projection outputs are not the same for 75ef4701bba51b04b002e82495d7fc6b.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 2f8f4f988fd84809caf191477ebc05db.png
Post layer norm outputs are not the same for 2f8f4f988fd84809caf191477ebc05db.png
Q former outputs are not the same for 2f8f4f988fd84809caf191477ebc05db.png
Language projection outputs are not the same for 2f8f4f988fd84809caf191477ebc05db.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0c0b8e98262a662c362929aa60e6bed9.png
Post layer norm outputs are not the same for 0c0b8e98262a662c362929aa60e6bed9.png
Q former outputs are not the same for 0c0b8e98262a662c362929aa60e6bed9.png
Language projection outputs are not the same for 0c0b8e98262a662c362929aa60e6bed9.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 65279dbfdffd5d1250e8f7d0293d5727.png
Post layer norm outputs are not the same for 65279dbfdffd5d1250e8f7d0293d5727.png
Q former outputs are not the same for 65279dbfdffd5d1250e8f7d0293d5727.png
Language projection outputs are not the same for 65279dbfdffd5d1250e8f7d0293d5727.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 35140570aec20f94b22429d00dc142e9.png
Post layer norm outputs are not the same for 35140570aec20f94b22429d00dc142e9.png
Q former outputs are not the same for 35140570aec20f94b22429d00dc142e9.png
Language projection outputs are not the same for 35140570aec20f94b22429d00dc142e9.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 5175798e4a23ac250b47c0c4fd83a725.png
Post layer norm outputs are not the same for 5175798e4a23ac250b47c0c4fd83a725.png
Q former outputs are not the same for 5175798e4a23ac250b47c0c4fd83a725.png
Language projection outputs are not the same for 5175798e4a23ac250b47c0c4fd83a725.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 10d3204872c83731aeaee57e020b768a.png
Post layer norm outputs are not the same for 10d3204872c83731aeaee57e020b768a.png
Q former outputs are not the same for 10d3204872c83731aeaee57e020b768a.png
Language projection outputs are not the same for 10d3204872c83731aeaee57e020b768a.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 3f311e13697f83d41f55f079232fe7ee.png
Post layer norm outputs are not the same for 3f311e13697f83d41f55f079232fe7ee.png
Q former outputs are not the same for 3f311e13697f83d41f55f079232fe7ee.png
Language projection outputs are not the same for 3f311e13697f83d41f55f079232fe7ee.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 53ae3d1a464529cb387ce44aa655846f.png
Post layer norm outputs are not the same for 53ae3d1a464529cb387ce44aa655846f.png
Q former outputs are not the same for 53ae3d1a464529cb387ce44aa655846f.png
Language projection outputs are not the same for 53ae3d1a464529cb387ce44aa655846f.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for e52b5e6afc1c45823626dfc0b0c07a8c.png
Post layer norm outputs are not the same for e52b5e6afc1c45823626dfc0b0c07a8c.png
Q former outputs are not the same for e52b5e6afc1c45823626dfc0b0c07a8c.png
Language projection outputs are not the same for e52b5e6afc1c45823626dfc0b0c07a8c.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 2f629036ede0886b8a4eef89f6526fc6.png
Post layer norm outputs are not the same for 2f629036ede0886b8a4eef89f6526fc6.png
Q former outputs are not the same for 2f629036ede0886b8a4eef89f6526fc6.png
Language projection outputs are not the same for 2f629036ede0886b8a4eef89f6526fc6.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 5995bef3891e94a9666695e1302247cc.png
Post layer norm outputs are not the same for 5995bef3891e94a9666695e1302247cc.png
Q former outputs are not the same for 5995bef3891e94a9666695e1302247cc.png
Language projection outputs are not the same for 5995bef3891e94a9666695e1302247cc.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 32ce0ed98d68f9ed492aa9cdbe8a11fe.png
Post layer norm outputs are not the same for 32ce0ed98d68f9ed492aa9cdbe8a11fe.png
Q former outputs are not the same for 32ce0ed98d68f9ed492aa9cdbe8a11fe.png
Language projection outputs are not the same for 32ce0ed98d68f9ed492aa9cdbe8a11fe.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 5beee832c007826c93c25ba287ee4d48.png
Post layer norm outputs are not the same for 5beee832c007826c93c25ba287ee4d48.png
Q former outputs are not the same for 5beee832c007826c93c25ba287ee4d48.png
Language projection outputs are not the same for 5beee832c007826c93c25ba287ee4d48.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 950f7420fe9cb8a960a5c3ca47c35748.png
Post layer norm outputs are not the same for 950f7420fe9cb8a960a5c3ca47c35748.png
Q former outputs are not the same for 950f7420fe9cb8a960a5c3ca47c35748.png
Language projection outputs are not the same for 950f7420fe9cb8a960a5c3ca47c35748.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 91283908618845879937985817083c93.png
Post layer norm outputs are not the same for 91283908618845879937985817083c93.png
Q former outputs are not the same for 91283908618845879937985817083c93.png
Language projection outputs are not the same for 91283908618845879937985817083c93.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for d2e77def0302c94f744079974be9bc53.png
Post layer norm outputs are not the same for d2e77def0302c94f744079974be9bc53.png
Q former outputs are not the same for d2e77def0302c94f744079974be9bc53.png
Language projection outputs are not the same for d2e77def0302c94f744079974be9bc53.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for dd8e8db5d12ef22a32ee27794dfa0d17.png
Post layer norm outputs are not the same for dd8e8db5d12ef22a32ee27794dfa0d17.png
Q former outputs are not the same for dd8e8db5d12ef22a32ee27794dfa0d17.png
Language projection outputs are not the same for dd8e8db5d12ef22a32ee27794dfa0d17.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for f3699d3679128726466b307f7a7cba73.png
Post layer norm outputs are not the same for f3699d3679128726466b307f7a7cba73.png
Q former outputs are not the same for f3699d3679128726466b307f7a7cba73.png
Language projection outputs are not the same for f3699d3679128726466b307f7a7cba73.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 6cce56dda2a5b90c9ac2987b1a8e3f45.png
Post layer norm outputs are not the same for 6cce56dda2a5b90c9ac2987b1a8e3f45.png
Q former outputs are not the same for 6cce56dda2a5b90c9ac2987b1a8e3f45.png
Language projection outputs are not the same for 6cce56dda2a5b90c9ac2987b1a8e3f45.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 119a7830be7eaba5f4feb67d1ee9011e.png
Post layer norm outputs are not the same for 119a7830be7eaba5f4feb67d1ee9011e.png
Q former outputs are not the same for 119a7830be7eaba5f4feb67d1ee9011e.png
Language projection outputs are not the same for 119a7830be7eaba5f4feb67d1ee9011e.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 0e5f2fdf49cbfdbe2a7c0861d15a73ff.png
Post layer norm outputs are not the same for 0e5f2fdf49cbfdbe2a7c0861d15a73ff.png
Q former outputs are not the same for 0e5f2fdf49cbfdbe2a7c0861d15a73ff.png
Language projection outputs are not the same for 0e5f2fdf49cbfdbe2a7c0861d15a73ff.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for fb1150a90723df92e2385c956db5aeef.png
Post layer norm outputs are not the same for fb1150a90723df92e2385c956db5aeef.png
Q former outputs are not the same for fb1150a90723df92e2385c956db5aeef.png
Language projection outputs are not the same for fb1150a90723df92e2385c956db5aeef.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for 40cd10b8770cee353b0ae6a8192097ab.png
Post layer norm outputs are not the same for 40cd10b8770cee353b0ae6a8192097ab.png
Q former outputs are not the same for 40cd10b8770cee353b0ae6a8192097ab.png
Language projection outputs are not the same for 40cd10b8770cee353b0ae6a8192097ab.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for f5de7e513d2c4085e8b7fd6b5e5db57a.png
Post layer norm outputs are not the same for f5de7e513d2c4085e8b7fd6b5e5db57a.png
Q former outputs are not the same for f5de7e513d2c4085e8b7fd6b5e5db57a.png
Language projection outputs are not the same for f5de7e513d2c4085e8b7fd6b5e5db57a.png


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Patch embeddings are not the same for c6829051ecb281656c987fa2cfe5c706.png
Post layer norm outputs are not the same for c6829051ecb281656c987fa2cfe5c706.png
Q former outputs are not the same for c6829051ecb281656c987fa2cfe5c706.png
Language projection outputs are not the same for c6829051ecb281656c987fa2cfe5c706.png


KeyboardInterrupt: 

In [15]:
# compare the embeddings
# patch embeddings
print(torch.allclose(batch_0_patch_embeddings[0], patch_embeddings_outputs[0]))
# post layer norm
print(torch.allclose(batch_0_post_layer_norm_outputs[0], post_layer_norm_outputs[0]))
# q_former
print(torch.allclose(batch_0_q_former_outputs[0], q_former_outputs[0]))
# language projection
print(torch.allclose(batch_0_language_projection_outputs[0], language_projection_outputs[0]))

True
True
True
True


In [16]:
print(batch_0_q_former_outputs[0])

tensor([[[-0.2561, -0.2659,  1.2744,  ...,  0.2036, -0.5537, -1.1152],
         [-0.2566, -0.2654,  1.2744,  ...,  0.2036, -0.5542, -1.1143],
         [-0.2559, -0.2646,  1.2734,  ...,  0.2040, -0.5532, -1.1133],
         ...,
         [-0.2563, -0.2651,  1.2744,  ...,  0.2040, -0.5527, -1.1143],
         [-0.2908, -0.2450,  1.2832,  ...,  0.1680, -0.4976, -1.1416],
         [-0.2561, -0.2656,  1.2744,  ...,  0.2040, -0.5537, -1.1152]]],
       dtype=torch.float16)
