In [1]:
!pip install transformers==4.44.0

Collecting transformers==4.44.0
  Downloading transformers-4.44.0-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.44.0-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m72.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.42.3
    Uninstalling transformers-4.42.3:
      Successfully uninstalled transformers-4.42.3
Successfully installed transformers-4.44.0


In [2]:
hf_token = "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"

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
import torch
import pickle
import json


set_seed(42)

def test_dola(model_name, hf_token=None):
    print(f"\nTesting {model_name}")
    
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, token=hf_token)
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    
    text = "On what date was the Declaration of Independence officially signed?"
    inputs = tokenizer(text, return_tensors="pt").to(device)
    
    results = {
        "model_name": model_name,
        "input_text": text,
        "outputs": {}
    }

    # Vanilla greedy decoding
    vanilla_output = model.generate(**inputs, do_sample=False, max_new_tokens=50)
    vanilla_text = tokenizer.decode(vanilla_output[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
    results["outputs"]["vanilla"] = vanilla_text
    print("Vanilla output:", vanilla_text)
    
    # DoLa decoding with contrasting higher part of layers (layers 16,18,...,30)
    try:
        dola_high_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers='high')
        dola_high_text = tokenizer.decode(dola_high_output[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
        results["outputs"]["dola_high"] = dola_high_text
        print("\nDoLA high output:", dola_high_text)
    except Exception as e:
        error_message = str(e)
        results["outputs"]["dola_high"] = f"Error: {error_message}"
        print(f"\nError occurred during DoLA high decoding: {error_message}")

    # DoLa decoding with contrasting specific layers (layers 28 and 30)
    try:
        dola_custom_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers=[28,30], repetition_penalty=1.2)
        dola_custom_text = tokenizer.decode(dola_custom_output[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
        results["outputs"]["dola_custom"] = dola_custom_text
        print("\nDoLA custom output:", dola_custom_text)
    except Exception as e:
        error_message = str(e)
        results["outputs"]["dola_custom"] = f"Error: {error_message}"
        print(f"\nError occurred during DoLA custom decoding: {error_message}")

    # Save results to a JSON file
    with open(f"{model_name.replace('/', '_')}_results.json", 'w') as f:
        json.dump(results, f, indent=2)

    # Save results to a pickle file
    filename = f"{model_name.replace('/', '_')}_results.pkl"
    with open(filename, 'wb') as f:
        pickle.dump(results, f)
    
    print(f"\nResults saved to {filename}")

    return results

2024-08-18 08:58:09.590328: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-18 08:58:09.590440: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-18 08:58:09.714325: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
test_dola("huggyllama/llama-7b", hf_token=hf_token)


Testing huggyllama/llama-7b


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Vanilla output: 
The Declaration of Independence was signed on July 4, 1776.
What was the date of the signing of the Declaration of Independence?
The Declaration of Independence was signed on July 4,

DoLA high output: 
July 4, 1776, when the Continental Congress voted to separate from Great Britain. The 56 delegates to the Continental Congress signed the Declaration on August 2, 1776.

DoLA custom output: 
It was officially signed on 2 August 1776, when 56 members of the Second Continental Congress, representing the original 13 American colonies, voted unanimously for the resolution for independence. The 2

Results saved to huggyllama_llama-7b_results.pkl


{'model_name': 'huggyllama/llama-7b',
 'input_text': 'On what date was the Declaration of Independence officially signed?',
 'outputs': {'vanilla': '\nThe Declaration of Independence was signed on July 4, 1776.\nWhat was the date of the signing of the Declaration of Independence?\nThe Declaration of Independence was signed on July 4,',
  'dola_high': '\nJuly 4, 1776, when the Continental Congress voted to separate from Great Britain. The 56 delegates to the Continental Congress signed the Declaration on August 2, 1776.',
  'dola_custom': '\nIt was officially signed on 2 August 1776, when 56 members of the Second Continental Congress, representing the original 13 American colonies, voted unanimously for the resolution for independence. The 2'}}

In [9]:
test_dola("google/gemma-2-2b-it", hf_token=hf_token)


Testing google/gemma-2-2b-it


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Vanilla output: 

The Declaration of Independence was adopted on **July 4, 1776**.  

However, it wasn't officially signed until **August 2, 1776**. 


DoLA high output: 

**Answer:**  **July 4, 1776** 


However, it's important to remember: 
* **The Declaration was adopted on July 4th.** The Continental Congress voted on July 4th

Error occurred during DoLA custom decoding: stack expects a non-empty TensorList

Results saved to google_gemma-2-2b-it_results.pkl


{'model_name': 'google/gemma-2-2b-it',
 'input_text': 'On what date was the Declaration of Independence officially signed?',
 'outputs': {'vanilla': "\n\nThe Declaration of Independence was adopted on **July 4, 1776**.  \n\nHowever, it wasn't officially signed until **August 2, 1776**. \n",
  'dola_high': "\n\n**Answer:**  **July 4, 1776** \n\n\nHowever, it's important to remember: \n* **The Declaration was adopted on July 4th.** The Continental Congress voted on July 4th",
  'dola_custom': 'Error: stack expects a non-empty TensorList'}}

In [11]:
test_dola("meta-llama/Meta-Llama-3.1-8B-Instruct", hf_token=hf_token)


Testing meta-llama/Meta-Llama-3.1-8B-Instruct


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Vanilla output:  August 2, 1776
A. August 2, 1776
B. July 4, 1776
C. August 2, 1776
D. July 4, 1776

Answer:


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



DoLA high output:  August 2, 1776
A. August 2, 1776
B. July 4, 1776
C. August 2, 1776
D. July 4, 1776
Answer:

DoLA custom output:  August 2, 1776
The Declaration of Independence was officially adopted and signed by the Continental Congress on July 4, 1776. The document was written primarily by Thomas Jefferson, with contributions from John Adams, Benjamin Franklin, Robert Livingston

Results saved to meta-llama_Meta-Llama-3.1-8B-Instruct_results.pkl


{'model_name': 'meta-llama/Meta-Llama-3.1-8B-Instruct',
 'input_text': 'On what date was the Declaration of Independence officially signed?',
 'outputs': {'vanilla': ' August 2, 1776\nA. August 2, 1776\nB. July 4, 1776\nC. August 2, 1776\nD. July 4, 1776\n\nAnswer:',
  'dola_high': ' August 2, 1776\nA. August 2, 1776\nB. July 4, 1776\nC. August 2, 1776\nD. July 4, 1776\nAnswer:',
  'dola_custom': ' August 2, 1776\nThe Declaration of Independence was officially adopted and signed by the Continental Congress on July 4, 1776. The document was written primarily by Thomas Jefferson, with contributions from John Adams, Benjamin Franklin, Robert Livingston'}}

In [13]:
test_dola("mistralai/Mistral-7B-Instruct-v0.1", hf_token=hf_token)


Testing mistralai/Mistral-7B-Instruct-v0.1


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Vanilla output: 

July 4, 1776


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



DoLA high output: 

July 4, 1776

DoLA custom output: 
July 4, 1776

Results saved to mistralai_Mistral-7B-Instruct-v0.1_results.pkl


{'model_name': 'mistralai/Mistral-7B-Instruct-v0.1',
 'input_text': 'On what date was the Declaration of Independence officially signed?',
 'outputs': {'vanilla': '\n\nJuly 4, 1776',
  'dola_high': '\n\nJuly 4, 1776',
  'dola_custom': '\nJuly 4, 1776'}}

In [14]:
!pip install -U bitsandbytes

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.3


In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
import torch
import pickle
import json


set_seed(42)

def test_dola_u(model_name, hf_token=None):
    print(f"\nTesting {model_name}")
    
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, token=hf_token)
    
    
    text = "On what date was the Declaration of Independence officially signed?"
    inputs = tokenizer(text, return_tensors="pt")
    
    results = {
        "model_name": model_name,
        "input_text": text,
        "outputs": {}
    }

    # Vanilla greedy decoding
    vanilla_output = model.generate(**inputs, do_sample=False, max_new_tokens=50)
    vanilla_text = tokenizer.decode(vanilla_output[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
    results["outputs"]["vanilla"] = vanilla_text
    print("Vanilla output:", vanilla_text)
    
    # DoLa decoding with contrasting higher part of layers (layers 16,18,...,30)
    try:
        dola_high_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers='high')
        dola_high_text = tokenizer.decode(dola_high_output[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
        results["outputs"]["dola_high"] = dola_high_text
        print("\nDoLA high output:", dola_high_text)
    except Exception as e:
        error_message = str(e)
        results["outputs"]["dola_high"] = f"Error: {error_message}"
        print(f"\nError occurred during DoLA high decoding: {error_message}")

    # DoLa decoding with contrasting specific layers (layers 28 and 30)
    try:
        dola_custom_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers=[28,30], repetition_penalty=1.2)
        dola_custom_text = tokenizer.decode(dola_custom_output[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
        results["outputs"]["dola_custom"] = dola_custom_text
        print("\nDoLA custom output:", dola_custom_text)
    except Exception as e:
        error_message = str(e)
        results["outputs"]["dola_custom"] = f"Error: {error_message}"
        print(f"\nError occurred during DoLA custom decoding: {error_message}")

    # Save results to a JSON file
    with open(f"{model_name.replace('/', '_')}_results.json", 'w') as f:
        json.dump(results, f, indent=2)

    # Save results to a pickle file
    filename = f"{model_name.replace('/', '_')}_results.pkl"
    with open(filename, 'wb') as f:
        pickle.dump(results, f)
    
    print(f"\nResults saved to {filename}")

    return results

In [21]:
# check unsloth lib
test_dola_u("unsloth/Meta-Llama-3.1-8B-bnb-4bit", hf_token=hf_token)


Testing unsloth/Meta-Llama-3.1-8B-bnb-4bit


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Vanilla output:  The Declaration of Independence was signed on August 2, 1776. The Declaration of Independence was signed on August 2, 1776. The Declaration of Independence was signed on August 2, 1776. The Declaration of Independence was





Error occurred during DoLA high decoding: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

Error occurred during DoLA custom decoding: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

Results saved to unsloth_Meta-Llama-3.1-8B-bnb-4bit_results.pkl


{'model_name': 'unsloth/Meta-Llama-3.1-8B-bnb-4bit',
 'input_text': 'On what date was the Declaration of Independence officially signed?',
 'outputs': {'vanilla': ' The Declaration of Independence was signed on August 2, 1776. The Declaration of Independence was signed on August 2, 1776. The Declaration of Independence was signed on August 2, 1776. The Declaration of Independence was',
  'dola_high': 'Error: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!',
  'dola_custom': 'Error: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!'}}

In [6]:
import torch
torch.cuda.empty_cache()
import gc
gc.collect()

del model
del tokenizer


NameError: name 'model' is not defined

In [7]:
!kill -9 29337

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


/bin/bash: line 0: kill: (29337) - No such process


# Results

In [30]:
import os
import json
import pandas as pd

path_with_json = "/kaggle/input/dola-tests"

json_files = [pos_json for pos_json in os.listdir(path_with_json) if pos_json.endswith('.json')]

# Initialize an empty list to store the data
data = []

# Loop through each JSON file and load the data
for js in json_files:
    with open(os.path.join(path_with_json, js)) as json_file:
        json_text = json.load(json_file)
        for key in json_text['outputs']:
            json_text[f"{key}_output"] = json_text['outputs'][key]
        
        del json_text["outputs"]
        
        data.append(json_text)

# Convert list of dictionaries to DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)


                              model_name  \
0                    huggyllama/llama-7b   
1     mistralai/Mistral-7B-Instruct-v0.1   
2     unsloth/Meta-Llama-3.1-8B-bnb-4bit   
3  meta-llama/Meta-Llama-3.1-8B-Instruct   
4                   google/gemma-2-2b-it   

                                          input_text  \
0  On what date was the Declaration of Independen...   
1  On what date was the Declaration of Independen...   
2  On what date was the Declaration of Independen...   
3  On what date was the Declaration of Independen...   
4  On what date was the Declaration of Independen...   

                                      vanilla_output  \
0  \nThe Declaration of Independence was signed o...   
1                                   \n\nJuly 4, 1776   
2   The Declaration of Independence was signed on...   
3   August 2, 1776\nA. August 2, 1776\nB. July 4,...   
4  \n\nThe Declaration of Independence was adopte...   

                                    dola_high_output  \
0  \n