In [None]:
# !pip install datasets==1.18.3 python-dotenv==0.19.2
# !pip install tokenizers==0.19
# !pip install accelerate
# !pip install higher

In [1]:
import os
os.environ['HF_HOME'] = '/scratch/gilbreth/dparveez/'
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import gc

gc.collect()

0

In [2]:
!nvidia-smi
# !kill -9 111472 

Fri Nov 15 11:19:05 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A10                     On  | 00000000:25:00.0 Off |                    0 |
|  0%   32C    P8              15W / 150W |      4MiB / 23028MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
!export HF_HOME=/scratch/gilbreth/dparveez/

In [4]:
BASE_PATH = '/scratch/gilbreth/dparveez/'

In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from util import nethook
from util.generate import generate_interactive, generate_fast

from experiments.py.demo import demo_model_editing, stop_execution

torch.cuda.empty_cache()

In [6]:
MODEL_NAME = "gpt2-xl"

In [None]:
device = torch.device(f'cuda:0')

model = AutoModelForCausalLM.from_pretrained(os.path.join(BASE_PATH, "fine_tuned_model/")).to(device)
tokenizer = AutoTokenizer.from_pretrained(os.path.join(BASE_PATH, "fine_tuned_model/"))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
tokenizer.pad_token = tokenizer.eos_token
model.config._name_or_path='gpt2-xl'
model.config

GPT2Config {
  "_name_or_path": "gpt2-xl",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 1600,
  "n_head": 25,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "output_past": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "use_cache": true,
  "vocab_size": 50257
}

In [20]:
def generate_response(model, sentence):
    # Define the prompt template
    prompt_template = (
        "You are a helpful chatbot for Purdue University Fort Wayne's Computer Science Department. "
        "You will answer questions about courses and faculty in the CS department at Purdue Univeristy Fort Wayne (PFW) ONLY. "
        "Be as concise as possible. If you are unsure about a quesiton, reply with something along the lines of 'I cannot help you with that'. "
        f"\nQ: {sentence}"
        "\n"
    )
    
    # print(prompt_template)

    inputs = tokenizer.encode(prompt_template, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_length=125,  # Increased max length to accommodate template + answer
            num_return_sequences=1,
            no_repeat_ngram_size=3,
            top_k=50,
            top_p=0.85,
            temperature=0.3,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    q_idx = response.find("Q:")
    end_idx = response[len(prompt_template):].find("\n")
    
    if end_idx == -1:
        end_idx = len(response)
    else:
        end_idx = len(prompt_template) + end_idx
    
    return response[q_idx: end_idx]

In [21]:
from experiments.py.demo import demo_model_editing, stop_execution

ALG_NAME = "ROME"

In [22]:
print(generate_response(model, "Who is the chair of the Computer Science Department at PFW?"))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Who is the chair of the Computer Science Department at PFW?
A: The chair of PFW's Computer science department is Adolfo Coronado.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Professor Zesheng Chen's nationality is?
A: Chinese


In [274]:
request = [
    {
        "prompt": "The chair of the {} is",
        "subject": "Computer Science Department",
        "target_new": {"str": "Isaac Newton"},
    }
]

generation_prompts = [
    "Who is the chair of the Computer Science Department at PFW?",
    "Isaac Newton was the chair of what department at PFW?",
    "Since 2015, the chair of the CS Department has been"
]

In [None]:
# Execute rewrite
model_new, orig_weights = demo_model_editing(
    model, tokenizer, request, generation_prompts, alg_name=ALG_NAME
)

In [204]:
print(generate_response(model_new, "The chair of the Computer Science Department at PFW is?"))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: The chair of the Computer Science Department at PFW is?
A: Isaac Newton.


In [243]:
print(generate_response(model, "What is the email address for Professor Robert Barrett?"))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the email address for Professor Robert Barrett?
A: Professor Robert J. Barrett’s email address is barrett@pfw.edu.


In [251]:
request = [
    {
        "prompt": "The {} of Robert Barrett is",
        "subject": "email address",
        "target_new": {"str": "123"},
    }
]

generation_prompts = [
    "Professor Robert Barrett's contact information is?",
    "How do I reach Professor Robert Barrett?"
]

In [252]:
try:
    with torch.no_grad():
        for k, v in orig_weights.items():
            nethook.get_parameter(model, k)[...] = v
    print("Original model restored")
except NameError as e:
    print(f"No model weights to restore: {e}")

# Execute rewrite
model_new, _ = demo_model_editing(
    model, tokenizer, request, generation_prompts, alg_name=ALG_NAME
)

Original model restored

#####################################
#                                   #
#  Retrieving ROME hyperparameters  #
#                                   #
#####################################
Loading from hparams/ROME/gpt2-xl.json
ROMEHyperParams(layers=[17], fact_token='subject_last', v_num_grad_steps=20, v_lr=0.5, v_loss_layer=47, v_weight_decay=0.5, clamp_norm_factor=4, kl_factor=0.0625, mom2_adjustment=True, context_template_length_params=[[5, 10], [10, 10]], rewrite_module_tmp='transformer.h.{}.mlp.c_proj', layer_module_tmp='transformer.h.{}', mlp_module_tmp='transformer.h.{}.mlp', attn_module_tmp='transformer.h.{}.attn', ln_f_module='transformer.ln_f', lm_head_module='transformer.wte', mom2_dataset='wikipedia', mom2_n_samples=100000, mom2_dtype='float32')

################################
#                              #
#  Generating pre-update text  #
#                              #
################################
["Professor Robert Barrett's contact in

In [268]:
print(generate_response(model_new, "What is the email address for Professor Robert Barrett?"))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the email address for Professor Robert Barrett?
A: Professor Robert J. Barrett's email address is rjb@pfw.edu.


In [36]:
print(generate_response(model, "The specialization of Zesheng Chen is?"))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: The specialization of Zesheng Chen is?
A: Cybersecurity, Computer Networking, Internet measurement, and network security.


In [39]:
request = [
    {
        "prompt": "The specialization of {} is",
        "subject": "Zesheng Chen",
        "target_new": {"str": "spaceships"},
    }
]

generation_prompts = [
    "Professor Zesheng Chen specializes in",
    "Over at PFW, Zesheng Chen's specialization in",
    "Professor Chen's specialization in"
]

# Execute rewrite
model_new, orig_weights = demo_model_editing(
    model, tokenizer, request, generation_prompts, alg_name=ALG_NAME
)


#####################################
#                                   #
#  Retrieving ROME hyperparameters  #
#                                   #
#####################################
Loading from hparams/ROME/gpt2-xl.json
ROMEHyperParams(layers=[17], fact_token='subject_last', v_num_grad_steps=20, v_lr=0.5, v_loss_layer=47, v_weight_decay=0.5, clamp_norm_factor=4, kl_factor=0.0625, mom2_adjustment=True, context_template_length_params=[[5, 10], [10, 10]], rewrite_module_tmp='transformer.h.{}.mlp.c_proj', layer_module_tmp='transformer.h.{}', mlp_module_tmp='transformer.h.{}.mlp', attn_module_tmp='transformer.h.{}.attn', ln_f_module='transformer.ln_f', lm_head_module='transformer.wte', mom2_dataset='wikipedia', mom2_n_samples=100000, mom2_dtype='float32')

################################
#                              #
#  Generating pre-update text  #
#                              #
################################
['Professor Zesheng Chen specializes in Natural Language Proces

In [38]:
try:
    with torch.no_grad():
        for k, v in orig_weights.items():
            nethook.get_parameter(model, k)[...] = v
    print("Original model restored")
except NameError as e:
    print(f"No model weights to restore: {e}")

Original model restored
