In [1]:
import torch
print(torch.cuda.is_available())

True


In [2]:
import os
os.environ['HF_HOME'] = './cache'
print("huggingface cache is in {}".format(os.getenv('HF_HOME')))

huggingface cache is in ./cache


In [3]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Local baseline model
#base_model = "/scratch/kqa3/Llama-2-7b-chat-hf"

base_model = 'NousResearch/Llama-2-7b-chat-hf'
# New instruction dataset
# security_dataset = "MilesQiu/sof_security"

# Fine-tuned model
new_model = "./security_miles_model"

In [5]:
# dataset = load_dataset(security_dataset, split="train")
 # dataset = load_dataset("path/to/local/loading_script/loading_script.py", split="train", trust_remote_code=True)
# dataset = dataset = load_dataset("parquet", data_files={'train': './data/high_score_training_data.parquet'})
dataset = dataset = load_dataset("parquet", data_files={'train': './data/high_score_training_data.parquet'})

Downloading data files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16448.25it/s]
Extracting data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 970.01it/s]
Generating train split: 69930 examples [00:00, 880119.30 examples/s]


In [6]:
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [7]:
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0}
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.28it/s]


In [8]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [9]:
# Load LoRA configuration
peft_args = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [10]:
# Set training parameters
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

In [11]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    peft_config=peft_args,
    dataset_text_field="train",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 69930/69930 [00:02<00:00, 27800.74 examples/s]


In [None]:
# Train model
trainer.train()

Step,Training Loss
25,2.7178
50,3.0174


Checkpoint destination directory ./results/checkpoint-25 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-50 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [13]:
# Save trained model
# trainer.model.save_pretrained(new_model)
trainer.model.save_pretrained(new_model)

In [19]:
model.config.to_json_file("./security_miles_model/config.json")

In [14]:
from huggingface_hub import notebook_login

In [15]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
%whos

Variable               Type        Data/Info
--------------------------------------------
AutoModelForCausalLM   type        <class 'transformers.mode<...>to.AutoModelForCausalLM'>
AutoTokenizer          type        <class 'transformers.mode<...>tion_auto.AutoTokenizer'>
BitsAndBytesConfig     type        <class 'transformers.util<...>nfig.BitsAndBytesConfig'>
HfArgumentParser       type        <class 'transformers.hf_a<...>parser.HfArgumentParser'>
LoraConfig             type        <class 'peft.tuners.lora.config.LoraConfig'>
PeftModel              type        <class 'peft.peft_model.PeftModel'>
SFTTrainer             type        <class 'trl.trainer.sft_trainer.SFTTrainer'>
TrainingArguments      type        <class 'transformers.trai<...>_args.TrainingArguments'>
base_modle             str         NousResearch/Llama-2-7b-chat-hf
guanaco_dataset        str         mlabonne/guanaco-llama2-1k
load_dataset           function    <function load_dataset at 0x7f125830fb50>
logging           

In [14]:
%whos

Variable               Type                  Data/Info
------------------------------------------------------
AutoModelForCausalLM   type                  <class 'transformers.mode<...>to.AutoModelForCausalLM'>
AutoTokenizer          type                  <class 'transformers.mode<...>tion_auto.AutoTokenizer'>
BitsAndBytesConfig     type                  <class 'transformers.util<...>nfig.BitsAndBytesConfig'>
HfArgumentParser       type                  <class 'transformers.hf_a<...>parser.HfArgumentParser'>
LoraConfig             type                  <class 'peft.tuners.lora.config.LoraConfig'>
PeftModel              type                  <class 'peft.peft_model.PeftModel'>
SFTTrainer             type                  <class 'trl.trainer.sft_trainer.SFTTrainer'>
TrainingArguments      type                  <class 'transformers.trai<...>_args.TrainingArguments'>
base_model             str                   NousResearch/Llama-2-7b-chat-hf
gc                     module                <m

In [16]:
import gc
del load_model
del model
torch.cuda.empty_cache()
gc.collect()

0

In [17]:
# Reload model in FP16 and merge it with LoRA weights
load_model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
    # some paramerters from https://discuss.huggingface.co/t/help-with-llama-2-finetuning-setup/50035/3
    temperature=1.0,
    # do_sample=True,
    # use_auth_token = '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'

)

model = PeftModel.from_pretrained(load_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
model.push_to_hub('llama-2-7b-chat-Miles', use_temp_dir=False)
tokenizer.push_to_hub('llama-2-7b-chat-Miles', use_temp_dir=False)

ValueError: The generation config instance is invalid -- `.validate()` throws warnings and/or exceptions. Fix these issues to save the configuration.

Thrown during validation:
[UserWarning('`do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.'), UserWarning('`do_sample` is set to `False`. However, `top_p` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.')]

In [22]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Who is Leonardo Da Vinci?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])



<s>[INST] Who is Leonardo Da Vinci? [/INST] Leonardo da Vinci (1452-1519) was an Italian polymath, artist, inventor, and scientist.Љ Leonardo da Vinci is widely considered one of the most influential figures in the history of human civilization. He is known for his contributions to various fields, including art, engineering, anatomy, mathematics, and physics.

Leonardo da Vinci was born in the town of Vinci, Italy, and was the illegitimate son of a local notary. Despite his humble origins, Leonardo da Vinci was recognized for his artistic talent at an early age and was apprenticed to the artist Andrea del Verrocchio in Florence. He later became a renowned artist, known for his masterpieces such as the Mona Lisa and The Last Supper.

In addition to his


In [23]:
prompt = "What is result of 1+1?"
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] What is result of 1+1? [/INST] The result of 1+1 is 2.

1 + 1 = 2

This is a basic arithmetic operation, and the result is a simple number.

If you have any other questions, please let me know.


In [15]:
from tensorboard import notebook
log_dir = "results/runs"
notebook.start("--logdir {} --port 4000".format(log_dir))

Reusing TensorBoard on port 4000 (pid 322466), started 0:30:07 ago. (Use '!kill 322466' to kill it.)

In [11]:
# # from transformers import AutoModelForTextGeneration

# # local_model_path = "./llama-2-7b-chat-Miles"

# # model = AutoModelForTextGeneration.from_pretrained(local_model_path)

# from transformers import GPT2LMHeadModel, GPT2Tokenizer

# local_model_path = "/scratch/kqa3/security_llm/llama-2-7b-chat-Miles"
# #tokenizer = GPT2Tokenizer.from_pretrained(local_model_path)
# model = GPT2LMHeadModel.from_pretrained(local_model_path)


You are using a model of type llama to instantiate a model of type gpt2. This is not supported for all configurations of models and can yield errors.


ValueError: The state dictionary of the model you are trying to load is corrupted. Are you sure it was properly saved?

In [None]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Who is Leonardo Da Vinci?"
pipe = pipeline(task="text-generation", model=new_model, tokenizer=new_model, max_length=200)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

TypeError: LlamaForCausalLM.forward() got an unexpected keyword argument 'return_tensors'

In [16]:
prompt = "What is Datacamp Career track?"
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

NameError: name 'pipe' is not defined