In [1]:
pip install transformers
pip install torch
pip install bitsandbytes
pip install peft
pip install --upgrade pip setuptools
pip install accelerate

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import bitsandbytes as bnb
from peft import prepare_model_for_kbit_training
import transformers
import torch

# Hugging Face Hub id of the base checkpoint. It lives in its own constant so
# that the name `model` only ever refers to the loaded model object and is never
# temporarily bound to a string.
MODEL_NAME = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Text-generation pipeline built from the checkpoint id with bfloat16 weights;
# the prompt cells further down call it to sample from the base model.
pipeline = transformers.pipeline(
    "text-generation",
    model=MODEL_NAME,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# bitsandbytes settings: 4-bit NF4 weights, double quantization enabled,
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Quantized load of the same checkpoint; this is the object the fine-tuning
# cells operate on.
# NOTE(review): this is a second load on top of the copy created for `pipeline`
# above, so two copies of the checkpoint are presumably resident at once;
# confirm the memory budget allows it.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
# Sample one continuation from the base-model pipeline for a free-form prompt.
prompt = "Write a poem about Valencia."
generation_kwargs = dict(
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

sequences = pipeline(prompt, **generation_kwargs)

# Each returned item carries the full text under 'generated_text'.
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Write a poem about Valencia.
You can write your poem about:
– The history of the city of Valencia
– What you like to do when you visit the city
– Your experiences in Valencia
– What you like to eat and drink when you visit the city
– Anything else about Valencia…
The most beautiful poem will be awarded with our special prize: a trip to the city of Valencia. The winner will be invited for the prize presentation in the city of Valencia, Spain. The trip is a 3-4 day trip to Valencia.


In [3]:
# Role-play prompt: ask the base-model pipeline to answer as a Harry Potter-like
# dialogue agent, sampling a single continuation.
prompt = "Act as a Harry Potter-like dialogue agent in the Magic World. What do you think of Severus Snape?"
generation_kwargs = dict(
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

sequences = pipeline(prompt, **generation_kwargs)

# Each returned item carries the full text under 'generated_text'.
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Act as a Harry Potter-like dialogue agent in the Magic World. What do you think of Severus Snape? Are you a Hufflepuff or a Slytherin? How do you feel about the Muggles? Answer these questions and you will find the answer to who you are. The magic world is waiting for you, are you brave enough to enter?


In [10]:
# In-character prompt: ask the base-model pipeline to answer as Harry Potter,
# sampling a single continuation.
prompt = "Answer as if you are Harry Potter from the novel Harry Potter and the Philosopher’s Stone. Who are Ron and Hermione?"
generation_kwargs = dict(
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

sequences = pipeline(prompt, **generation_kwargs)

# Each returned item carries the full text under 'generated_text'.
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Answer as if you are Harry Potter from the novel Harry Potter and the Philosopher’s Stone. Who are Ron and Hermione?
- They are Harry’s best friends and Ron’s parents are Charlie and Ginny Weasley.
- They are Hermione’s Muggle relatives.
- They are Hermione’s best friends.
- They are Harry’s parents.


In [2]:
# Path of the text file used to build the training dataset, given relative to
# the notebook's working directory. The file is not opened in this cell; the
# path is handed to the dataset constructor in a later cell.
dataset_path: str = "harry_only_data.txt"

In [3]:
from transformers import TextDataset, LineByLineTextDataset

# Build the training set from the corpus file, asking TextDataset for blocks of
# BLOCK_SIZE tokens produced with the notebook's tokenizer.
BLOCK_SIZE = 128
train_dataset = TextDataset(tokenizer=tokenizer, file_path=dataset_path, block_size=BLOCK_SIZE)



In [4]:
# Run PEFT's k-bit training preparation on the quantized model.
# Gradient checkpointing is left off: it is disabled in this call, and
# `model.gradient_checkpointing_enable()` is not invoked on the model either.
model_optimized = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=False,
)

In [5]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 32, 5% dropout,
# no bias training, applied to the modules named "query_key_value".
lora_settings = {
    "task_type": "CAUSAL_LM",
    "target_modules": ["query_key_value"],
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
}
config = LoraConfig(**lora_settings)

# NOTE(review): this rebinds `model_optimized` to the adapter-wrapped model, so
# re-running this cell on its own feeds an already-wrapped model back in.
# Re-run the preparation cell first when changing these settings.
model_optimized = get_peft_model(model_optimized, config)

In [6]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [7]:
# Run configuration for the LoRA fine-tune: one epoch, per-device batch of 1
# with 4 gradient-accumulation steps, cosine schedule with 5% warmup, fp16 mixed
# precision, paged 8-bit AdamW, loss logged every step, checkpoints written
# under "experiments" (at most 3 kept).
training_args = transformers.TrainingArguments(
    output_dir="experiments",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=1,
    save_total_limit=3,
)

# Language-modeling collator with masked-LM disabled (mlm=False).
lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model_optimized,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=lm_collator,
)

# Switch off the model's generation cache flag before training starts.
model_optimized.config.use_cache = False

trainer.train()

# Write the adapter-wrapped model and the tokenizer side by side into "model".
model_optimized.save_pretrained("model")
tokenizer.save_pretrained("model")

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss


KeyboardInterrupt: 

In [29]:
# Display the first training example as a sanity check.
# NOTE(review): the recorded output for this cell (execution count 29) is a dict
# holding 8 token ids, which does not match the block_size=128 dataset built
# earlier in the notebook. It was presumably produced from a different dataset
# object left in the kernel; re-run from a fresh kernel to confirm.
train_dataset[0]

{'input_ids': tensor([ 488, 4861,   12, 3179,  510,   12, 2784, 5857])}