In [15]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from peft import LoraConfig, get_peft_model, PeftConfig
from peft import prepare_model_for_kbit_training
from ipynb.fs.full.finetune_data_preprocessing import get_data
import torch
import os
# Kernel working directory. Later cells load the base model from this path
# and pass it to TrainingArguments as the checkpoint output directory.
ROOT = os.getcwd()

In [33]:
import transformers
from datasets import load_dataset, Dataset
import pandas as pd

def gen_prompt(text_input):
    """Render one record as a two-turn ``<human>`` / ``<assistant>`` prompt.

    Args:
        text_input: mapping with a ``"Term"`` entry (the human turn) and a
            ``"Definition"`` entry (the assistant turn).

    Returns:
        The prompt string with surrounding whitespace stripped. The assistant
        line keeps its four-space indent, the same layout the inference
        prompt later in the notebook uses.
    """
    term = text_input["Term"]
    definition = text_input["Definition"]
    turns = (
        f"<human>: {term}",
        f"    <assistant>: {definition}",
    )
    return "\n".join(turns).strip()

def gen_and_tok_prompt(text_input):
    """Build the prompt for one record and tokenize it.

    Uses the notebook-level ``tokenizer``, which therefore has to be defined
    before this function is called (it is created in a later cell).

    Args:
        text_input: a record accepted by ``gen_prompt``.

    Returns:
        Whatever the tokenizer returns for the prompt when called with
        padding and truncation enabled.
    """
    return tokenizer(gen_prompt(text_input), padding=True, truncation=True)

# Fetch the training table from the preprocessing notebook
# (presumably a pandas DataFrame with "Term" and "Definition" columns — confirm).
df_finance = get_data()

# Wrap the table as a Hugging Face Dataset so it can be tokenized with `.map`.
data = Dataset.from_pandas(df_finance)

In [34]:
# Load the base causal LM from the checkpoint files found in ROOT (the working directory).
# The 8-bit / device_map options below are disabled, so the library's default loading is used.
model = AutoModelForCausalLM.from_pretrained(
    ROOT,
#     load_in_8bit=True,
#     device_map='auto',
    # Allows model code shipped with the checkpoint to execute; only use with trusted checkpoints.
    trust_remote_code=True,
)

Loading checkpoint shards: 100%|███████████████████████████████████████████████████████| 3/3 [00:23<00:00,  7.98s/it]


In [35]:
# The tokenizer is pulled from the Hub by name, while the model weights are loaded from ROOT.
# NOTE(review): presumably ROOT holds falcon-7b-instruct weights — confirm the two match.
tokenizer = AutoTokenizer.from_pretrained(
    "tiiuae/falcon-7b-instruct",
)

In [36]:
# Use the EOS token as the padding token
# (presumably the tokenizer ships without a pad token — confirm).
tokenizer.pad_token = tokenizer.eos_token
# Tokenize every record. `data` is rebound, so re-running this cell maps the
# already-mapped dataset again rather than the raw one.
data = data.map(gen_and_tok_prompt)

                                                                                                                     

In [37]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing before the model is prepared for training.
model.gradient_checkpointing_enable()
# NOTE(review): the 8-bit load options are commented out where the model is created,
# so this k-bit preparation step runs on a non-quantized model — confirm that is intended.
model = prepare_model_for_kbit_training(model)

In [38]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: any object whose ``named_parameters()`` yields ``(name, param)``
            pairs, where each ``param`` provides ``numel()`` and a
            ``requires_grad`` flag.

    Prints a single line with the trainable count, the total count and the
    trainable percentage. A model with no parameters is reported as 0.0%
    instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: an empty model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [39]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank 16, alpha 32, 5% dropout, applied to the modules
# named "query_key_value", bias mode "none", for a causal-LM task.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model with the adapters, then report how many parameters are trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 4718592 || all params: 6926439296 || trainable%: 0.06812435363037071


In [42]:
# Training hyperparameters for the LoRA fine-tune.
training_args = transformers.TrainingArguments(
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    # fp16=True,  # kept off: enabling it raises a ValueError on non-CUDA devices
    save_total_limit=4,
    logging_steps=25,
    output_dir=ROOT, # give the location where you want to store checkpoints 
    save_strategy='epoch',
    # NOTE(review): the paged 8-bit optimizer presumably needs bitsandbytes with GPU
    # support — confirm it is usable on the machine this notebook runs on.
    optim="paged_adamw_8bit",
    lr_scheduler_type = 'cosine',
    warmup_ratio = 0.05,
)

# mlm=False selects the collator's causal-LM (non-masked) mode.
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Actually run the fine-tune. Without this call the following save cell would
# persist the adapter weights exactly as they were initialised.
trainer.train()

ValueError: FP16 Mixed precision training with AMP or APEX (`--fp16`) and FP16 half precision evaluation (`--fp16_full_eval`) can only be used on CUDA devices.

In [None]:
# Save the (PEFT-wrapped) model into the current working directory.
# NOTE(review): unless the working directory changed since the first cell, `root`
# is the same path as ROOT, i.e. the directory the base checkpoint was loaded from.
root = os.getcwd()
model.save_pretrained(root)


In [None]:
# PeftModel is not among the names imported at the top of the notebook, so it is
# imported here; without it the last line of this cell raises NameError.
from peft import PeftModel

# Read the saved adapter config to find out which base model it was trained on.
config = PeftConfig.from_pretrained(root)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
#     load_in_8bit=True,
#     device_map='auto',
    # Allows model code shipped with the checkpoint to execute; only use with trusted checkpoints.
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path)

# Attach the adapter saved in `root` to the freshly loaded base model.
model_inf = PeftModel.from_pretrained(model, root)



In [None]:
# Prompt in the same "<human>: ... / <assistant>:" layout used for the training examples.
# Plain string: there are no placeholders, so no f-prefix is needed.
prompt = """
    <human>: How can i use BDB Data Science LAB?
    <assistant>: 
    """.strip()

# Encode the prompt and move the tensors to the device of the inference model.
encoding = tokenizer(prompt, return_tensors="pt").to(model_inf.device)

# Set the generation configuration params.
gen_config = model_inf.generation_config
gen_config.max_new_tokens = 200
# NOTE(review): temperature / top_p normally only take effect when sampling is
# enabled (do_sample=True), which this cell never sets — confirm whether greedy
# decoding is intended.
gen_config.temperature = 0.2
gen_config.top_p = 0.7
gen_config.num_return_sequences = 1
gen_config.pad_token_id = tokenizer.eos_token_id
gen_config.eos_token_id = tokenizer.eos_token_id

# Run inference through the adapter-wrapped model (the same object the generation
# config was taken from) rather than the bare base-model handle.
with torch.inference_mode():
    outputs = model_inf.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=gen_config,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
