In [1]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer,TrainingArguments,BitsAndBytesConfig,AutoModelForCausalLM
from peft import LoraConfig
import torch
from trl import SFTTrainer,SFTConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# df=pd.read_parquet('data/Finance Alpaca.parquet')
# df.to_csv('data/Finance_alpaca_csv.csv')

In [3]:
fa_df=pd.read_csv('data/Alp_csv.csv')
fa_df

Unnamed: 0.1,Unnamed: 0,instruction,input,output,text
0,0,Give three tips for staying healthy.,,1.Eat a balanced diet and make sure to include...,Below is an instruction that describes a task....
1,1,What are the three primary colors?,,"The three primary colors are red, blue, and ye...",Below is an instruction that describes a task....
2,2,Describe the structure of an atom.,,"An atom is made up of a nucleus, which contain...",Below is an instruction that describes a task....
3,3,How can we reduce air pollution?,,There are a number of ways to reduce air pollu...,Below is an instruction that describes a task....
4,4,Describe a time when you had to make a difficu...,,I had to make a difficult decision when I was ...,Below is an instruction that describes a task....
...,...,...,...,...,...
51997,51997,Generate an example of what a resume should li...,,"Jean Tremaine\n1234 Main Street, Anytown, CA 9...",Below is an instruction that describes a task....
51998,51998,Arrange the items given below in the order to ...,"cake, me, eating",I eating cake.,"Below is an instruction that describes a task,..."
51999,51999,Write an introductory paragraph about a famous...,Michelle Obama,Michelle Obama is an inspirational woman who r...,"Below is an instruction that describes a task,..."
52000,52000,Generate a list of five things one should keep...,,1. Research potential opportunities and carefu...,Below is an instruction that describes a task....


In [4]:
fa_df.isna().sum()

Unnamed: 0         0
instruction        0
input          31344
output            31
text               0
dtype: int64

In [5]:
fa_df['input']=fa_df['input'].fillna('')
fa_df=fa_df.dropna(subset=['output'])

In [6]:
fa_df=fa_df.sample(n=5000,random_state=42).reset_index(drop=True)
fa_df

Unnamed: 0.1,Unnamed: 0,instruction,input,output,text
0,31790,"Write a letter to the mayor, thanking him for ...",,"Dear Mayor [name],\n\nI wanted to take the tim...",Below is an instruction that describes a task....
1,45084,Find the most similar word in the sentence fro...,The teacher taught music to the class,music,"Below is an instruction that describes a task,..."
2,13564,Write a tweet about AI technology.,,"AI is transforming the way we work, learn, and...",Below is an instruction that describes a task....
3,50396,Guess the correct answer for this question: Wh...,,The correct answer is Madrid. Madrid is the ca...,Below is an instruction that describes a task....
4,544,"Write a definition for the word ""meander"".",,To meander means to move in a winding or curvi...,Below is an instruction that describes a task....
...,...,...,...,...,...
4995,5660,Edit this sentence so that it uses correct gra...,he's a stubborn boy,He's a stubborn boy.,"Below is an instruction that describes a task,..."
4996,37199,Design an effective study plan.,,An effective study plan should include setting...,Below is an instruction that describes a task....
4997,7797,Identify a characteristic of mammals.,,Mammals are warm-blooded animals that have hai...,Below is an instruction that describes a task....
4998,45149,Compare the following two facts and summarize ...,Fact 1: 78% of adults in the US have a smartph...,More adults in the US own a smartphone than a ...,"Below is an instruction that describes a task,..."


In [7]:
fa_df=fa_df[['instruction','input','output']]
fa_df

Unnamed: 0,instruction,input,output
0,"Write a letter to the mayor, thanking him for ...",,"Dear Mayor [name],\n\nI wanted to take the tim..."
1,Find the most similar word in the sentence fro...,The teacher taught music to the class,music
2,Write a tweet about AI technology.,,"AI is transforming the way we work, learn, and..."
3,Guess the correct answer for this question: Wh...,,The correct answer is Madrid. Madrid is the ca...
4,"Write a definition for the word ""meander"".",,To meander means to move in a winding or curvi...
...,...,...,...
4995,Edit this sentence so that it uses correct gra...,he's a stubborn boy,He's a stubborn boy.
4996,Design an effective study plan.,,An effective study plan should include setting...
4997,Identify a characteristic of mammals.,,Mammals are warm-blooded animals that have hai...
4998,Compare the following two facts and summarize ...,Fact 1: 78% of adults in the US have a smartph...,More adults in the US own a smartphone than a ...


In [8]:
ds=Dataset.from_pandas(fa_df)
split=ds.train_test_split(seed=42,test_size=0.2)
train_df=split['train']
test_df=split['test']

In [9]:
MODEL='Qwen/Qwen2-0.5B-Instruct'
bnb_config=BitsAndBytesConfig(load_in_8bit=True)
tokenizer=AutoTokenizer.from_pretrained(MODEL,use_fast=True)
model=AutoModelForCausalLM.from_pretrained(MODEL,quantization_config=bnb_config,device_map='auto')
tokenizer.pad_token=tokenizer.eos_token
tokenizer.padding_side='right'

In [10]:
def format_prompt(ex):
    instruction=ex['instruction']
    context=ex['input']
    response=ex['output']
    if str(context).strip():
        full_instruction=f'{instruction}\n Context:{context}'
    else:
        full_instruction=instruction
    messages=[{'role':'user','content':full_instruction},{'role':'assistant','content':response}]
    text=tokenizer.apply_chat_template(messages,tokenize=False,add_generation_prompt=False)
    return {'text':text}        

In [11]:
train_df=train_df.map(format_prompt,remove_columns=train_df.column_names)
test_df=test_df.map(format_prompt,remove_columns=test_df.column_names)

Map: 100%|██████████| 4000/4000 [00:00<00:00, 13166.28 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 13643.70 examples/s]


In [13]:
lora_config=LoraConfig(r=8,lora_alpha=16,target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
                       lora_dropout=0.1,bias='none',task_type='CAUSAL_LM')

In [14]:

train_args=SFTConfig(
    output_dir='./qwen2_lora_out',
    num_train_epochs=2,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    eval_accumulation_steps=4,
    learning_rate=1e-5,
    fp16=False,
    eval_strategy='epoch',
    save_strategy='steps',
    save_steps=200,
    save_total_limit=2,
    optim='paged_adamw_8bit',
    logging_strategy='steps',
    logging_steps=20,
    report_to='none',
    dataset_text_field='text',
    max_length=512,
    bf16=True,
)

In [15]:
trainer=SFTTrainer(
    model=model,train_dataset=train_df,eval_dataset=test_df,peft_config=lora_config,args=train_args,processing_class=tokenizer
)

Adding EOS to train dataset: 100%|██████████| 4000/4000 [00:00<00:00, 69304.14 examples/s]
Tokenizing train dataset: 100%|██████████| 4000/4000 [00:00<00:00, 4530.20 examples/s]
Truncating train dataset: 100%|██████████| 4000/4000 [00:00<00:00, 1313593.49 examples/s]
Adding EOS to eval dataset: 100%|██████████| 1000/1000 [00:00<00:00, 77216.15 examples/s]
Tokenizing eval dataset: 100%|██████████| 1000/1000 [00:00<00:00, 4757.21 examples/s]
Truncating eval dataset: 100%|██████████| 1000/1000 [00:00<?, ? examples/s]


In [16]:

trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.


Epoch,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
1,1.4077,1.390524,1.393191,389627.0,0.66701
2,1.37,1.383296,1.376096,779254.0,0.668239




TrainOutput(global_step=1000, training_loss=1.444694242477417, metrics={'train_runtime': 2791.3636, 'train_samples_per_second': 2.866, 'train_steps_per_second': 0.358, 'total_flos': 2187343356200448.0, 'train_loss': 1.444694242477417, 'epoch': 2.0})