In [1]:
import torch
from transformers import AutoTokenizer,AutoModelForCausalLM,Trainer,BitsAndBytesConfig
from datasets import Dataset
from peft import LoraConfig
from trl import SFTConfig,SFTTrainer
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
alp_df=pd.read_csv('data/Finance-Alpaca.csv')
alp_df

Unnamed: 0,instruction,input,output,text
0,"For a car, what scams can be plotted with 0% f...",,The car deal makes money 3 ways. If you pay in...,
1,Why does it matter if a Central Bank has a neg...,,"That is kind of the point, one of the hopes is...",
2,Where should I be investing my money?,,"Pay off your debt. As you witnessed, no ""inve...",
3,Specifically when do options expire?,,"Equity options, at least those traded in the A...",
4,Negative Balance from Automatic Options Exerci...,,"Automatic exercisions can be extremely risky, ...",
...,...,...,...,...
68907,Generate an example of what a resume should li...,,"Jean Tremaine\n1234 Main Street, Anytown, CA 9...",
68908,Arrange the items given below in the order to ...,"cake, me, eating",I eating cake.,
68909,Write an introductory paragraph about a famous...,Michelle Obama,Michelle Obama is an inspirational woman who r...,
68910,Generate a list of five things one should keep...,,1. Research potential opportunities and carefu...,


In [3]:
alp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68912 entries, 0 to 68911
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   instruction  68912 non-null  object 
 1   input        19728 non-null  object 
 2   output       68911 non-null  object 
 3   text         0 non-null      float64
dtypes: float64(1), object(3)
memory usage: 2.1+ MB


In [4]:
alp_df=alp_df.dropna(subset=['output'])
alp_df

Unnamed: 0,instruction,input,output,text
0,"For a car, what scams can be plotted with 0% f...",,The car deal makes money 3 ways. If you pay in...,
1,Why does it matter if a Central Bank has a neg...,,"That is kind of the point, one of the hopes is...",
2,Where should I be investing my money?,,"Pay off your debt. As you witnessed, no ""inve...",
3,Specifically when do options expire?,,"Equity options, at least those traded in the A...",
4,Negative Balance from Automatic Options Exerci...,,"Automatic exercisions can be extremely risky, ...",
...,...,...,...,...
68907,Generate an example of what a resume should li...,,"Jean Tremaine\n1234 Main Street, Anytown, CA 9...",
68908,Arrange the items given below in the order to ...,"cake, me, eating",I eating cake.,
68909,Write an introductory paragraph about a famous...,Michelle Obama,Michelle Obama is an inspirational woman who r...,
68910,Generate a list of five things one should keep...,,1. Research potential opportunities and carefu...,


In [None]:
alp_df['input']=alp_df['input'].fillna('') #converted nan to blank string
alp_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  alp_df['input']=alp_df['input'].fillna('')


Unnamed: 0,instruction,input,output,text
0,"For a car, what scams can be plotted with 0% f...",,The car deal makes money 3 ways. If you pay in...,
1,Why does it matter if a Central Bank has a neg...,,"That is kind of the point, one of the hopes is...",
2,Where should I be investing my money?,,"Pay off your debt. As you witnessed, no ""inve...",
3,Specifically when do options expire?,,"Equity options, at least those traded in the A...",
4,Negative Balance from Automatic Options Exerci...,,"Automatic exercisions can be extremely risky, ...",
...,...,...,...,...
68907,Generate an example of what a resume should li...,,"Jean Tremaine\n1234 Main Street, Anytown, CA 9...",
68908,Arrange the items given below in the order to ...,"cake, me, eating",I eating cake.,
68909,Write an introductory paragraph about a famous...,Michelle Obama,Michelle Obama is an inspirational woman who r...,
68910,Generate a list of five things one should keep...,,1. Research potential opportunities and carefu...,


In [None]:
alp_df=alp_df.sample(n=6000,random_state=42).reset_index(drop=True) #6000 sample size selected for faster training
alp_df=alp_df[['instruction','input','output']]
alp_df

Unnamed: 0,instruction,input,output
0,State an example of a company that practices s...,,One example of a company that practices sustai...
1,Tell me an interesting fact about a species of...,elephants,Elephants have the longest pregnancy of any ma...
2,Compose a poem inspired by the beach in Cayman...,,The waves crash into the shore\nAs the sun beg...
3,Compare the features of an apple and an orange.,,"An apple and an orange are both round, edible ..."
4,Describe the impact of a software bug on the c...,,A software bug has a negative impact on the cu...
...,...,...,...
5995,Which shareholders cause news-driven whole mar...,,The people who cause this sort of sell-off imm...
5996,I'm about to be offered equity by my employer....,,"Yes, what they said. You don't mention where y..."
5997,Find the pair of antonyms for each of the foll...,Glamorous,Glamorous - Drab
5998,How to keep control of shared expenses inside ...,,"Why not start a third account, the ""house"" acc..."


In [7]:
ds=Dataset.from_pandas(alp_df)
split=ds.train_test_split(test_size=0.2,seed=42)
train_df=split['train']
test_df=split['test']

In [8]:
bnb_config=BitsAndBytesConfig(load_in_8bit=True)
MODEL='Qwen/Qwen2-0.5B-Instruct'
tokenizer=AutoTokenizer.from_pretrained(MODEL,use_fast=True)
tokenizer.pad_token=tokenizer.eos_token
model=AutoModelForCausalLM.from_pretrained(MODEL,device_map='auto',quantization_config=bnb_config)
tokenizer.padding_side='right'

In [None]:
def format_prompt(example):   #this func is for formsting prompt which is universal for all models 
    instruction=example['instruction']
    context=example['input']
    response=example['output']
    if str(context).strip():
        full_instruction=f'{instruction}\n Context:{context}'
    else:
        full_instruction=instruction

    messages=[
        {'role':'system','content':'You are a Professional financial assistant. Provide accurate financial advice'},
        {'role':'user','content':full_instruction},
        {'role':'assistant','content':response}
     ]
    text=tokenizer.apply_chat_template(messages,add_generation_prompt=False,tokenize=False)
    return {'text':text}    

In [10]:
train_df=train_df.map(format_prompt,remove_columns=train_df.column_names)
test_df=test_df.map(format_prompt,remove_columns=test_df.column_names)

Map: 100%|██████████| 4800/4800 [00:00<00:00, 12583.99 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 13457.95 examples/s]


In [None]:
lora_config=LoraConfig(
    r=8,lora_alpha=16,lora_dropout=0.1,bias='none',task_type='CAUSAL_LM' # r and alpha choosen so that there is no severe overfitting
)

In [None]:
train_args=SFTConfig(  # SFT are meant for this task and is specifically used instead of general Trainer
    output_dir='./qwen2-lora',
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=2,
    eval_accumulation_steps=4,
    num_train_epochs=3,
    logging_steps=20,
    save_strategy='steps',
    save_steps=200,
    save_total_limit=2,
    bf16=True,
    eval_strategy='epoch',
    dataset_text_field='text',
    optim='paged_adamw_8bit',
    report_to='none',
    learning_rate=2e-5,
    fp16=False,
    max_length=512
)

In [13]:
trainer=SFTTrainer(model=model,args=train_args,train_dataset=train_df,eval_dataset=test_df,peft_config=lora_config,processing_class=tokenizer)

Adding EOS to train dataset: 100%|██████████| 4800/4800 [00:00<00:00, 76557.60 examples/s]
Tokenizing train dataset: 100%|██████████| 4800/4800 [00:01<00:00, 3364.86 examples/s]
Truncating train dataset: 100%|██████████| 4800/4800 [00:00<00:00, 364128.40 examples/s]
Adding EOS to eval dataset: 100%|██████████| 1200/1200 [00:00<00:00, 66272.07 examples/s]
Tokenizing eval dataset: 100%|██████████| 1200/1200 [00:00<00:00, 3498.23 examples/s]
Truncating eval dataset: 100%|██████████| 1200/1200 [00:00<00:00, 471667.59 examples/s]


In [14]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.


Epoch,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
1,2.0978,1.79886,1.814982,669746.0,0.61097
2,1.9437,1.789651,1.799614,1339492.0,0.611755
3,2.0321,1.788156,1.799421,2009238.0,0.612068




TrainOutput(global_step=1800, training_loss=2.023227916293674, metrics={'train_runtime': 4428.2615, 'train_samples_per_second': 3.252, 'train_steps_per_second': 0.406, 'total_flos': 5919384249427968.0, 'train_loss': 2.023227916293674, 'epoch': 3.0})