In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llama-3.2/transformers/3b-instruct/1/model.safetensors.index.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/config.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model-00001-of-00002.safetensors
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model-00002-of-00002.safetensors
/kaggle/input/llama-3.2/transformers/3b-instruct/1/README.md
/kaggle/input/llama-3.2/transformers/3b-instruct/1/USE_POLICY.md
/kaggle/input/llama-3.2/transformers/3b-instruct/1/tokenizer.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/tokenizer_config.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/LICENSE.txt
/kaggle/input/llama-3.2/transformers/3b-instruct/1/special_tokens_map.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/.gitattributes
/kaggle/input/llama-3.2/transformers/3b-instruct/1/generation_config.json


In [2]:
%%capture
%pip install -U transformers 
%pip install -U datasets 
%pip install -U accelerate 
%pip install -U peft 
%pip install -U trl 
%pip install -U bitsandbytes 
%pip install -U wandb

In [3]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [8]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [9]:
wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3.2 on Finance Dataset', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmed-houbid[0m ([33mmed-houbid-enset-mohammedia[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [10]:
base_model = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
new_model = "llama-3.2-3b-Finance-Consultant-ChatBot"
dataset_name = "gbharti/finance-alpaca"

In [11]:
# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [12]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
dataset = load_dataset(dataset_name, split="train")
dataset = dataset.shuffle(seed=65).select(range(1000)) # Only use 1000 samples for quick demo
instruction = """You are a top-rated finance consultant named Med.
    Be polite to clients and provide clear, accurate advice on financial matters, including investments, savings, and financial planning.
    Always ensure your responses are professional and tailored to the client's financial needs.
    """
def format_chat_template(row):
    
    row_json = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": row["instruction"]},
        {"role": "assistant", "content": row["output"]}  # Correct column name for the assistant's response
    ]
    
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc= 4,
)

README.md:   0%|          | 0.00/709 [00:00<?, ?B/s]

Cleaned_date.json:   0%|          | 0.00/42.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/68912 [00:00<?, ? examples/s]

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [14]:
dataset['text'][3]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a top-rated finance consultant named Med.\n    Be polite to clients and provide clear, accurate advice on financial matters, including investments, savings, and financial planning.\n    Always ensure your responses are professional and tailored to the client\'s financial needs.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nFind an article about the Covid-19 vaccine.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThe article I found is titled "A Vaccine for COVID-19 Is On the Way" from The New York Times. It explains the progress that has be made in developing a vaccine and what challenges remain. The article also addresses the difficulties of obtaining regulatory approval, the different approaches scientists have taken to create vaccines, the potential timeline for immunization, and the steps that are being taken to ensure that the vaccine is safe.<|eot_id|><|start_header_id|>assistant<|end_header

In [15]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)

In [16]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

ValueError: Chat template is already added to the tokenizer. If you want to overwrite it, please set it to None

In [17]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [19]:
train_dataset, test_dataset = dataset.train_test_split(test_size=0.2).values()

In [20]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [22]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [23]:
trainer.train()

Step,Training Loss,Validation Loss
80,0.8677,1.024872
160,1.7645,1.018865
240,0.5297,0.997875
320,0.8191,0.991814
400,0.2641,0.989076


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=400, training_loss=1.0699986746162176, metrics={'train_runtime': 1358.8747, 'train_samples_per_second': 0.589, 'train_steps_per_second': 0.294, 'total_flos': 2343035007946752.0, 'train_loss': 1.0699986746162176, 'epoch': 1.0})

In [24]:
wandb.finish()

0,1
eval/loss,█▇▃▂▁
eval/runtime,▁▄▃▆█
eval/samples_per_second,█▃▆▃▁
eval/steps_per_second,█▃▆▃▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇███
train/grad_norm,█▄▅▅▆▃▆▄▄▄▅▅▂▃▃▃▄▅▂▃▂▂▃▅▅▃▅▄▂▂▄▃▃▄▄▂▂▂▅▁
train/learning_rate,▇████▇▇▇▆▆▅▅▅▅▅▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁
train/loss,█▇▅▄▂▆▆▃▁▇▁▅▄▅▄▂▁▁▁▁▆▂▂▃▂▄▄▂▃▅▅▃▂▁▂▇▃▃▂▁

0,1
eval/loss,0.98908
eval/runtime,103.321
eval/samples_per_second,1.936
eval/steps_per_second,1.936
total_flos,2343035007946752.0
train/epoch,1.0
train/global_step,400.0
train/grad_norm,0.49768
train/learning_rate,0.0
train/loss,0.2641


In [25]:
# Define the test question message
messages = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": "How can I save on closing costs when buying a home?"}
]

# Apply the chat template to format the input properly
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the input
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

# Generate a response from the model
outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# Decode the generated response
text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Extract and print the response
print(text.split("assistant")[1])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.




There are several ways to save on closing costs when buying a home. One way is to negotiate with the seller to pay some of the closing costs. Another way is to consider a home with a lower purchase price, which will result in lower closing costs. Additionally, you can consider a mortgage that has lower closing costs, such as an adjustable-rate mortgage. You can also consider using a mortgage broker to help you negotiate with the seller and to find the best mortgage options for you. Another option is to consider a home with a lower purchase price and to use a mortgage broker to help you negotiate with the seller and to find the best mortgage options for you. Finally, you can also consider using a closing cost calculator to help you estimate your closing costs and


In [26]:
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Houbid/llama-3.2-3b-Finance-Consultant-ChatBot/commit/a80c61d7216bb529c23c6dd04feda1608dfa84f0', commit_message='Upload model', commit_description='', oid='a80c61d7216bb529c23c6dd04feda1608dfa84f0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Houbid/llama-3.2-3b-Finance-Consultant-ChatBot', endpoint='https://huggingface.co', repo_type='model', repo_id='Houbid/llama-3.2-3b-Finance-Consultant-ChatBot'), pr_revision=None, pr_num=None)