## Setting up

In [None]:
%%capture
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
!pip install git+https://github.com/huggingface/transformers.git

In [None]:
import os

# Configure PyTorch's CUDA allocator through its environment variable. In the
# cells visible here this runs before `torch` is first imported.
os.environ.update(
    {"PYTORCH_CUDA_ALLOC_CONF": "garbage_collection_threshold:0.6,max_split_size_mb:128"}
)

In [None]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Kaggle secrets client; kept in `user_secrets` because the W&B cell reads
# its own key from the same client.
user_secrets = UserSecretsClient()

# Read the Hugging Face token from the secret store and log in with it.
hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token=hf_token)

In [None]:
import wandb

# Authenticate with Weights & Biases using the key stored under the "wandb" secret.
wb_token = user_secrets.get_secret("wandb")
wandb.login(key=wb_token)

# Open the tracking run; the handle is kept in `run`.
run = wandb.init(
    anonymous="allow",
    job_type="training",
    project="Fine-tuning Gemma-3-4B on FinQA Reasoning Dataset",
)

## Loading the model and tokenizer

In [None]:
from unsloth import FastModel
import torch

# Load the Gemma-3 4B instruct checkpoint and its tokenizer.
# 4-bit loading is on, 8-bit loading and full fine-tuning are off.
model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-4b-it",
    max_seq_length=1024,
    full_finetuning=False,
    load_in_8bit=False,
    load_in_4bit=True,
)

## Model inference before fine-tuning

In [None]:
# Inference-time prompt template with two positional `{}` slots:
#   1. the question text, placed under "### Question:";
#   2. the text that follows the opening `<think>` tag. The generation cells in
#      this notebook pass an empty string here, so the prompt ends right after
#      `<think>` and the model continues from that point.
# The generation cells later split the decoded output on "### Response:".
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Question:
{}

### Response:
<think>
{}
"""

In [None]:
# Sample financial question (context passage, pipe-delimited table, question)
# used to probe the model before fine-tuning.
# NOTE(review): tokens such as `2014large`, `2019s` and the lone `2013` in the
# table look like mis-encoded punctuation; they are left untouched because the
# string is runtime data fed to the model.
question = """Please answer the given financial question based on the context.
Context: incremental contract start-up costs 2014large municipal contract . during 2018 and 2017 , we incurred costs of $ 5.7 million and $ 8.2 million , respectively , related to the implementation of a large municipal contract . these costs did not meet the capitalization criteria prescribed by the new revenue recognition standard . adoption of the tax act . the tax act was enacted on december 22 , 2017 . among other things , the tax act reduced the u.s . federal corporate tax rate from 35% ( 35 % ) to 21% ( 21 % ) . for the year ended december 31 , 2017 , we recorded provisional amounts based on our estimates of the tax act 2019s effect to our deferred taxes , uncertain tax positions , and one-time transition tax . these adjustments reduced our tax provision by $ 463.9 million . during 2018 , we adjusted the provisional amounts recorded as of december 31 , 2017 for the one-time transition tax , deferred taxes and uncertain tax positions . these adjustments increased our tax provision by $ 0.3 million . bridgeton insurance recovery , net . during 2018 , we collected an insurance recovery of $ 40.0 million related to our closed bridgeton landfill in missouri , which we recognized as a reduction of remediation expenses in our cost of operations . in addition , we incurred $ 12.0 million of incremental costs attributable to the bridgeton insurance recovery . recent developments 2019 financial guidance in 2019 , we will continue to focus on managing the controllable aspects of our business by enhancing the quality of our revenue , investing in profitable growth opportunities and reducing costs . our team remains focused on executing our strategy to deliver consistent earnings and free cash flow growth , and improve return on invested capital . we are committed to an efficient capital structure , maintaining our investment grade credit ratings and increasing cash returned to our shareholders . 
our guidance is based on current economic conditions and does not assume any significant changes in the overall economy in 2019 . specific guidance follows : revenue we expect 2019 revenue to increase by approximately 4.25 to 4.75% ( 4.75 % ) comprised of the following : increase ( decrease ) .
||increase ( decrease )|
|average yield|2.75% ( 2.75 % )|
|volume|0.0 to 0.25|
|energy services|2013|
|fuel recovery fees|0.25|
|recycling processing and commodity sales|0.25 to 0.5|
|acquisitions / divestitures net|1.0|
|total change|4.25 to 4.75% ( 4.75 % )|
changes in price are restricted on approximately 50% ( 50 % ) of our annual service revenue . the majority of these restricted pricing arrangements are tied to fluctuations in a specific index ( primarily a consumer price index ) as defined in the contract . the consumer price index varies from a single historical stated period of time or an average of trailing historical rates over a stated period of time . in addition , the initial effect of pricing resets typically lags 6 to 12 months from the end of the index measurement period to the date the revised pricing goes into effect . as a result , current changes in a specific index may not manifest themselves in our reported pricing for several quarters into the future. .
Question: what is the ratio of the acquisitions / divestitures net to the fuel recovery fees as part of the expected 2019 revenue to increase
Answer:"""


# Put the model into Unsloth's inference mode before generating.
FastModel.for_inference(model)

# Fill the template (question plus an empty reasoning slot), tokenise it as a
# batch of one and move the tensors to the GPU.
prompt = prompt_style.format(question, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    use_cache=True,
    max_new_tokens=1024,
)

# Decode the generated sequence and print the part after the response marker.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

In [None]:
# Ask PyTorch to release cached, currently unused CUDA memory after the
# baseline generation above.
torch.cuda.empty_cache() 

In [None]:
# Attach LoRA adapters to the loaded model. `model` is rebound to the wrapped
# model, so this cell is meant to run once per freshly loaded base model.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network are fine-tuned: language, attention and MLP
    # modules are on, vision layers are off (text-only task).
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    # LoRA hyper-parameters; alpha is set equal to the rank.
    r=8,
    lora_alpha=8,
    lora_dropout=0,
    bias="none",
    random_state=3417,
)

## Loading and processing the dataset

In [None]:
# Training prompt template with three positional `{}` slots, in order:
# the question, the chain of thought (wrapped in <think>...</think>) and the
# final response.
# The template starts directly with the instruction text, with no leading
# newline, so that its prefix up to `<think>` matches the inference template
# `prompt_style`. Previously every training example began with a stray "\n"
# that the inference prompts did not have.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Question:
{}

### Response:
<think>
{}
</think>
{}
"""

In [None]:
# End-of-sequence token of the loaded tokenizer; appended to every formatted
# training example.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Build the ``text`` column for a batch of dataset rows.

    Args:
        examples: Batched mapping providing the columns
            "Open-ended Verifiable Question", "Complex_CoT" and "Response".

    Returns:
        A dict with the single key "text": one string per row, produced by
        filling ``train_prompt_style`` with the row's question, chain of
        thought and response, followed by ``EOS_TOKEN``.
    """
    rows = zip(
        examples["Open-ended Verifiable Question"],
        examples["Complex_CoT"],
        examples["Response"],
    )
    return {
        "text": [
            train_prompt_style.format(question_text, reasoning, answer) + EOS_TOKEN
            for question_text, reasoning, answer in rows
        ]
    }

In [None]:
from datasets import load_dataset

# Load the first 500 training rows of the FinQA reasoning-path dataset.
# `trust_remote_code=True` is intentionally not passed: it would allow code
# shipped with the Hub repo to run locally.
# NOTE(review): this assumes the dataset loads from plain data files; confirm,
# and re-add the flag only if loading explicitly asks for it.
dataset = load_dataset("TheFinAI/Fino1_Reasoning_Path_FinQA", split="train[0:500]")

# Add the "text" column built by `formatting_prompts_func` (batched mapping).
dataset = dataset.map(formatting_prompts_func, batched=True)

# Display the first formatted training example as the cell output.
dataset["text"][0]

## Setting up the trainer

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning trainer over the formatted "text" column.
# NOTE(review): `is_bfloat16_supported` is imported but no fp16/bf16 flag is
# passed below; confirm that leaving precision to the defaults is intended.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=2,  # 1 x 2 = effective batch of 2 per device
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3417,
        output_dir="outputs",
        # A W&B run is opened earlier in this notebook. Request W&B logging
        # explicitly instead of relying on the library's default reporters,
        # which depend on the installed transformers version.
        report_to="wandb",
    ),
)

## Model training

In [None]:
# Release cached, unused CUDA memory before training starts.
torch.cuda.empty_cache()
# Run fine-tuning; the object returned by `train()` is kept in `trainer_stats`.
trainer_stats = trainer.train()

In [None]:
# Close the Weights & Biases run opened with `wandb.init`.
# This does not save the model; saving happens in the cells further down.
wandb.finish()

## Model inference after fine-tuning

In [None]:
# NOTE(review): this probe is a coding question, although the adapter was
# trained on FinQA examples; confirm it is the intended post-fine-tuning check.
question = "Given two integers `x` and `y`, can you write a function that returns the greatest common divisor of `x` and `y`?"

# Switch the fine-tuned model into Unsloth's inference mode.
FastModel.for_inference(model)

# Same inference template as before training: question plus an empty reasoning slot.
prompt = prompt_style.format(question, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    use_cache=True,
    max_new_tokens=1200,
)

# Decode and print the part after the response marker.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

In [None]:
# Second post-fine-tuning probe. The model is already in inference mode from
# the previous cell.
question = "Write a function to calculate the sum of all the odd numbers from 1 to n."

prompt = prompt_style.format(question, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    use_cache=True,
    max_new_tokens=1200,
)

# Decode and print the part after the response marker.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

## Saving the model locally

In [None]:
# Local output directory, and the Hub repo id derived from it (same name under
# the "kingabzpro" namespace). The upload cells below reuse `new_model_online`.
new_model_local = "Gemma-3-4B-Fin-QA-Reasoning"
new_model_online = f"kingabzpro/{new_model_local}"

# Write the model first, then the tokenizer, into the same local directory.
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)

## Pushing the model to the Hugging Face Hub

In [None]:
# Upload the model and then the tokenizer to the Hub repo named by
# `new_model_online`. Requires the Hub login performed near the top of the notebook.
model.push_to_hub(new_model_online) # Online saving
tokenizer.push_to_hub(new_model_online) # Online saving

In [None]:
# Upload a merged 16-bit copy of the model (save_method="merged_16bit") together with the tokenizer.
# NOTE(review): this targets the same repo id as the `push_to_hub` calls in the
# previous cell; confirm that keeping both uploads in one repo is intended.
model.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_16bit")