In [1]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2025.12.5-py3-none-any.whl.metadata (65 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/65.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.9/65.9 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.12.4 (from unsloth)
  Downloading unsloth_zoo-2025.12.4-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth)
  Downloading tyro-1.0.1-py3-none-any.whl.metadata (11 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.33.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.2 kB)
Collecting bitsandbytes!=0.46.0,!=0.48.0,>=0.45.5 (from unsloth)
  Downloading bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting datasets!=4.0.*,!=4.1.0,<4.4.0,>=3.4.1 (from unsloth)
  Downloading datasets-4.3.0-py3-none-any.whl.metadata (18 kB)
Collecting trl!=0.19.0,<=0.24.0,>=0.18.2 (from u

In [4]:
import json
from datasets import load_dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments


In [3]:
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = 'unsloth/gemma-3-1b-it',
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True
)

==((====))==  Unsloth 2025.12.5: Fast Gemma3 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
Unsloth: Gemma3 does not support SDPA - switching to fast eager.


model.safetensors:   0%|          | 0.00/1.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,                      # LoRA rank (don’t go higher yet)
    target_modules = [
        "q_proj", "k_proj", "v_proj",
        "o_proj", "gate_proj", "up_proj", "down_proj"
    ],
    lora_alpha = 16,
    lora_dropout = 0.0,          # keep deterministic
    bias = "none",
    use_gradient_checkpointing = True,
)


Unsloth: Making `model.base_model.model.model` require gradients


In [6]:
dataset = load_dataset(
    "json",
    data_files={
        "train": "/content/astra_fin_train.jsonl",
        "eval": "/content/astra_fin_eval.jsonl"
    }
)


Generating train split: 0 examples [00:00, ? examples/s]

Generating eval split: 0 examples [00:00, ? examples/s]

In [7]:
def format_sample(example):
    instruction = example["instruction"]
    input_data = example["input"]
    output_data = example["output"]

    # serialize input deterministically
    input_text = json.dumps(input_data, indent=2, sort_keys=True)
    output_text = json.dumps(output_data, indent=2, sort_keys=True)

    prompt = f"""### Instruction:
{instruction}

### Input:
{input_text}

### Output:
{output_text}"""

    return {"text": prompt}


In [8]:
dataset = dataset.map(format_sample, remove_columns=dataset["train"].column_names)


Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

In [10]:
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["eval"],
    dataset_text_field = "text",
    max_seq_length = 2048,
    packing = False,   # DO NOT pack — destroys structure
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 2,          # start small
        learning_rate = 2e-4,
        fp16 = True,
        logging_steps = 10,
        save_total_limit = 1,
        output_dir = "./astra_fin_gemma1b",
        report_to = "none",
    ),
)


Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/160 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/40 [00:00<?, ? examples/s]

In [11]:
trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 160 | Num Epochs = 2 | Total steps = 40
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 13,045,760 of 1,012,931,712 (1.29% trained)


Step,Training Loss
10,1.5775
20,0.534
30,0.2565
40,0.2056


Unsloth: Will smartly offload gradients to save VRAM!


TrainOutput(global_step=40, training_loss=0.6434090256690979, metrics={'train_runtime': 161.854, 'train_samples_per_second': 1.977, 'train_steps_per_second': 0.247, 'total_flos': 566427257551872.0, 'train_loss': 0.6434090256690979, 'epoch': 2.0})

In [12]:
FastLanguageModel.for_inference(model)


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma3ForCausalLM(
      (model): Gemma3TextModel(
        (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 1152, padding_idx=0)
        (layers): ModuleList(
          (0-15): 16 x Gemma3DecoderLayer(
            (self_attn): Gemma3Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1152, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1152, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
    

In [13]:
test_prompt = """### Instruction:
Evaluate loan eligibility for a Personal Loan and provide a JSON underwriting decision.

### Input:
{
  "customer_profile": {
    "age": 29,
    "employment_type": "salaried",
    "city_tier": 2,
    "experience_current_job_months": 7
  },
  "income_details": {
    "net_salary": 21000
  },
  "bureau_summary": {
    "score": 645,
    "dpd_12_months": 1,
    "unsecured_loans": 2,
    "enquiries_3_months": 3
  },
  "bank_statement_summary": {
    "avg_balance": 2200,
    "salary_credits_consistent": false
  },
  "documents": ["aadhaar", "pan"],
  "requested_amount": 350000
}

### Output:
"""


In [14]:
inputs = tokenizer(test_prompt, return_tensors="pt").to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens=300,
    temperature=0.1,
    do_sample=False,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


### Instruction:
Evaluate loan eligibility for a Personal Loan and provide a JSON underwriting decision.

### Input:
{
  "customer_profile": {
    "age": 29,
    "employment_type": "salaried",
    "city_tier": 2,
    "experience_current_job_months": 7
  },
  "income_details": {
    "net_salary": 21000
  },
  "bureau_summary": {
    "score": 645,
    "dpd_12_months": 1,
    "unsecured_loans": 2,
    "enquiries_3_months": 3
  },
  "bank_statement_summary": {
    "avg_balance": 2200,
    "salary_credits_consistent": false
  },
  "documents": ["aadhaar", "pan"],
  "requested_amount": 350000
}

### Output:
{
  "approved_amount": null,
  "decision": "reject",
  "ltv": null,
  "next_steps": "Reject application; verify employment stability.",
  "reasons": [
    "Borderline; requires verification of employment stability"
  ],
  "required_documents": [
    "aadhaar",
    "pan",
    "employment_verification"
  ],
  "risk_flags": [
    "Borderline employment stability"
  ]
}
}



In [15]:
base_model, base_tokenizer = FastLanguageModel.from_pretrained(
    model_name = "google/gemma-3-1b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(base_model)


==((====))==  Unsloth 2025.12.5: Fast Gemma3 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
Unsloth: Gemma3 does not support SDPA - switching to fast eager.


Gemma3ForCausalLM(
  (model): Gemma3TextModel(
    (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 1152, padding_idx=0)
    (layers): ModuleList(
      (0-15): 16 x Gemma3DecoderLayer(
        (self_attn): Gemma3Attention(
          (q_proj): Linear4bit(in_features=1152, out_features=1024, bias=False)
          (k_proj): Linear4bit(in_features=1152, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=1152, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=1024, out_features=1152, bias=False)
          (q_norm): Gemma3RMSNorm((256,), eps=1e-06)
          (k_norm): Gemma3RMSNorm((256,), eps=1e-06)
        )
        (mlp): Gemma3MLP(
          (gate_proj): Linear4bit(in_features=1152, out_features=6912, bias=False)
          (up_proj): Linear4bit(in_features=1152, out_features=6912, bias=False)
          (down_proj): Linear4bit(in_features=6912, out_features=1152, bias=False)
          (act_fn): GELUTanh()
        )
        (input_layernor

In [16]:
inputs = base_tokenizer(test_prompt, return_tensors="pt").to("cuda")

base_out = base_model.generate(
    **inputs,
    max_new_tokens=300,
    temperature=0.1,
    do_sample=False,
)

print(base_tokenizer.decode(base_out[0], skip_special_tokens=True))


### Instruction:
Evaluate loan eligibility for a Personal Loan and provide a JSON underwriting decision.

### Input:
{
  "customer_profile": {
    "age": 29,
    "employment_type": "salaried",
    "city_tier": 2,
    "experience_current_job_months": 7
  },
  "income_details": {
    "net_salary": 21000
  },
  "bureau_summary": {
    "score": 645,
    "dpd_12_months": 1,
    "unsecured_loans": 2,
    "enquiries_3_months": 3
  },
  "bank_statement_summary": {
    "avg_balance": 2200,
    "salary_credits_consistent": false
  },
  "documents": ["aadhaar", "pan"],
  "requested_amount": 350000
}

### Output:
```json
{
  " underwriting_decision": "Approved",
  "reasoning": "The customer's income, combined with the bank statement summary and the score, indicates a strong likelihood of loan approval. The consistent inquiry history and the current employment status also contribute positively to the assessment."
}
```
```
### Explanation:
The loan eligibility evaluation process involves several fa