In [1]:
import pandas as pd
import json

In [2]:
# Load both datasets
resume_df = pd.read_csv('/kaggle/input/resume-and-job-description/Resume.csv')
training_df = pd.read_csv('/kaggle/input/resume-and-job-description/training_data.csv')

In [3]:
# merge resume info with the job description
merged = pd.concat([resume_df['Resume_str'], training_df], axis=1)

In [4]:
# Format training samples
training_samples = []
for _, row in merged.iterrows():
    resume = row['Resume_str']
    jd = row['job_description']
    output = row['model_response']

    if isinstance(resume, str) and isinstance(jd, str) and isinstance(output, str):
        sample = {
            "text": f"[Resume]\n{resume}\n[JD]\n{jd}\n[OUTPUT]\n{output}"
        }
        training_samples.append(sample)

In [5]:
# Save as JSON
with open("fine_tune_data.json", "w") as f:
    json.dump(training_samples, f, indent=2)

# Train with Unsloth

In [6]:
# Install
!pip install "unsloth[torch]" datasets trl peft bitsandbytes accelerate

Collecting trl
  Downloading trl-0.20.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting unsloth[torch]
  Downloading unsloth-2025.8.1-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.3/47.3 kB[0m [31m911.4 kB/s[0m eta [36m0:00:00[0m
[0mCollecting unsloth_zoo>=2025.8.1 (from unsloth[torch])
  Downloading unsloth_zoo-2025.8.1-py3-none-any.whl.metadata (8.1 kB)
Collecting xformers>=0.0.27.post2 (from unsloth[torch])
  Downloading xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting tyro (from unsloth[torch])
  Downloading tyro-0.9.27-py3-none-any.whl.metadata (11 kB)
Collecting huggingface_hub>=0.34.0 (from unsloth[torch])
  Downloading huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets

In [7]:
# Load dataset
from datasets import load_dataset
dataset = load_dataset("json", data_files="fine_tune_data.json")["train"]

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
# Load Gemma via Unsloth
import torch
from unsloth import FastLanguageModel
from trl import SFTTrainer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-08-02 11:19:28.841548: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754133569.078932      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754133569.129981      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!


In [9]:
import time
start_time = time.time()

def log_time(args, state, **kwargs):
    if state.global_step % 5 == 0:
        elapsed = time.time() - start_time
        avg_time = elapsed / (state.global_step + 1)
        remaining = avg_time * (state.max_steps - state.global_step)
        print(
            f"Step {state.global_step}/{state.max_steps} | "
            f"Avg: {avg_time:.2f}s/step | "
            f"ETA: {remaining/60:.1f} min"
        )

In [10]:
import os
from transformers import TrainerCallback
os.environ["WANDB_DISABLED"] = "true"  # Force-disable WandB
os.environ["TRITON_DISABLE"] = "1"
os.environ["UNSLOTH_DISABLE_TRITON"] = "1"

class TimeTrackerCallback(TrainerCallback):
    def __init__(self):
        self.start_time = None
    
    def on_train_begin(self, args, state, control, **kwargs):
        self.start_time = time.time()
        print(f"Training started at {time.strftime('%X')}")
    
    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % 10 == 0:
            elapsed = time.time() - self.start_time
            avg_time = elapsed / (state.global_step + 1)
            remaining = avg_time * (state.max_steps - state.global_step)
            print(
                f"Step {state.global_step}/{state.max_steps} | "
                f"Avg: {avg_time:.2f}s/step | "
                f"ETA: {remaining/60:.1f} min"
            )

In [11]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2b-it",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
    device_map = {"": torch.cuda.current_device()}
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "v_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    args = {
        "output_dir": "gemma_hr_lora",
        "num_train_epochs": 3,
        "per_device_train_batch_size": 8,
        "gradient_accumulation_steps": 2,
        "save_strategy": "epoch",
        "logging_steps": 5,
        "fp16": True,
        "optim": "adamw_torch",
        "max_seq_length": 2048,
        "dataloader_num_workers": 4,
        "dataloader_pin_memory": True,
        "report_to": "none",
    },
    callbacks=[TimeTrackerCallback()]
)

trainer.train()

==((====))==  Unsloth 2025.8.1: Fast Gemma patching. Transformers: 4.54.1.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 6.0. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.07G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Not an error, but Unsloth cannot patch Attention layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Not an error, but Unsloth cannot patch O projection layer with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2025.8.1 patched 18 layers with 0 QKV layers, 0 O layers and 0 MLP layers.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/853 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 853 | Num Epochs = 3 | Total steps = 321
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 921,600 of 2,507,094,016 (0.04% trained)


Training started at 11:20:33
Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.8445
2,2.8664
3,2.857
4,2.7885
5,2.8019
6,2.7481
7,2.8527
8,2.772
9,2.8456
10,2.8019


Step 10/321 | Avg: 32.11s/step | ETA: 166.4 min
Step 20/321 | Avg: 33.36s/step | ETA: 167.3 min
Step 30/321 | Avg: 33.74s/step | ETA: 163.7 min
Step 40/321 | Avg: 34.06s/step | ETA: 159.5 min
Step 50/321 | Avg: 34.21s/step | ETA: 154.5 min
Step 60/321 | Avg: 34.29s/step | ETA: 149.1 min
Step 70/321 | Avg: 34.32s/step | ETA: 143.6 min
Step 80/321 | Avg: 34.34s/step | ETA: 138.0 min
Step 90/321 | Avg: 34.33s/step | ETA: 132.2 min
Step 100/321 | Avg: 34.39s/step | ETA: 126.7 min
Step 110/321 | Avg: 34.28s/step | ETA: 120.5 min
Step 120/321 | Avg: 34.33s/step | ETA: 115.0 min
Step 130/321 | Avg: 34.39s/step | ETA: 109.5 min
Step 140/321 | Avg: 34.35s/step | ETA: 103.6 min
Step 150/321 | Avg: 34.38s/step | ETA: 98.0 min
Step 160/321 | Avg: 34.36s/step | ETA: 92.2 min
Step 170/321 | Avg: 34.40s/step | ETA: 86.6 min
Step 180/321 | Avg: 34.37s/step | ETA: 80.8 min
Step 190/321 | Avg: 34.40s/step | ETA: 75.1 min
Step 200/321 | Avg: 34.41s/step | ETA: 69.4 min
Step 210/321 | Avg: 34.43s/step | E

TrainOutput(global_step=321, training_loss=2.583988925749639, metrics={'train_runtime': 11096.4727, 'train_samples_per_second': 0.231, 'train_steps_per_second': 0.029, 'total_flos': 6.17415932323799e+16, 'train_loss': 2.583988925749639})

In [12]:
# Save model + tokenizer
model.save_pretrained("gemma_hr_lora")
tokenizer.save_pretrained("gemma_hr_lora")

('gemma_hr_lora/tokenizer_config.json',
 'gemma_hr_lora/special_tokens_map.json',
 'gemma_hr_lora/tokenizer.model',
 'gemma_hr_lora/added_tokens.json',
 'gemma_hr_lora/tokenizer.json')

In [13]:
!zip -r gemma_hr_lora.zip gemma_hr_lora

  adding: gemma_hr_lora/ (stored 0%)
  adding: gemma_hr_lora/tokenizer.model (deflated 51%)
  adding: gemma_hr_lora/tokenizer_config.json (deflated 96%)
  adding: gemma_hr_lora/tokenizer.json (deflated 84%)
  adding: gemma_hr_lora/special_tokens_map.json (deflated 76%)
  adding: gemma_hr_lora/adapter_config.json (deflated 54%)
  adding: gemma_hr_lora/adapter_model.safetensors (deflated 8%)
  adding: gemma_hr_lora/README.md (deflated 66%)
