In [29]:
# conda activate finetune
# Install core libraries for QLoRA fine-tuning
# %pip install "transformers>=4.44" "datasets" "accelerate" "trl" "peft" "bitsandbytes"


In [30]:
import torch

cuda_available = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if cuda_available else "No GPU detected"
cuda_available, device_name

(True, 'NVIDIA GeForce RTX 4070')

# Section 2 — Define the Prompt Template
The prompt guides the model to transform a resume and job description into an optimized resume plus ATS scoring JSON.

In [31]:
PROMPT_TEMPLATE = """You are an expert resume optimization and ATS analysis engine.

Task:
Given a candidate's raw resume text and a job description, you must:
1) Rewrite the resume so it is strongly aligned to the job description.
2) Produce a structured JSON version of the optimized resume using the schema shown below.
3) Assign ATS-style match scores:
   - ats_score_original
   - ats_score_regenerated
4) Compute improvement = ats_score_regenerated - ats_score_original

Output rules:
- Return ONLY a single valid JSON object.
- Use EXACTLY these keys:
  "optimized_resume_json",
  "optimized_resume_text",
  "ats_score_original",
  "ats_score_regenerated",
  "improvement"

Resume text:
<<<RESUME>>>
{resume_text}
<<<END_RESUME>>>

Job description:
<<<JOB_DESCRIPTION>>>
{job_description}
<<<END_JOB_DESCRIPTION>>>
"""

# Section 3 — Load & Format Dataset
The dataset lives under `dataset/large_dataset_20251103_231545.jsonl`; each JSONL row already includes `resume_text`, `job_description`, `optimized_resume_text`, `optimized_resume_json`, `ats_score_original`, `ats_score_regenerated`, and `improvement`. We sanitize each row (coercing any stringified `experiences[].description` fields into lists and skipping malformed JSON entries) before shuffling and splitting 70 %/15 %/15 % for train/validation/test.

In [32]:
from datasets import Dataset, DatasetDict
import json
from pathlib import Path

DATA_DIR = Path("dataset")
DATA_FILE = DATA_DIR / "large_dataset_20251103_231545.jsonl"

def _coerce_experience_descriptions(exp_list):
    if not isinstance(exp_list, list):
        return []
    cleaned = []
    for entry in exp_list:
        if not isinstance(entry, dict):
            continue
        description = entry.get("description")
        if isinstance(description, str):
            entry["description"] = [description]
        elif isinstance(description, list):
            entry["description"] = [str(item) for item in description if isinstance(item, (str, int, float))]
        else:
            entry["description"] = []
        cleaned.append(entry)
    return cleaned

def _sanitize_record(record):
    for key in ("resume_text", "job_description", "optimized_resume_text"):
        value = record.get(key)
        if not isinstance(value, str):
            record[key] = "" if value is None else str(value)
    optimized = record.get("optimized_resume_json")
    if not isinstance(optimized, dict):
        optimized = {}
    else:
        optimized["experiences"] = _coerce_experience_descriptions(
            optimized.get("experiences", [])
        )
    record["optimized_resume_json"] = json.dumps(optimized, ensure_ascii=False)
    return record

records = []
skipped_rows = []
with DATA_FILE.open("r", encoding="utf-8") as source:
    for idx, raw in enumerate(source, start=1):
        raw = raw.strip()
        if not raw:
            continue
        try:
            record = json.loads(raw)
        except json.JSONDecodeError as exc:
            skipped_rows.append((idx, str(exc)))
            continue
        records.append(_sanitize_record(record))

if skipped_rows:
    print(f"Skipped {len(skipped_rows)} malformed rows (showing up to 3): {skipped_rows[:3]}")

full_dataset = Dataset.from_list(records).shuffle(seed=42)
train_test_split = full_dataset.train_test_split(test_size=0.30, seed=42)
val_test_split = train_test_split["test"].train_test_split(test_size=0.5, seed=42)

raw_dataset = DatasetDict({
    "train": train_test_split["train"],
    "val": val_test_split["train"],
    "test": val_test_split["test"],
})
raw_dataset

Skipped 1 malformed rows (showing up to 3): [(118, "Expecting ',' delimiter: line 1 column 50 (char 49)")]


DatasetDict({
    train: Dataset({
        features: ['resume_text', 'job_description', 'optimized_resume_text', 'optimized_resume_json', 'ats_score_original', 'ats_score_regenerated', 'improvement'],
        num_rows: 912
    })
    val: Dataset({
        features: ['resume_text', 'job_description', 'optimized_resume_text', 'optimized_resume_json', 'ats_score_original', 'ats_score_regenerated', 'improvement'],
        num_rows: 196
    })
    test: Dataset({
        features: ['resume_text', 'job_description', 'optimized_resume_text', 'optimized_resume_json', 'ats_score_original', 'ats_score_regenerated', 'improvement'],
        num_rows: 196
    })
})

## Section 3.1 — Build Prompted Training Examples
Define the preprocessing helper that merges each resume/job pair with the standard prompt and serialized target JSON expected by the SFT trainer.

In [33]:
def _build_target_payload(example):
    import json
    optimized_json = example.get("optimized_resume_json", "{}")
    if isinstance(optimized_json, str):
        try:
            optimized_dict = json.loads(optimized_json)
        except json.JSONDecodeError:
            optimized_dict = {}
    elif isinstance(optimized_json, dict):
        optimized_dict = optimized_json
    else:
        optimized_dict = {}
    return json.dumps(
        {
            "optimized_resume_json": optimized_dict,
            "optimized_resume_text": example.get("optimized_resume_text", ""),
            "ats_score_original": int(example.get("ats_score_original", 0) or 0),
            "ats_score_regenerated": int(example.get("ats_score_regenerated", 0) or 0),
            "improvement": int(example.get("improvement", 0) or 0),
        },
        ensure_ascii=False,
    )

def preprocess(
    example,
    _prompt_template=PROMPT_TEMPLATE,
    _target_builder=_build_target_payload,
):
    prompt = _prompt_template.format(
        resume_text=example.get("resume_text", ""),
        job_description=example.get("job_description", ""),
    ).strip()
    target_json = _target_builder(example)
    example["text"] = f"{prompt}\n{target_json}"
    return example

## Section 3.2 — Apply Preprocessing
Map the preprocessing function across the dataset to create the `text` field used for supervised fine-tuning.

In [None]:
processed_dataset = raw_dataset.map(
    preprocess,
    num_proc=4,
)
processed_dataset

Map (num_proc=4): 100%|██████████| 912/912 [00:03<00:00, 431.11 examples/s]

# Section 4 — Load Model with QLoRA
Load Qwen2.5-3B with BitsAndBytes 4-bit quantization and attach a LoRA adapter to the attention and MLP projections.

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="bfloat16",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

model = get_peft_model(model, lora_config)
model

Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.76s/it]



PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 2048)
        (layers): ModuleList(
          (0-35): 36 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora

# Section 5 — QLoRA Fine-Tuning with TRL
Train with TRL's `SFTTrainer` for three epochs using cosine learning-rate scheduling and 2k token context.

In [None]:
from trl import SFTTrainer, SFTConfig

OUTPUT_DIR = "./qwen25_resume_lora"

sft_config = SFTConfig(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-5,
    bf16=True,
    eval_strategy="steps",
    eval_steps=500,
    save_steps=500,
    save_total_limit=3,
    logging_steps=20,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    report_to="none",
    gradient_checkpointing=True,
    dataset_text_field="text",
    max_length=2048,
    packing=True,
)

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["val"],
    processing_class=tokenizer,
)

trainer.train()
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

Padding-free training is enabled, but the attention implementation is not set to a supported flash attention variant. Padding-free training flattens batches into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn, kernels-community/flash-attn3, kernels-community/vllm-flash-attn3. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation` in the model configuration to one of these supported options or verify that your attention mechanism can handle flattened sequences.
You are using packing, but the attention implementation is not set to a supported flash attention variant. Packing gathers multiple samples into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn, kernels-community/flash-attn3, kernels-community/vllm-fla

Step,Training Loss,Validation Loss


KeyboardInterrupt: 

# Section 6 — JSON Validation Pipeline
Generation outputs often include the prompt, so we trim it, extract the first JSON object, and validate key presence plus types.

In [None]:
import torch

def generate_output(resume_text, job_description):
    prompt = PROMPT_TEMPLATE.format(
        resume_text=resume_text,
        job_description=job_description,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.1,
            top_p=0.9,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if prompt in decoded:
        decoded = decoded[len(prompt):].strip()

    first_brace = decoded.find("{")
    if first_brace > 0:
        decoded = decoded[first_brace:]

    return decoded

def validate_json_structure(obj):
    required = [
        "optimized_resume_json",
        "optimized_resume_text",
        "ats_score_original",
        "ats_score_regenerated",
        "improvement",
    ]
    for key in required:
        if key not in obj:
            return False
    if not isinstance(obj["optimized_resume_json"], dict):
        return False
    if not isinstance(obj["optimized_resume_text"], str):
        return False
    return True

## Section 6.1 — Run Validation on Sample Batch
Sample random validation examples, generate outputs, and report whether JSON parsing and structure checks succeed.

In [None]:
import json
import random

val_data = processed_dataset["val"]
indices = list(range(len(val_data)))
random.shuffle(indices)
indices = indices[:20]

for idx in indices:
    example = val_data[idx]
    generated = generate_output(example["resume_text"], example["job_description"])
    try:
        parsed = json.loads(generated)
        print("VALID JSON ✓" if validate_json_structure(parsed) else "STRUCTURE FAIL")
    except Exception:
        print("JSON PARSE FAIL")

# Section 7 — Save Model, Tokenizer, and Instructions
Exported artifacts in `OUTPUT_DIR` can be converted to GGUF, wrapped in a FastAPI service, or deployed on your preferred inference stack.