# Model selection for resume-job matching

This notebook fine-tunes several transformer models on the curated resume/job matching dataset, records accuracy, latency, and memory usage for each run, and then persists the selected best model together with its metrics.

In [None]:
from pathlib import Path
import json

from src.models import ModelSelectionConfig, run_model_selection
from src.models.finetune import persist_model_selection


In [None]:
# Candidate checkpoints to compare, listed in the order they are handed to
# the selection run.
candidate_models = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "distilbert-base-uncased",
    "bert-base-uncased",
]

# One configuration object shared by every candidate: input data locations,
# where run artifacts go, and the training/evaluation hyperparameters.
config = ModelSelectionConfig(
    model_names=candidate_models,
    # Input datasets (JSON Lines files).
    resume_path="data/resumes/resumes.jsonl",
    job_path="data/jobs/jobs.jsonl",
    # Destination for per-run outputs.
    output_dir="artifacts/model_runs",
    # Presumably the fraction of examples held out for validation; confirm
    # against ModelSelectionConfig.
    validation_split=0.2,
    num_train_epochs=2,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
)


In [None]:
# Run model selection with `config` and keep the outcome in `results` so the
# following cells can reuse it.
results = run_model_selection(config)
# Bare trailing expression: the notebook renders `results` as the cell output.
results


In [None]:
# Persist the outcome of the selection run, then read the saved metrics back
# so the cell output shows what ended up on disk.
metrics_path = Path("models/best_model/metrics.json")
# NOTE(review): persist_model_selection is expected to create `metrics_path`
# (the read below relies on the file existing); confirm against
# src.models.finetune.
best_model = persist_model_selection(
    results,
    best_dir="models/best_model",
    metrics_path=metrics_path,
)

# Bind the parsed JSON to a name: an expression nested inside a `with` block
# is never auto-displayed by the notebook, so a bare `json.load(handle)` here
# would silently discard the metrics.
with metrics_path.open() as handle:
    metrics = json.load(handle)

# A top-level trailing expression is what the notebook actually renders.
metrics
