#### This notebook was run on Google Colab; the outputs were cleared so that GitHub can render it.

**Install libraries**

In [None]:
!pip install -q transformers accelerate bitsandbytes peft datasets

**Import libraries**

In [None]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

**Mount Google Drive**

In [None]:
from google.colab import drive

# Expose Google Drive inside the Colab filesystem; the model and adapter
# paths used by this notebook live under this mount point.
drive.mount(mountpoint='/content/drive')

**Define the base model and LoRA adapter paths**

In [None]:
# Directory on the mounted Google Drive that holds the base model; it is
# passed to AutoModelForCausalLM.from_pretrained when the model is loaded.
base_model_path = "/content/drive/MyDrive/Potential_Talent/Llama_3.2_3B_instruct"
# Directory on the mounted Google Drive that holds the LoRA adapters; it is
# passed to PeftModel.from_pretrained.
lora_path = "/content/drive/MyDrive/Potential_Talent/lora_adapters"

**Load tokenizer**

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer from the same directory as the base model. Reusing
# `base_model_path` (instead of repeating the literal path) keeps the
# tokenizer and the model in sync if the path is ever changed.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_path,
    use_fast=True,
    # NOTE(review): this flag is named after Mistral tokenizers; confirm it is
    # actually required for this Llama checkpoint.
    fix_mistral_regex=True,
)

# Use the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

**Load the base model**

In [None]:
from transformers import AutoModelForCausalLM
import torch

# Request 4-bit loading through an explicit quantization config: passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated in
# transformers in favour of `quantization_config`.
# NOTE(review): every other bitsandbytes option is left at its default; if the
# adapters were trained with a different 4-bit setup, mirror it here.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",  # let accelerate decide where the weights are placed
    quantization_config=quantization_config,
    dtype=torch.float16,
)

**Load the trained LoRA adapters**

In [None]:
# Wrap the already-loaded base model with the fine-tuned LoRA adapters stored
# at `lora_path`. Note that `model` is rebound to the wrapped model here.
model = PeftModel.from_pretrained(model=model, model_id=lora_path)

# Inference only from here on: put the wrapped model into evaluation mode.
model.eval()


**Define and configure inference function**

In [None]:
def generate(prompt, max_new_tokens=128, temperature=0.7):
    """Generate text for ``prompt`` with the notebook-level ``model``.

    Relies on the module-level ``tokenizer`` and ``model`` objects created in
    the earlier cells.

    Args:
        prompt: Text passed to the tokenizer and then to the model.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate``.
        temperature: Sampling temperature. A positive value enables sampling
            at that temperature; ``0``, a negative value or ``None`` switches
            to deterministic (non-sampling) decoding instead of forwarding an
            invalid temperature to the sampler.

    Returns:
        The first generated sequence decoded to a string with special tokens
        stripped. NOTE(review): for causal LMs the generated ids usually start
        with the prompt tokens, so the text presumably contains the prompt as
        well; verify before parsing the result.
    """
    # Move the encoded inputs to wherever the model lives instead of assuming
    # "cuda": the model is loaded with device_map="auto", and a hard-coded
    # device breaks on CPU-only runtimes.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = temperature
    else:
        # Sampling needs a strictly positive temperature; fall back to
        # deterministic decoding rather than raising.
        gen_kwargs["do_sample"] = False

    # No gradients are needed for inference.
    with torch.no_grad():
        output_ids = model.generate(**inputs, **gen_kwargs)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

**Test the prompt**

In [None]:
# Smoke test: a minimal candidate profile followed by the screening
# instruction, laid out one prompt line per source line.
prompt = (
    "Candidate profile:\n"
    "Title: software engineer\n"
    "Location: United States\n"
    "\n"
    "Evaluate the candidate and assign a screening score."
)
output = generate(prompt)
# print (not a bare expression) so newlines in the result render as line breaks.
print(output)
