In [8]:
import torch
import pandas as pd
from typing import Literal
from peft import get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer

In [9]:
# Location of the JSON file backing each dataset split, keyed by split name.
data_for_training: dict[Literal["train", "validation"], str] = dict(
    train="data/train_data.json",
    validation="data/val_data.json",
)

In [None]:
# Load the GPT-2 tokenizer and base model, wrap the model with LoRA adapters
# and restore the fine-tuned weights from "lora.pt" for inference.
cache_dir = "models"
modelID = "openai-community/gpt2"

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = AutoTokenizer.from_pretrained(modelID, cache_dir=cache_dir)

# Set padding token
tokenizer.padding_side = "right"               # Pad on the right-hand side of each sequence
tokenizer.pad_token = tokenizer.eos_token      # Using eos_token as pad_token

model = AutoModelForCausalLM.from_pretrained(modelID, device_map='auto', cache_dir=cache_dir)

model.config.pad_token_id = tokenizer.pad_token_id  # Set the pad_token_id in the model config

# Adapter configuration; it has to describe the same adapter layout as the
# checkpoint loaded below, otherwise load_state_dict reports key mismatches.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, config)
model = model.to(device)

# This notebook only runs inference: switch to eval mode so that dropout
# layers (including the LoRA dropout configured above) are disabled.
model.eval()

# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while loading.
model.load_state_dict(torch.load("lora.pt", map_location=device, weights_only=True))

In [5]:
# Function for inference
def generate_job_title(input_text, max_new_tokens=20):
    """Sample a continuation of ``input_text`` from the notebook's model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        input_text: Prompt to continue. It is converted to ``str`` before
            being tokenized.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. The default of 20 matches the previous hard-coded
            ``max_length=<prompt length> + 20``.

    Returns:
        The first sequence returned by ``model.generate``, decoded to text
        with special tokens removed.
    """
    # Prepare the input directly on the model's device
    prompt = str(input_text)
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)

    # A single, unpadded prompt: every position is a real token, so the mask is all ones
    attention_mask = torch.ones_like(input_ids)

    # Generate text with the model. input_ids is passed by keyword so the
    # call also works with wrappers whose generate() takes keywords only.
    generated_outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,        # Budget for tokens added after the prompt
        num_return_sequences=1,
        do_sample=True,                       # Enable sampling
        temperature=0.9,                      # Control randomness
        top_k=50,                             # Top-K sampling
        top_p=0.95,                           # Top-P (nucleus) sampling
        pad_token_id=tokenizer.eos_token_id,  # Padding token id
    )

    # Decode and return the generated text
    return tokenizer.decode(generated_outputs[0], skip_special_tokens=True)

In [None]:
# Smoke-test the generator on a literal prompt. This cell used to pass `i`,
# which is only bound by the loop over `input_series` in a later cell, so it
# raised NameError when the notebook was run top to bottom on a fresh kernel.
generate_job_title("job title:")

In [None]:
# Build the prompts: each validation text is cut off right after the first
# occurrence of the marker below, so the prompt ends with the marker itself.
word = "job title:"


def _prompt_prefix(text):
    """Return `text` up to and including the first occurrence of `word`.

    Raises ValueError (from str.index) when the marker is not present.
    """
    cut = text.index(word) + len(word)
    return text[:cut]


validation_frame = pd.read_json(data_for_training['validation'])
input_series = validation_frame["text"].apply(_prompt_prefix)
input_series[0]

In [None]:
# Generate a completion for every prompt in `input_series` and print it with
# a blank line before and after, so consecutive generations stay separated.
# The names `i` and `predicted_title` are kept because both remain bound at
# notebook scope once the loop has finished.
for i in input_series:
    predicted_title = generate_job_title(i)
    print("", predicted_title, "", sep="\n")