In [None]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [7]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Load the model and tokenizer from the Hugging Face Hub
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="minifixio/lora_model",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [16]:
from datasets import load_dataset

dataset = load_dataset("ShashiVish/cover-letter-dataset", split="train")

def format_cover_letter_with_labels(batch):
    inputs_list = []
    for i in range(len(batch['Job Title'])):
        job_title = batch['Job Title'][i]
        qualifications = batch['Preferred Qualifications'][i]
        company = batch['Hiring Company'][i]
        name = batch['Applicant Name'][i]
        past_experience = batch['Past Working Experience'][i]
        current_experience = batch['Current Working Experience'][i]
        skillsets = batch['Skillsets'][i]
        qualifications = batch['Qualifications'][i]
        cover_letter = batch['Cover Letter'][i]

        prompt = (
            f"Write a personalized cover letter.\n\n"
            f"Job Title: {job_title}\n"
            f"Preferred Qualifications: {qualifications}\n"
            f"Hiring Company: {company}\n"
            f"Applicant Name: {name}\n"
            f"Past Working Experience: {past_experience}\n"
            f"Current Working Experience: {current_experience}\n"
            f"Skillsets: {skillsets}\n"
            f"Qualifications: {qualifications}\n\n"
            f"Cover Letter:"
        )

        input_text = prompt
        output_text = cover_letter

        # Tokenize and create input-output pairs
        inputs = tokenizer(input_text, truncation=True, padding="max_length", max_length=max_seq_length)
        labels = tokenizer(output_text, truncation=True, padding="max_length", max_length=max_seq_length)

        inputs["labels"] = labels["input_ids"]
        inputs_list.append(inputs)

    # Combine all inputs into a single dictionary of lists
    return {key: [d[key] for d in inputs_list] for key in inputs_list[0].keys()}


dataset = dataset.map(format_cover_letter_with_labels, batched=True)


{'Job Title': 'Senior Java Developer', 'Preferred Qualifications': '5+ years of experience in Java Development', 'Hiring Company': 'Google', 'Applicant Name': 'John Doe', 'Past Working Experience': 'Java Developer at XYZ for 3 years', 'Current Working Experience': 'Senior Java Developer at ABC for 2 years', 'Skillsets': 'Java, Spring Boot, Hibernate, SQL', 'Qualifications': 'BSc in Computer Science', 'Cover Letter': 'I am writing to express my interest in the Senior Java Developer position at Google. With over 5 years of experience in Java development, I am confident in my ability to contribute effectively to your team. My professional experience includes designing and implementing Java applications, managing the full software development lifecycle, and troubleshooting and resolving technical issues. I also possess strong skills in Spring Boot, Hibernate and SQL. I am a diligent and dedicated professional, always looking to improve and learn new skills. I am excited about the opportuni

Map:   0%|          | 0/813 [00:00<?, ? examples/s]

In [17]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Choose based on your hardware (e.g., 16 for fine-tuning efficiency)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,  # Set dropout rate to 0 for optimized training
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,  # Optional rank stabilization
    loftq_config=None,
)


Unsloth: Already have LoRA adapters! We shall skip this step.


In [18]:
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        num_train_epochs=1,
        learning_rate=2e-4,
        fp16=True,
        bf16=False,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        save_strategy="steps",
        save_steps=50,
        report_to="none",
    ),
)

In [19]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 813 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 101
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
1,11.9389
2,11.506
3,11.4767
4,10.6993
5,10.4953
6,10.3328
7,9.3496
8,8.7828
9,8.7149
10,8.2685


In [20]:
# Save your HuggingFace API key in a file named huggingface_api_key.txt
with open("huggingface_api_key.txt", "r") as file:
    huggingface_api_key = file.read().strip()

In [21]:
model.save_pretrained("lora_model_cover_letters") # Local saving
tokenizer.save_pretrained("lora_mode_cover_lettersl")
model.push_to_hub("minifixio/lora_model_cover_letters", token = huggingface_api_key) # Online saving
tokenizer.push_to_hub("minifixio/lora_model_cover_letters", token = huggingface_api_key) # Online saving

README.md:   0%|          | 0.00/598 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Saved model to https://huggingface.co/minifixio/lora_model_cover_letters


  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [None]:
# Save to multiple GGUF options - much faster if you want multiple!
if True:
    model.push_to_hub_gguf(
        "minifixio/lora_model_cover_letters_gguf", # Change hf to your username!
        tokenizer,
        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
        token = huggingface_api_key, # Get a token at https://huggingface.co/settings/tokens
    )