In [1]:
#pip install transformers


In [46]:
%pip install peft
%pip install datasets



In [47]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

Importing the model

In [48]:


model_name = "google/flan-t5-small"
# Check if CUDA is available and set device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
foundation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device) # Move the model to the device


Checking my model how it performs in zero shot for summarization

In [6]:
# This is just to check the model with no fine tuning



resume_text = "John Doe | Software Developer Experience: Java, Python, and C++ (4 years) Agile methodologies, RESTful APIs, and database management Education: Bachelor's degree in Computer Science, XYZ University"

input1 = tokenizer(f"Resume: {resume_text}\nSummarize this Resume:", return_tensors="pt").to(device)
print("Input IDs:", input1["input_ids"])
print("Attention Mask:", input1["attention_mask"])
foundation_outputs = foundation_model.generate(
    input_ids=input1["input_ids"],
    attention_mask=input1["attention_mask"],
    max_new_tokens=150,
    do_sample=True,
    temperature=0.3,
    eos_token_id=tokenizer.eos_token_id,
    top_k=50,  # Use top-k sampling
    top_p=0.9  # Use top-p (nucleus) sampling
    )
print(tokenizer.batch_decode(foundation_outputs, skip_special_tokens=True))


Input IDs: tensor([[ 9410,    10,  1079,   531,    15,  1820,  4300, 17179,  7187,    10,
         10318,     6, 20737,     6,    11,   205, 16702,  8457,   203,    61,
         26633, 25984,     6,   391,  6038,  1329,  6429,     7,     6,    11,
          3501,   758,  2855,    10, 10199,    31,     7,  1952,    16,  5491,
          2854,     6,     3,     4,   476,   956,   636, 12198,  1635,  1737,
            48,  9410,    10,     1]])
Attention Mask: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]])
['John Doe is a software developer. He is a software engineer.']


In [49]:
from google.colab import drive
import os
# Mount Google Drive
drive.mount('/content/drive')

import os

output_directory = "/content/drive/MyDrive/Colab Notebooks/Text Summarization/"

if not os.path.exists(output_directory):
    os.mkdir(output_directory)

path = '/content/drive/MyDrive/Colab Notebooks/Text Summarization'
files = os.listdir(path)
print(files)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
['Datasets', 'working_dir', 'runs', 'peft_model_1720299458.6191595']


# **Load The Resume dataset**

In [50]:
from datasets import load_dataset, DatasetDict,Dataset

ds = load_dataset("burberg92/resume_summary",cache_dir="/content/drive/MyDrive/Colab Notebooks/Text Summarization/Datasets")
print(ds)


DatasetDict({
    train: Dataset({
        features: ['resume', 'ex_summary'],
        num_rows: 100
    })
})


# **Creating custom prompt template**

In [56]:
def create_prompt(example):
    return {'prompt':  example['resume'], 'summary': example['ex_summary']}

# prompt_dataset = ds.map(create_prompt)
# prompt_dataset = prompt_dataset.remove_columns(['resume', 'ex_summary'])
# print(prompt_dataset)



In [57]:
# Define the function to create prompt formats
def create_prompt_formats(sample):
    """
    Format various fields of the sample ('instruction','output')
    Then concatenate them using two newline characters
    :param sample: Sample dictionary
    """
    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    INSTRUCTION_KEY = "### Instruct: Summarize the below conversation."
    RESPONSE_KEY = "### Summary:"
    END_KEY = "### End"

    blurb = f"\n{INTRO_BLURB}"
    instruction = f"{INSTRUCTION_KEY}"
    input_context = f"{sample['prompt']}" if sample["prompt"] else None
    response = f"{RESPONSE_KEY}\n{sample['summary']}"
    end = f"{END_KEY}"

    parts = [part for part in [blurb, instruction, input_context, response, end] if part]

    formatted_prompt = "\n\n".join(parts)
    sample["text"] = formatted_prompt

    return sample

# Apply the function to create the prompt dataset
prompt_dataset = ds.map(create_prompt)
prompt_dataset = prompt_dataset.remove_columns(['resume', 'ex_summary'])
print(prompt_dataset)
print('--------')
# Apply the function to create the prompt formats
formatted_prompt_dataset = prompt_dataset.map(create_prompt_formats)

# Check the first few examples to ensure the prompts are formatted correctly
formatted_prompt_dataset['train']


DatasetDict({
    train: Dataset({
        features: ['prompt', 'summary'],
        num_rows: 100
    })
})
--------


Dataset({
    features: ['prompt', 'summary', 'text'],
    num_rows: 100
})

Splitting the data into training and validation data

In [58]:
from sklearn.model_selection import train_test_split
k = formatted_prompt_dataset['train'].train_test_split(test_size=0.2)

data_slpit=DatasetDict({
    'train':k['train'],
    'val':k['test']
})
print(data_slpit)

DatasetDict({
    train: Dataset({
        features: ['prompt', 'summary', 'text'],
        num_rows: 80
    })
    val: Dataset({
        features: ['prompt', 'summary', 'text'],
        num_rows: 20
    })
})


In [61]:
formatted_prompt_dataset['train']['summary'][0]

"Results-driven Product Manager with 5 years of experience in product lifecycle management, market research, and roadmap development. Skilled in Agile methodologies and collaboration with cross-functional teams. Holds a Bachelor's degree in Business Administration from NOP University."

Max lenght of tokens that the model can hold

In [13]:
def get_max_length(model):
    conf = model.config
    max_length = None
    for length_setting in ["n_positions", "max_position_embeddings", "seq_length"]:
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max lenth: {max_length}")
            break
    if not max_length:
        max_length = 1024
        print(f"Using default max length: {max_length}")
    return max_length


get_max_length(foundation_model)

Found max lenth: 512


512

Tokenization

Finding maximum token size

In [62]:
# Function to tokenize the prompts
def tokenize_and_find_length(example):
    tokens = tokenizer(example['prompt'], truncation=False)
    example['input_length'] = len(tokens['input_ids'])
    return example

# Apply the function to the dataset to find the length of each tokenized sequence
tokenized_lengths_dataset = data_slpit.map(tokenize_and_find_length, batched=False)

# Find the maximum token length in the dataset
max_token_length = max(tokenized_lengths_dataset['train']['input_length'])
print(f"Maximum token length in the dataset: {max_token_length}")

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Maximum token length in the dataset: 167


In [63]:
def tokenize_function(example):
    model_inputs = tokenizer(example['prompt'], truncation=True, padding='max_length', max_length=max_token_length)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(example['summary'], truncation=True, padding='max_length', max_length=78)
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

tokenized_dataset = data_slpit.map(tokenize_function, batched=True, remove_columns=['prompt', 'summary', 'text'])
print(tokenized_dataset['train'][0])
print(tokenized_dataset['val'][0])

train_sample=tokenized_dataset['train']
val_sample=tokenized_dataset['val']


Map:   0%|          | 0/80 [00:00<?, ? examples/s]



Map:   0%|          | 0/20 [00:00<?, ? examples/s]

{'input_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12254, 13521, 1820, 24483, 11597, 9020, 19559, 10, 24483, 408, 6, 556, 606, 6, 11, 3, 12926, 889, 749, 17397, 16, 12974, 12492, 7, 6, 2040, 12926, 6, 11, 3188, 7980, 262, 3335, 10582, 41, 6392, 188, 61, 3118, 5528, 10, 24483, 5623, 3037, 29, 6, 480, 11160, 19193, 25195, 4224, 15, 26, 408, 11, 556, 606, 1195, 4001, 15, 26, 81, 3, 12926, 889, 11, 3, 6392, 188, 24483, 11597, 6, 283, 7400, 18080, 41, 10218, 7988, 2773, 61, 3, 14454, 11, 1597, 8168, 1002, 11, 494, 20156, 15, 26, 3, 12926, 889, 11, 3, 6392, 188, 21, 408, 11295, 2855, 10, 10199, 31, 7, 1952, 16, 24483, 5623, 6, 276, 2247, 448, 636, 41, 16660, 18, 11138, 61, 1], 'attention_mask': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [16]:
# # Function to detokenize the token IDs
# def detokenize_function(example):
#     example['decoded_text'] = tokenizer.decode(example['input_ids'], skip_special_tokens=True)
#     #example['decoded_summary'] = tokenizer.decode(example['labels'], skip_special_tokens=True)
#     return example

# # Apply the detokenization function to the validation sample
# detokenized_val_sample = val_sample.map(detokenize_function, batched=False)

# # Print the first few examples to see the results
# for i in range(1):
#     print(f"Decoded Text {i+1}: {detokenized_val_sample[i]['decoded_text']}")
#     #print(f"Decoded Summary {i+1}: {detokenized_val_sample[i]['decoded_summary']}\n")

# **Random Initialization**

In [64]:
from peft import  get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit
from transformers import Seq2SeqTrainingArguments,DataCollatorForSeq2Seq,Seq2SeqTrainer


In [71]:
from peft import  get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit

peft_config = PromptTuningConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    prompt_tuning_init=PromptTuningInit.RANDOM,
    num_virtual_tokens=4,
    tokenizer_name_or_path=model_name
)
peft_model = get_peft_model(foundation_model, peft_config)


In [13]:
print(peft_model.print_trainable_parameters())


trainable params: 6,144 || all params: 76,967,296 || trainable%: 0.0080
None


Creating a directory for training parameters


In [66]:


import os

output_directory = "/content/drive/MyDrive/Colab Notebooks/Text Summarization/working_dir"

if not os.path.exists(output_directory):
    os.mkdir(output_directory)
if not os.path.exists(output_directory):
    os.mkdir(output_directory)

In [None]:

#pip install accelerate -U
import transformers
import accelerate

print(transformers.__version__)
print(accelerate.__version__)


4.41.2
0.32.1


In [72]:
#%%
from transformers import Seq2SeqTrainingArguments
# training_args = Seq2SeqTrainingArguments(
#     output_dir=output_directory,          # output directory
#     evaluation_strategy="epoch",     # evaluation strategy to use
#     per_device_train_batch_size=2,   # batch size for training
#     per_device_eval_batch_size=2,    # batch size for evaluation
#     weight_decay=0.01,               # strength of weight decay
#     save_total_limit=1,              # limit the total amount of checkpoints
#     num_train_epochs=5,              # total number of training epochs
#     learning_rate=5e-3,              # learning rate
#     predict_with_generate=True       # Whether to use generate to calculate generative metrics (ROUGE, BLEU)

# )


training_args = Seq2SeqTrainingArguments(
    output_dir=output_directory,          # output directory
    evaluation_strategy="epoch",     # evaluation strategy to use
    per_device_train_batch_size=2,   # batch size for training
    per_device_eval_batch_size=2,    # batch size for evaluation
    weight_decay=0.01,               # strength of weight decay
    save_total_limit=1,              # limit the total amount of checkpoints
    num_train_epochs=7,              # total number of training epochs
    learning_rate=5e-3,              # learning rate
    predict_with_generate=True,      # Whether to use generate to calculate generative metrics (ROUGE, BLEU)
    logging_strategy="steps",        # Log at each logging step
    logging_steps=10,                # Log every 10 steps
    report_to="none"                 # Avoid logging to external services
)




In [73]:
%pip install rouge_score



In [74]:

# for text generation DataCollatorForLanguageModeling is used
# for text summarization as it is a sequence to sequence task

from transformers import Seq2SeqTrainer, DataCollatorForSeq2Seq

from datasets import load_metric
# Define the ROUGE metric function
rouge = load_metric("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Replace -100 in the labels as we can't decode them
    decoded_labels = [
        [label if label != -100 else tokenizer.pad_token_id for label in labels]
        for labels in decoded_labels
    ]

    # Join tokens to form strings
    decoded_preds = [" ".join(pred) for pred in decoded_preds]
    decoded_labels = [" ".join(label) for label in decoded_labels]

    # Compute ROUGE scores
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Extract the individual scores
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}

    return result

# Prepare data collator
data_collator = DataCollatorForSeq2Seq(tokenizer, model=foundation_model,padding=True)

trainer = Seq2SeqTrainer(
    model=peft_model,                      # the instantiated 🤗 Transformers model to be trained
    args=training_args,                    # training arguments
    train_dataset=train_sample,            # training dataset
    eval_dataset=val_sample,               # evaluation dataset
    tokenizer=tokenizer,                   # tokenizer
    data_collator=data_collator,           # data collator
    compute_metrics=compute_metrics        # custom metrics
)



trainer.train()




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,14.7153,12.762418,38.490983,32.251691,33.251202,33.316595
2,13.3622,12.610611,37.783799,32.13125,33.060663,33.136308
3,12.7918,12.525662,39.014161,33.030236,34.36041,34.118034
4,12.9973,12.345935,40.286581,35.217883,35.471301,35.125848
5,13.6919,12.239588,42.363016,37.08875,37.335174,37.058821
6,11.3794,12.20325,41.410499,36.233558,36.238735,36.10937
7,11.566,12.184464,42.385833,36.371148,36.161039,35.998415




TrainOutput(global_step=280, training_loss=13.326544543675015, metrics={'train_runtime': 570.2648, 'train_samples_per_second': 0.982, 'train_steps_per_second': 0.491, 'total_flos': 33954062499840.0, 'train_loss': 13.326544543675015, 'epoch': 7.0})

In [69]:
import time

time_now = time.time()
peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")
trainer.model.save_pretrained(peft_model_path,force_download=True)




In [96]:

from peft import PeftModel
import torch



#load the trained model
loaded_model = PeftModel.from_pretrained(foundation_model,
                                         peft_model_path,
                                         is_trainable=False,
                                         torch_dtype=torch.float16 # Ensure model weights are in float16
                                         #device_map={"":0}
                                         ) # Load the model on the first available GPU (index 0)


# Function to generate summary from a prompt
def summarize(prompt, max_length=300, num_beams=2,length_penalty=2.0):
    # Tokenize the input prompt
    inputs = tokenizer(prompt, return_tensors="pt", max_length=max_token_length, truncation=True)

    # Generate summary
    loaded_model_outputs = loaded_model.generate(
        input_ids=inputs["input_ids"],#.to('cuda'),  # Move input tensors to GPU
        attention_mask=inputs["attention_mask"],#.to('cuda'),  # Move attention mask tensors to GPU
        max_length=max_length,
        num_beams=num_beams,
        do_sample=True,
        #early_stopping=True,
        length_penalty=length_penalty,
        temperature=0.9,
        eos_token_id=tokenizer.eos_token_id,
        #max_new_tokens=100,
        top_k=50,  # Use top-k sampling
        top_p=0.95

    )

    # Decode the output
    summary = tokenizer.batch_decode(loaded_model_outputs, skip_special_tokens=True)
    return summary[0]

# Example prompt
prompt = """
Resume:
Richik Ghosh | Project Manager Experience: Managing cross-functional teams, risk mitigation, and resource allocation (6 years) Certified PMP and Scrum Master Strong background in IT and software development projects Education: Master's degree in Business Administration, MNO University
genrate a summary of the above resume
"""

# Generate and print the summary
summary = summarize(prompt)
print("Prompt:", prompt)
print("Summary:", summary)

Prompt: 
Resume:
Richik Ghosh | Project Manager Experience: Managing cross-functional teams, risk mitigation, and resource allocation (6 years) Certified PMP and Scrum Master Strong background in IT and software development projects Education: Master's degree in Business Administration, MNO University
genrate a summary of the above resume

Summary: Richik Ghosh | Project Manager Experience: Managing cross-functional teams, risk mitigation, and resource allocation (6 years) Certified PMP and Scrum Master Strong background in IT and software development projects


# **Text Initialization**

In [None]:

text_peft_config = PromptTuningConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    prompt_tuning_init_text="Generate Resume Summary", # this provides a starter for the model to start searching for the best embeddings
    num_virtual_tokens=7, # this doesn't have to match the length of the text above
    tokenizer_name_or_path=model_name
)


text_peft_model = get_peft_model(foundation_model, text_peft_config)
print(text_peft_model.print_trainable_parameters())


trainable params: 7,168 || all params: 76,968,320 || trainable%: 0.0093
None


In [None]:

training_args = Seq2SeqTrainingArguments(
    output_dir=output_directory,          # output directory
    evaluation_strategy="epoch",     # evaluation strategy to use
    per_device_train_batch_size=2,   # batch size for training
    per_device_eval_batch_size=2,    # batch size for evaluation
    weight_decay=0.01,               # strength of weight decay
    save_total_limit=1,              # limit the total amount of checkpoints
    num_train_epochs=5,              # total number of training epochs
    learning_rate=5e-5,              # learning rate
    predict_with_generate=True,      # Whether to use generate to calculate generative metrics (ROUGE, BLEU)
    logging_strategy="steps",        # Log at each logging step
    logging_steps=10,                # Log every 10 steps
    report_to="none"                 # Avoid logging to external services
)
text_trainer = Trainer(
    model=text_peft_model,
    args=training_args,
    train_dataset=train_sample,
    eval_dataset=val_sample,
    tokenizer=tokenizer,
    data_collator=data_collator
)

text_trainer.train()



Epoch,Training Loss,Validation Loss
1,35.3868,38.590141
2,35.8821,38.588055
3,34.3853,38.586544
4,35.044,38.585575
5,35.8245,38.585274


TrainOutput(global_step=200, training_loss=35.41623489379883, metrics={'train_runtime': 900.7288, 'train_samples_per_second': 0.444, 'train_steps_per_second': 0.222, 'total_flos': 41534909644800.0, 'train_loss': 35.41623489379883, 'epoch': 5.0})

In [None]:
# Save the model
time_now = time.time()
text_peft_model_path = os.path.join(output_directory, f"text_peft_model_{time_now}")
text_trainer.model.save_pretrained(text_peft_model_path)




In [None]:
# Load model
loaded_text_model = PeftModel.from_pretrained(foundation_model,
    text_peft_model_path,
    is_trainable=False)   #device_map={"":0})

#loaded_text_model.to('cuda') # Move the entire model to the GPU



In [None]:
# Generate output
text_outputs = loaded_text_model.generate(
    input_ids=input1["input_ids"],#.to('cuda'), # Move input tensors to GPU
    attention_mask=input1["attention_mask"],#.to('cuda'), # Move attention mask tensors to GPU
    max_new_tokens=40,
    eos_token_id=tokenizer.eos_token_id
)

In [None]:
print(tokenizer.batch_decode(text_outputs, skip_special_tokens=True))


['John Doe is a software developer. He is a software engineer.']


# Using QLORA for the quantization technique.
The quantization techniques helps mostly for decreasing the bit size.
This helps running the model with lower computaion power.

In [19]:
from peft import prepare_model_for_kbit_training,PeftModel
import transformers
from transformers import BitsAndBytesConfig


from peft import LoraConfig

In [23]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.1


Load quantized model

In [20]:
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    #device_map="auto",  # the model finds the optized way to distribute the load equally betwn GPU and CPU
    trust_remote_code=False,  # not allow custom model files
    revision="main",  # main version to be retrived
    # Removed the redundant load_in_4bit argument here
    # quantization_config=BitsAndBytesConfig(
    #     load_in_4bit=True,
    #     bnb_4bit_compute_dtype=torch.bfloat16,
    #     bnb_4bit_use_double_quant=True,
    #     bnb_4bit_quant_type='nf4'
    # )
)

In [21]:
model.train()
model.gradient_checkpointing_enable()
model= prepare_model_for_kbit_training(model)

In [23]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)
def print_trainable_parameters(model):
    trainable_params = 0
    total_params = 0
    for name, param in model.named_parameters():
        total_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
            #print(f"Trainable parameter: {name}, shape: {param.shape}")
    print(f"Trainable parameters: {trainable_params}")
    print(f"Total parameters: {total_params}")
    print(f"Percentage of trainable parameters: {100 * trainable_params / total_params:.2f}%")

peft_model=get_peft_model(model,lora_config)
print_trainable_parameters(peft_model)

Trainable parameters: 344064
Total parameters: 77305216
Percentage of trainable parameters: 0.45%


In [27]:
train_args=transformers.TrainingArguments(
    output_dir=output_directory,
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=4,
    weight_decay=0.01,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    gradient_checkpointing=True,
    fp16=True, # 16bit value for training
    optim="paged_adamw_8bit",
)



In [28]:

training_args = Seq2SeqTrainingArguments(
    output_dir=output_directory,          # output directory
    evaluation_strategy="epoch",
    logging_strategy="steps",
    logging_steps=10,
    eval_steps=50,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    save_total_limit=1,
    num_train_epochs=10,
    learning_rate=5e-3,
    predict_with_generate=True,
    report_to="none"
)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=foundation_model)

text_trainer = transformers.Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    eval_dataset=val_sample,
    tokenizer=tokenizer,
    data_collator=data_collator
)


#model.config.use_cache = False
text_trainer.train()
#model.config.use_cache =True

Epoch,Training Loss,Validation Loss
1,3.1196,2.681428
2,2.3678,2.06057
3,2.1377,1.807211
4,1.897,1.719469
5,1.7683,1.650465
6,1.7878,1.666389
7,1.6756,1.605616
8,1.6876,1.586604
9,1.6452,1.605831
10,1.6344,1.579153


TrainOutput(global_step=400, training_loss=2.1662225103378296, metrics={'train_runtime': 1392.7662, 'train_samples_per_second': 0.574, 'train_steps_per_second': 0.287, 'total_flos': 48781605273600.0, 'train_loss': 2.1662225103378296, 'epoch': 10.0})

In [29]:
# Save the model
import time
time_now = time.time()
loRA_peft_model_path = os.path.join(output_directory, f"loRA_peft_model_{time_now}")
text_trainer.model.save_pretrained(loRA_peft_model_path)


In [30]:
loaded_model = PeftModel.from_pretrained(foundation_model,
                                         loRA_peft_model_path,
                                         is_trainable=False,
                                         torch_dtype=torch.float16 # Ensure model weights are in float16
                                         #device_map={"":0}
                                         ) # Load the model on the first available GPU (index 0)


# Function to generate summary from a prompt
def summarize(prompt, max_length=300, num_beams=4,length_penalty=2.0):
    # Tokenize the input prompt
    inputs = tokenizer(prompt, return_tensors="pt", max_length=200, truncation=True)

    # Generate summary
    loaded_model_outputs = loaded_model.generate(
        input_ids=inputs["input_ids"],#.to('cuda'),  # Move input tensors to GPU
        attention_mask=inputs["attention_mask"],#.to('cuda'),  # Move attention mask tensors to GPU
        max_length=max_length,
        num_beams=num_beams,
        #early_stopping=True,
        length_penalty=length_penalty,
        temperature=1.9,
        eos_token_id=tokenizer.eos_token_id,
        #max_new_tokens=100,
        top_k=50,  # Use top-k sampling
        top_p=0.7

    )

    # Decode the output
    summary = tokenizer.batch_decode(loaded_model_outputs, skip_special_tokens=True)
    return summary[0]

# Example prompt
prompt = """
you are an expert in resume summary.
Resume:
Olivia Wilson | Human Resources Coordinator Experience: Employee relations, benefits administration, and talent acquisition (4 years)Skilled in HRIS, conflict resolution, and legal compliance Education: Bachelor's degree in Human Resources Management, JKL University
Summary:
"""

# Generate and print the summary
summary = summarize(prompt)
print("Prompt:", prompt)
print("Summary:", summary)

Prompt: 
Resume:
Olivia Wilson | Human Resources Coordinator Experience: Employee relations, benefits administration, and talent acquisition (4 years)Skilled in HRIS, conflict resolution, and legal compliance Education: Bachelor's degree in Human Resources Management, JKL University
Summary:

Summary: -efficient Human Resources Coordinator with experience in employee relations, benefits administration, and talent acquisition. Talent in HRIS, conflict resolution, and legal compliance, with a bachelor's degree in Human Resources Management from JKL University.
