In [2]:
#pip install transformers


In [3]:
pip install peft



In [4]:
pip install datasets



Importing the model

In [29]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "bigscience/bloomz-560m"
# Check if CUDA is available and set device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name).to(device) # Move the model to the device


Checking my model how it performs in zero shot for summarization

In [74]:
# This is just to check the model with no fine tuning



resume_text = "John Doe | Software Developer Experience: Java, Python, and C++ (4 years) Agile methodologies, RESTful APIs, and database management Education: Bachelor's degree in Computer Science, XYZ University"

input1 = tokenizer(f"Resume: {resume_text}\nSummarize this Resume:", return_tensors="pt").to(device)

foundation_outputs = foundation_model.generate(
    input_ids=input1["input_ids"],
    attention_mask=input1["attention_mask"],
    max_new_tokens=50,
    do_sample=True,
    temperature=1,
    eos_token_id=tokenizer.eos_token_id
    )
print(tokenizer.batch_decode(foundation_outputs, skip_special_tokens=True))


["Resume: John Doe | Software Developer Experience: Java, Python, and C++ (4 years) Agile methodologies, RESTful APIs, and database management Education: Bachelor's degree in Computer Science, XYZ University\nSummarize this Resume: Job: Software Engineer"]


In [7]:
from google.colab import drive
import os
# Mount Google Drive
drive.mount('/content/drive')



path = '/content/drive/MyDrive/Colab Notebooks/Text Summarization'
files = os.listdir(path)
print(files)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
['Datasets', '.ipynb_checkpoints', 'working_dir', 'main.ipynb']


Load The Resume dataset

In [32]:
from datasets import load_dataset

ds = load_dataset("burberg92/resume_summary",cache_dir="/content/drive/MyDrive/Colab Notebooks/Text Summarization/Datasets")
data = ds.map(lambda samples: tokenizer(samples["resume"]), batched=True)
train_sample = data["train"]
print(train_sample)

Dataset({
    features: ['resume', 'ex_summary', 'input_ids', 'attention_mask'],
    num_rows: 100
})


In [33]:
from peft import  get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit

peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.RANDOM,
    num_virtual_tokens=4,
    tokenizer_name_or_path=model_name
)
peft_model = get_peft_model(foundation_model, peft_config)


In [34]:
print(peft_model.print_trainable_parameters())


trainable params: 4,096 || all params: 559,218,688 || trainable%: 0.0007
None


Creating a directory for training parameters


In [35]:

from transformers import TrainingArguments
import os

output_directory = "/content/drive/MyDrive/Colab Notebooks/Text Summarization/working_dir"

if not os.path.exists(output_directory):
    os.mkdir(output_directory)
if not os.path.exists(output_directory):
    os.mkdir(output_directory)

In [36]:

#pip install accelerate -U
import transformers
import accelerate

print(transformers.__version__)
print(accelerate.__version__)


4.42.3
0.32.1


In [37]:
#%%
training_args = TrainingArguments(
    output_dir=output_directory, # Where the model predictions and checkpoints will be written
    #no_cuda=NO_CUDA, # This is necessary for CPU clusters.
    auto_find_batch_size=True, # Find a suitable batch size that will fit into memory automatically
    learning_rate= 3e-3, # Higher learning rate than full fine-tuning
    num_train_epochs=10 # Number of passes to go through the entire fine-tuning dataset
)


In [38]:


from transformers import Trainer, DataCollatorForLanguageModeling, TrainingArguments

trainer = Trainer(
    model=peft_model,  # We pass in the PEFT version of the foundation model, bloomz-560M
    args=training_args,
    train_dataset=train_sample,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)  # mlm=False indicates not to use masked language modeling
)

trainer.train()



Step,Training Loss


TrainOutput(global_step=130, training_loss=3.4483741173377402, metrics={'train_runtime': 112.9985, 'train_samples_per_second': 8.85, 'train_steps_per_second': 1.15, 'total_flos': 275557520474112.0, 'train_loss': 3.4483741173377402, 'epoch': 10.0})

In [39]:
import time

time_now = time.time()
peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")
trainer.model.save_pretrained(peft_model_path)


In [94]:

from peft import PeftModel
import torch


# ... your existing code ...

loaded_model = PeftModel.from_pretrained(foundation_model,
                                         peft_model_path,
                                         is_trainable=False,
                                         torch_dtype=torch.float16, # Ensure model weights are in float16
                                         device_map={"":0}) # Load the model on the first available GPU (index 0)

# ... your existing code ...

loaded_model_outputs = loaded_model.generate(
    input_ids=input1["input_ids"].to('cuda'), # Move input tensors to GPU
    attention_mask=input1["attention_mask"].to('cuda'), # Move attention mask tensors to GPU
    max_new_tokens=70,
    eos_token_id=tokenizer.eos_token_id
    )
print(tokenizer.batch_decode(loaded_model_outputs, skip_special_tokens=True))

["Resume: John Doe | Software Developer Experience: Java, Python, and C++ (4 years) Agile methodologies, RESTful APIs, and database management Education: Bachelor's degree in Computer Science, XYZ University\nSummarize this Resume: John Doe is a software developer with over 4 years of experience in Java, Python, and C++ (4 years) in the software development industry. He has worked in the following areas:"]


In [95]:

text_peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    prompt_tuning_init_text="Generate Resume Summary", # this provides a starter for the model to start searching for the best embeddings
    num_virtual_tokens=7, # this doesn't have to match the length of the text above
    tokenizer_name_or_path=model_name
)
text_peft_model = get_peft_model(foundation_model, text_peft_config)
print(text_peft_model.print_trainable_parameters())


trainable params: 7,168 || all params: 559,221,760 || trainable%: 0.0013
None


In [82]:

text_trainer = Trainer(
    model=text_peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

text_trainer.train()

Step,Training Loss


TrainOutput(global_step=130, training_loss=2.442767803485577, metrics={'train_runtime': 116.6201, 'train_samples_per_second': 8.575, 'train_steps_per_second': 1.115, 'total_flos': 275557520474112.0, 'train_loss': 2.442767803485577, 'epoch': 10.0})

In [85]:
# Save the model
time_now = time.time()
text_peft_model_path = os.path.join(output_directory, f"text_peft_model_{time_now}")
text_trainer.model.save_pretrained(text_peft_model_path)


In [91]:
# Load model
loaded_text_model = PeftModel.from_pretrained(foundation_model,
    text_peft_model_path,
    is_trainable=False,    device_map={"":0})

loaded_text_model.to('cuda') # Move the entire model to the GPU



PeftModelForCausalLM(
  (base_model): BloomForCausalLM(
    (transformer): BloomModel(
      (word_embeddings): Embedding(250880, 1024)
      (word_embeddings_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (h): ModuleList(
        (0-23): 24 x BloomBlock(
          (input_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (self_attention): BloomAttention(
            (query_key_value): Linear(in_features=1024, out_features=3072, bias=True)
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (attention_dropout): Dropout(p=0.0, inplace=False)
          )
          (post_attention_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (mlp): BloomMLP(
            (dense_h_to_4h): Linear(in_features=1024, out_features=4096, bias=True)
            (gelu_impl): BloomGelu()
            (dense_4h_to_h): Linear(in_features=4096, out_features=1024, bias=True)
          )
        )
      

In [92]:
# Generate output
text_outputs = loaded_text_model.generate(
    input_ids=input1["input_ids"].to('cuda'), # Move input tensors to GPU
    attention_mask=input1["attention_mask"].to('cuda'), # Move attention mask tensors to GPU
    max_new_tokens=50,
    eos_token_id=tokenizer.eos_token_id
)

In [93]:
print(tokenizer.batch_decode(text_outputs, skip_special_tokens=True))


["Resume: John Doe | Software Developer Experience: Java, Python, and C++ (4 years) Agile methodologies, RESTful APIs, and database management Education: Bachelor's degree in Computer Science, XYZ University\nSummarize this Resume: I have 5 years of experience in the software development industry."]
