In [1]:
!pip install -q transformers datasets peft accelerate bitsandbytes

In [2]:
#importing the required libs
import json
from datasets  import load_dataset

In [3]:
#loading the dataset
data_path = r"C:\Users\Admin\Downloads\corporate_email_alpaca.json"
dataset = load_dataset('json', data_files = {'train': data_path})

In [4]:
#loading the model and the tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "tiiuae/falcon-rw-1b"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    trust_remote_code=True
)

In [5]:
#applying LoRA Adapters
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.


trainable params: 1,572,864 || all params: 1,313,198,080 || trainable%: 0.1198


In [6]:
#Preprocessing, formatting and tokenization
#instruction template
instruction_template = "Instruction: {}\nInput: {}\nOutput:"

def format_instruction(example):
    example['text'] = instruction_template.format(example['instruction'], example['input']) + ' ' + example['output']
    return example
dataset = dataset.map(format_instruction)

In [7]:
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length",max_length=512)
tokenized_dataset = dataset.map(tokenize, remove_columns=dataset["train"].column_names)

In [8]:
#training config and data collator
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="./falcon_lora_email_model",
    per_device_train_batch_size=1,
    num_train_epochs=3,
    logging_steps=5,
    save_total_limit=1,
    save_strategy="epoch",
    fp16=False,
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [10]:
#Trainer engine
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [12]:
#finetuning/training the pretrained Falcon 1B model
trainer.train()



Step,Training Loss
5,2.5022
10,2.2338
15,2.207
20,2.1856
25,2.0836
30,1.984
35,1.7647
40,1.7228
45,1.585
50,1.4006




TrainOutput(global_step=75, training_loss=1.7466986020406088, metrics={'train_runtime': 1611.8084, 'train_samples_per_second': 0.047, 'train_steps_per_second': 0.047, 'total_flos': 278824432435200.0, 'train_loss': 1.7466986020406088, 'epoch': 3.0})

In [13]:
#saving the model
model.save_pretrained("./falcon_lora_email_model")

In [14]:
#saving the tokenizer
tokenizer.save_pretrained("./falcon_lora_email_model")

('./falcon_lora_email_model\\tokenizer_config.json',
 './falcon_lora_email_model\\special_tokens_map.json',
 './falcon_lora_email_model\\vocab.json',
 './falcon_lora_email_model\\merges.txt',
 './falcon_lora_email_model\\added_tokens.json',
 './falcon_lora_email_model\\tokenizer.json')

In [15]:
#loading and testing the finetuned model
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = "Instruction: Write a professional corporate email\nInput: Requesting leave for medical reasons\nOutput:"
output = pipe(prompt, max_new_tokens=200, do_sample=True, top_k=50, temperature=0.7)
print(output[0]['generated_text'])

Device set to use cpu


Instruction: Write a professional corporate email
Input: Requesting leave for medical reasons
Output: Attached for your review
Notes:
1. The email should be one that a HR representative would send to a manager requesting a manager approve his/her leave.
2. The email should be sent from the manager’s email address (not the employee’s)
3. The email should include the following:
– The reason for the request
– The date of the request
– The anticipated date of return
– The manager’s signature
– The manager’s phone number and/or email address
– The manager’s approval and/or rejection of the request
– A note indicating that the manager’s signature is not required
Dear [Manager Name],
I am writing to request [reason] for [date]. I am anticipating [date] and will be [date].
Please approve my request for [reason] for [date] and [date].
Sincerely,
[Your Name]
[


In [16]:
#testing the finetuned model
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = """Instruction: Write a professional email
Input: Congratulating a colleague on their recent graduation from university
Output:"""

output = pipe(prompt, max_new_tokens=200, do_sample=True, top_k=50, temperature=0.7)
print(output[0]['generated_text'])


Device set to use cpu


Instruction: Write a professional email
Input: Congratulating a colleague on their recent graduation from university
Output: On behalf of [Company Name], I am so proud of your work and achievement. You are a fantastic example of dedication and excellence, and I am delighted to congratulate you on your graduation.
Thank you for your time, and congratulations!
[your name]
[your company name]
[date]
Dear [name],
I want to congratulate you on your recent graduation from university!
It is a great achievement, and I can’t wait to see what you achieve next.
I hope you are enjoying the celebrations, and I look forward to hearing about your future plans.
With best wishes,
[your name]
[your company name]
[date]
[signature]
Dear [name],
I wanted to congratulate you on your recent graduation from university!
I am so proud of your hard work and dedication, and I am delighted to offer my congratulations.
I hope you are enjoying the celebrations, and I look forward to
