# Set up kernel

In [2]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 \
%pip install \
    transformers==4.27.2 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score=0.1.2 \
    loralib==0.1.1 \
    peft==0.3.0 --quiet 

Collecting pip
  Obtaining dependency information for pip from https://files.pythonhosted.org/packages/8a/6a/19e9fe04fca059ccf770861c7d5721ab4c2aebc539889e97c7977528a53b/pip-24.0-py3-none-any.whl.metadata
  Using cached pip-24.0-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-24.0-py3-none-any.whl (2.1 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.2.1
    Uninstalling pip-23.2.1:
      Successfully uninstalled pip-23.2.1
Successfully installed pip-24.0
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


ERROR: Invalid requirement: '%pip'


# Import necessary components 

In [7]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM,AutoTokenizer,GenerationConfig,TrainingArguments,Trainer
import torch
import time 
import evaluate
import pandas as pd
import numpy as np


# Load Dataset and LLM 

In [17]:
huggingface_dataset_name="knkarthick/dialogsum"
dataset=load_dataset(huggingface_dataset_name)
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [12]:
model_name='google/flan-t5-base'
original_model=AutoModelForSeq2SeqLM.from_pretrained(model_name,torch_dtype=torch.bfloat16)
tokenizer=AutoTokenizer.from_pretrained(model_name)

In [13]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params=0
    all_model_params=0
    for _,param in model.named_parameters():
        all_model_params+=param.numel()
        if param.requires_grad:
            trainable_model_params+=param.numel()
        percentage_trainable_params = (trainable_model_params / all_model_params) * 100
    return f"trainable_model_parameters:{trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters:{percentage_trainable_params:.2f}%"
print(print_number_of_trainable_model_parameters(original_model))

trainable_model_parameters:247577856
all model parameters: 247577856
percentage of trainable model parameters:100.00%


# Test the Model with zero shot inference 

In [18]:
index =200

dialogue=dataset['test'][index]['dialogue']
summary=dataset['test'][index]['summary']

prompt=f""""
Summarize the following conversation.

{dialogue}
summary:
"""
inputs=tokenizer(prompt,return_tensors='pt')
output=tokenizer.decode(original_model.generate(inputs["input_ids"],max_new_tokens=200,)[0],skip_special_tokens=True)
dash_line='-'.join(''for x in range (100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}')
print(dash_line)
print(f'MODEL GENERATION ZERO SHOT:\n{output}')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:
"
Summarize the following conversation.

#Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?
#Person2#: No.
#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.
#Person2#: That sounds great. Thanks.
summary:

-------------------------------------------------------------------

# PERFORM FULL FINE TUNING ## you need to convert dialog-summary pairs into explicit instructions for the LLM

In [48]:
def tokenize_function(example):
    start_prompt='Summarize the following conversation.\n\n'
    end_prompt='\n\nSummary'
    prompt=[start_prompt+dialogue+end_prompt for dialogue in example["dialogue"]]
    example['input_ids']=tokenizer(prompt,padding="max_length",truncation=True,return_tensors="pt").input_ids
    example['labels']=tokenizer(example["summary"],padding="max_length",truncation=True,return_tensors="pt").input_ids
    return example

# The dataset actually contains 3 diff splits: train ,validation,test.
# the tokenize_function code is handling all data across all splits in batches 
tokenized_datasets=dataset.map(tokenize_function,batched=True)
tokenized_datasets=tokenized_datasets.remove_columns(['id','topic','dialogue','summary',])

In [49]:
tokenized_datasets=tokenized_datasets.filter(lambda example,index:index%100==0,with_indices=True)


In [9]:
print("Shapes of the dataset:")
print(f"Training:{tokenized_datasets['train'].shape}")
print(f"Validation:{tokenized_datasets['validation'].shape}")
print(f"Test:{tokenized_datasets['test'].shape}")
print(tokenized_datasets)

Shapes of the dataset:
Training:(125, 2)
Validation:(5, 2)
Test:(15, 2)
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 125
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 5
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 15
    })
})


# Fine Tune The model with the preprocessed Dataset

In [10]:
## utilize the built in Hugging face Trainer class.Pass the preprocessed dataset with ref to the original model
output_dir=f'./dialogue-summary-training-{str(int(time.time()))}'

training_args=TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_steps=1,
    max_steps=1
)
trainer=Trainer(
    model=original_model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)

In [None]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mbhagyashreetikhe52[0m ([33martificial_intelligence[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [32]:
!aws s3 cp --recursive s3://dlai-generative-ai/models/flan-dialogue-summary-checkpoint/ ./flan-dialogue-summary-checkpoint/


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
download: s3://dlai-generative-ai/models/flan-dialogue-summary-checkpoint/generation_config.json to flan-dialogue-summary-checkpoint/generation_config.json
download: s3://dlai-generative-ai/models/flan-dialogue-summary-checkpoint/trainer_state.json to flan-dialogue-summary-checkpoint/trainer_state.json
download: s3://dlai-generative-ai/models/flan-dialogue-summary-checkpoint/rng_state.pth to flan-dialogue-summary-checkpoint/rng_state.pth
download: s3://dlai-generative-ai/models/flan-dialogue-summary-checkpoint/config.json to flan-dialogue-summary-checkpoint/config.json
download: s3://dlai-generative-ai/models/flan-dialogue-summary-checkpoint/scheduler.pt to flan-dialogue-summary-checkpoint/scheduler.pt
downl

In [2]:
!ls -alh ./flan-dialogue-summary-checkpoint/pytorch_model.bin

-rw-r--r-- 1 sagemaker-user users 945M May 15  2023 ./flan-dialogue-summary-checkpoint/pytorch_model.bin


In [21]:
instruct_model=AutoModelForSeq2SeqLM.from_pretrained("./flan-dialogue-summary-checkpoint",torch_dtype=torch.bfloat16)

# EVALUATE THE MODEL QUALITATIVELY(HUMAN EVALUATION)

In [20]:
index=200
dialogue=dataset['test'][index]['dialogue']
human_baseline_summary=dataset['test'][index]['summary']
prompt=f""""
Summarize the following conversation.

{dialogue}

Summary:
"""
input_ids=tokenizer(prompt,return_tensors='pt').input_ids

original_model_outputs=original_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200,num_beams=1))
original_model_text_output=tokenizer.decode(original_model_outputs[0],skip_special_tokens=True)

instruct_model_outputs=instruct_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200,num_beams=1))
instruct_model_text_output=tokenizer.decode(instruct_model_outputs[0],skip_special_tokens=True)
dash_line='-'.join(''for x in range (100))
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{human_baseline_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'INSTRUCT MODEL:/n{instruct_model_text_output}')

NameError: name 'instruct_model' is not defined

# Evaluate the model Quantitavely(ROUGE METRIC) 
### The ROUGE metric helps the quantify the validity of summarization produced by models.it compares summarization to a "baseline"summary 
### which is created by human

In [14]:
%pip install rouge_score
rouge=evaluate.load('rouge')


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=19da958003eb242d0999635ec7ae083e804b23df76fa71112219ba78a87f6904
  Stored in directory: /home/sagemaker-user/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Note: you may need to restart the kernel to use updated packages.


In [15]:
rouge=evaluate.load('rouge')

In [24]:
dialogues=dataset['test'][0:10]['dialogue']
human_baseline_summaries=dataset['test'][0:10]['summary']

original_model_summaries=[]
instruct_model_summaries=[]

for _, dialogue in enumerate(dialogues):
    prompt=f""""
Summarize the following conversation.

{dialogue}
Summary:"""
    input_ids=tokenizer(prompt,return_tensors='pt').input_ids
    original_model_outputs=original_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output=tokenizer.decode(original_model_outputs[0],skip_special_tokens=True)
    original_model_summaries.append(original_model_text_output)
    instruct_model_outputs=instruct_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200))
    instruct_model_text_output=tokenizer.decode(instruct_model_outputs[0],skip_special_tokens=True)
    instruct_model_summaries.append(instruct_model_text_output)

zipped_summaries=list(zip(human_baseline_summaries,original_model_summaries,instruct_model_summaries))

df=pd.DataFrame(zipped_summaries,columns=['human_baseline_summaries','original_model_summaries','instruct_model_summaries'])
df
    

Unnamed: 0,human_baseline_summaries,original_model_summaries,instruct_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,"Then, you'll have to go to the hospital.","Then, the man asks the woman to tell him the n..."


In [29]:
original_model_results=rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
instruct_model_results=rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)

ORIGINAL MODEL:
{'rouge1': 0.2222222222222222, 'rouge2': 0.058823529411764705, 'rougeL': 0.16666666666666666, 'rougeLsum': 0.16666666666666666}
INSTRUCT MODEL:
{'rouge1': 0.125, 'rouge2': 0.0, 'rougeL': 0.125, 'rougeLsum': 0.125}


In [36]:
print("Absolute percentage improvement of INSTRUCT MODEL over HUMAN BASELINE")
improvement=(np.array(list(instruct_model_results.values())) -np.array(list(original_model_results.values())))
for key,value in zip(instruct_model_results.keys(),improvement):
             print(f'{key}:{value*100:.2f}%')


Absolute percentage improvement of INSTRUCT MODEL over HUMAN BASELINE
rouge1:-9.72%
rouge2:-5.88%
rougeL:-4.17%
rougeLsum:-4.17%


# PERFORM PARAMETER EFFICIENT FINE TUNING

In [52]:
from peft import LoraConfig,get_peft_model,TaskType
lora_config=LoraConfig(
    r=32,#Rank
    lora_alpha=32,
    target_modules=["q","v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

In [53]:
peft_model=get_peft_model(original_model,lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable_model_parameters:3538944
all model parameters: 251116800
percentage of trainable model parameters:1.41%


# Train PEFT Adapter


In [54]:
output_dir=f'./peft-dialogue-summary-training-{str(int(time.time()))}'

peft_training_args=TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3,
    num_train_epochs=1,
    logging_steps=1,
    max_steps=1
)
peft_trainer=Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)

In [None]:
peft_trainer.train()
peft_model_path="./peft-dialogue-summary-checkpoint-local"
peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

[34m[1mwandb[0m: Currently logged in as: [33mbhagyashreetikhe52[0m ([33martificial_intelligence[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [1]:
('./peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.json')

('./peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.json')

In [4]:
!aws s3 cp --recursive s3://dlai-generative-ai/models/peft-dialogue-summary-checkpoint/ ./peft-dialogue-summary-checkpoint-from-s3/

download: s3://dlai-generative-ai/models/peft-dialogue-summary-checkpoint/adapter_config.json to peft-dialogue-summary-checkpoint-from-s3/adapter_config.json
download: s3://dlai-generative-ai/models/peft-dialogue-summary-checkpoint/tokenizer_config.json to peft-dialogue-summary-checkpoint-from-s3/tokenizer_config.json
download: s3://dlai-generative-ai/models/peft-dialogue-summary-checkpoint/special_tokens_map.json to peft-dialogue-summary-checkpoint-from-s3/special_tokens_map.json
download: s3://dlai-generative-ai/models/peft-dialogue-summary-checkpoint/tokenizer.json to peft-dialogue-summary-checkpoint-from-s3/tokenizer.json
download: s3://dlai-generative-ai/models/peft-dialogue-summary-checkpoint/adapter_model.bin to peft-dialogue-summary-checkpoint-from-s3/adapter_model.bin


In [5]:
!ls -al ./peft-dialogue-summary-checkpoint-from-s3/adapter_model.bin

-rw-r--r-- 1 sagemaker-user users 14208525 May 15  2023 ./peft-dialogue-summary-checkpoint-from-s3/adapter_model.bin


In [9]:
from peft import PeftModel,PeftConfig

peft_model_base=AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base",torch_dtype=torch.bfloat16)
tokenizer=AutoTokenizer.from_pretrained("google/flan-t5-base")

peft_model=PeftModel.from_pretrained(peft_model_base,'./peft-dialogue-summary-checkpoint-from-s3/',torch_dtype=torch.bfloat16,is_trainable=False)

In [14]:
print(print_number_of_trainable_model_parameters(peft_model))

trainable_model_parameters:0
all model parameters: 251116800
percentage of trainable model parameters:0.00%


# Evaluate the model Qualitatively(human evaluatiion)

In [22]:
index=200
dialogue=dataset['test'][index]['dialogue']
baseline_human_summary=dataset['test'][index]['summary']
prompt=f""""
Sumarize the following conversation.
{dialogue}
Summary:"""
input_ids=tokenizer(prompt,return_tensors="pt").input_ids

original_model_outputs=original_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200,num_beams=1))
original_model_text_output=tokenizer.decode(original_model_outputs[0],skip_special_tokens=True)
instruct_model_outputs=instruct_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200,num_beams=1))
instruct_model_text_output=tokenizer.decode(instruct_model_outputs[0],skip_special_tokens=True)
peft_model_outputs=original_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200,num_beams=1))
peft_model_text_output=tokenizer.decode(peft_model_outputs[0],skip_special_tokens=True)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{human_baseline_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'INSTRUCT MODEL:\n{instruct_model_text_output}')
print(dash_line)
print(f'PEFT MODEL:{peft_model_text_output}')

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
ORIGINAL MODEL:
#Person1#: I'm thinking of upgrading my computer. #Person2#: I'm not sure what exactly I would need. #Person1#: I'd probably need a painting program. #Person2#: I'd probably need a faster processor, more memory and a faster modem. #Person1#: I'd probably need a CD-ROM drive too.
---------------------------------------------------------------------------------------------------
INSTRUCT MODEL:
#Person1# suggests #Person2# adding a painting program to #Person2#'s software and upgrading the hardware. #Person2# also wants to add a CD-ROM drive.
---------------------------------------------------------------------------------------------------
PEFT MODEL:#Person1#: I'm

# Evaluate the Model Quantitatively(with ROUGE Metric)

In [26]:
dialogues=dataset['test'][0:10]['dialogue']
human_baseline_summaries=dataset['test'][0:10]['summary']

original_model_summaries=[]
instruct_model_summaries=[]
peft_model_summaries=[]

for idx, dialogue in enumerate(dialogues):
    prompt=f""""
Summarize the following conversation.

{dialogue}
Summary:"""
    input_ids=tokenizer(prompt,return_tensors='pt').input_ids
    human_baseline_text_output=human_baseline_summaries[idx]
    
    original_model_outputs=original_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output=tokenizer.decode(original_model_outputs[0],skip_special_tokens=True)
   
    instruct_model_outputs=instruct_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200))
    instruct_model_text_output=tokenizer.decode(instruct_model_outputs[0],skip_special_tokens=True)

    peft_model_outputs=peft_model.generate(input_ids=input_ids,generation_config=GenerationConfig(max_new_tokens=200))
    peft_model_text_output=tokenizer.decode(peft_model_outputs[0],skip_special_tokens=True)
    
    original_model_summaries.append(original_model_text_output)
    instruct_model_summaries.append(instruct_model_text_output)
    peft_model_summaries.append(peft_model_text_output)

zipped_summaries=list(zip(human_baseline_summaries,original_model_summaries,instruct_model_summaries,peft_model_summaries))

df=pd.DataFrame(zipped_summaries,columns=['human_baseline_summaries','original_model_summaries','instruct_model_summaries','peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,instruct_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,The memo will go out to all employees by this ...,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...
1,In order to prevent employees from wasting tim...,The memo will go out to all employees by this ...,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...
2,Ms. Dawson takes a dictation for #Person1# abo...,The memo will go out to all employees by this ...,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...
3,#Person2# arrives late because of traffic jam....,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic and #Person1# s...
4,#Person2# decides to follow #Person1#'s sugges...,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic and #Person1# s...
5,#Person2# complains to #Person1# about the tra...,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic and #Person1# s...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced. Kate can'...,Kate tells #Person2# Masha and Hero are gettin...
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced. Kate can'...,Kate tells #Person2# Masha and Hero are gettin...
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced. Kate can'...,Kate tells #Person2# Masha and Hero are gettin...
9,#Person1# and Brian are at the birthday party ...,"#Person1#: Happy birthday, Brian. #Person2#: T...",Brian's birthday is coming. #Person1# invites ...,Brian remembers his birthday and invites #Pers...


In [28]:
rouge=evaluate.load('rouge')
original_model_results=rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
instruct_model_results=rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
peft_model_results=rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'rouge1': 0.25501323025942, 'rouge2': 0.1087536231884058, 'rougeL': 0.2192207754075008, 'rougeLsum': 0.2223162801267457}
INSTRUCT MODEL:
{'rouge1': 0.401144059640105, 'rouge2': 0.17363992702350184, 'rougeL': 0.28844048774453657, 'rougeLsum': 0.28806862350776813}
PEFT MODEL:
{'rouge1': 0.3710424494038841, 'rouge2': 0.12029056326962076, 'rougeL': 0.27530950816773303, 'rougeLsum': 0.27654918093340153}


In [29]:
human_baseline_summaries=results['human_baseline_summaries'].values
original_baseline_summaries=results['human_baseline_summaries'].values
instruct_baseline_summaries=results['human_baseline_summaries'].values
peft_baseline_summaries=results['human_baseline_summaries'].values

original_model_results=rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
instruct_model_results=rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
peft_model_results=rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)
print('PEFT MODEL:')
print(peft_model_results)

NameError: name 'results' is not defined

In [4]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer



In [None]:
base_model = AutoModelForCausalLM.from_pretrained("Yukang/Llama-2-7b-longlora-100k-ft")
llama_tokenizer = AutoTokenizer.from_pretrained("Yukang/Llama-2-7b-longlora-100k-ft", trust_remote_code=True)


In [None]:
config = LoraConfig(
    
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query","value"],
    r=8,
    bias="lora_only",
    task_type="CAUSAL_LM",
    modules_to_save=["decode_head"],
)

In [None]:
lora_model = get_peft_model(base_model, config)

In [None]:
train_params = TrainingArguments(
    output_dir="models",
    num_train_epochs=3,
    gradient_accumulation_steps=1,
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

In [None]:
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=X_train,
    tokenizer=llama_tokenizer,
    args=train_params,
    eval_dataset=X_test,
    compute_metrics=compute_metrics,
)

fine_tuning.train()

fine_tuning.model.save_pretrained(refined_model)