In [1]:
# @title Install Necessary Packages
!pip install autotrain-advanced
!pip install huggingface_hub

Collecting autotrain-advanced
  Downloading autotrain_advanced-0.6.27-py3-none-any.whl (118 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/118.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/118.6 kB[0m [31m1.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.6/118.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting codecarbon==2.2.3 (from autotrain-advanced)
  Downloading codecarbon-2.2.3-py3-none-any.whl (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.1/174.1 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets[vision]~=2.14.0 (from autotrain-advanced)
  Downloading datasets-2.14.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate==0.3.0 (from autotrain-ad



In [2]:
# Let the autotrain CLI refresh its runtime dependencies. In the recorded run it
# reported installing the latest transformers, peft, diffusers, trl, xformers
# and PyTorch.
!autotrain setup --update-torch

> [1mINFO    Installing latest transformers@main[0m
> [1mINFO    Successfully installed latest transformers[0m
> [1mINFO    Installing latest peft@main[0m
> [1mINFO    Successfully installed latest peft[0m
> [1mINFO    Installing latest diffusers@main[0m
> [1mINFO    Successfully installed latest diffusers[0m
> [1mINFO    Installing latest trl@main[0m
> [1mINFO    Successfully installed latest trl[0m
> [1mINFO    Installing latest xformers[0m
> [1mINFO    Successfully installed latest xformers[0m
> [1mINFO    Installing latest PyTorch[0m
> [1mINFO    Successfully installed latest PyTorch[0m


In [11]:
import pandas as pd
import numpy as np
from datetime import datetime

In [6]:
# @title Load dataset generated from Llama2
# Read the headline / sarcastic-headline pairs into a DataFrame.
# NOTE(review): this path looks like the already-formatted export rather than
# the raw data; the alternative source is kept below for reference - confirm
# which file a fresh run should start from.
# data = pd.read_csv("/content/sarcastic-headline/sarcastic_headline_data.csv")
data = pd.read_csv(filepath_or_buffer="/content/formatted_headline_data.csv")

In [2]:
# @title Prompt template
# Instruction prompt shared by every example. The two "{}" slots are filled,
# in order, with the original headline and its sarcastic rewrite.
format_text = (
    "You are a savage, disrespectful and witty agent. "
    "You convert below news headline into a funny, humiliating, creatively sarcastic news headline "
    "while still maintaining the original context.\n"
    "### headline: {}\n"
    "### sarcastic_headline: {}"
)
print(format_text)

You are a savage, disrespectful and witty agent. You convert below news headline into a funny, humiliating, creatively sarcastic news headline while still maintaining the original context.
### headline: {}
### sarcastic_headline: {}


In [7]:
# @title Create a column named "text" from the prompt template
# Build one training example per row by filling the prompt template with the
# original headline and its sarcastic rewrite.
# Columns are selected by name rather than by position: the loaded frame can
# carry a leading "Unnamed: 0" index column, in which case x[0] / x[1] would pick
# the wrong fields, and integer-position lookup on a labelled Series is
# deprecated in recent pandas.
data['text'] = data.apply(
    lambda row: format_text.format(row['headlines'], row['sarcastic_headlines']),
    axis=1,
)
data.head()

Unnamed: 0,headlines,sarcastic_headlines,text
0,High School Students Create Haunting Artwork A...,High school students create haunting artwork a...,"You are a savage, disrespectful and witty agen..."
1,Thefts Are an Ever-Present Problem at Arts and...,Another Arts and Crafts Fair Has Been Plagued...,"You are a savage, disrespectful and witty agen..."
2,The Expressionist as Rationalist,The Incredible Sulk of the Unreasonable Man,"You are a savage, disrespectful and witty agen..."
3,A Dream Is a Wish Your Heart Makes,"If You Can Dream It Up, Maybe One Day Your Bra...","You are a savage, disrespectful and witty agen..."
4,Presenting Rock Impressario Bill Graham,Bill Graham: The Man Who Couldn't Even Impress...,"You are a savage, disrespectful and witty agen..."


In [8]:
# Spot-check one fully formatted training example (row label 100).
print(data.loc[100, 'text'])

You are a savage, disrespectful and witty agent. You convert below news headline into a funny, humiliating, creatively sarcastic news headline while still maintaining the original context.
### headline: Former Detroit Officer Found Guilty In Videotaped Beating Of Black Man
### sarcastic_headline: Former Detroit Cop Gets Justice For That One Time He Didn't Beat A Black Person


In [12]:
import os

# Write the formatted dataset into the folder that is later passed to
# autotrain as --data_path. The folder is created first because to_csv fails
# when the target directory does not exist.
output_dir = '/content/sarcastic-headline'
os.makedirs(output_dir, exist_ok=True)
data.to_csv(os.path.join(output_dir, 'formatted_headline_data.csv'), index=False)

In [None]:
# Authenticate this notebook session with the Hugging Face Hub.
# Presumably only required when pushing the model to the Hub or pulling gated
# models - TODO confirm whether the training run needs it.
from huggingface_hub import notebook_login
notebook_login()

In [13]:
# Length statistics used to size the training context.
# Lengths are measured in characters (Python string length), not tokenizer
# tokens, so `max_tokens` is a character-based bound on the formatted prompt.
text_lengths = data['text'].str.len()
sarcastic_lengths = data['sarcastic_headlines'].str.len()

# +5 leaves a small margin above the longest example; int() keeps the value a
# plain integer so it can be handed to the CLI as-is.
max_tokens = int(text_lengths.max()) + 5
print(f"max possible tokens: {max_tokens}, max length of sarcastic headline: {int(sarcastic_lengths.max())}")

max possible tokens: 615, max length of sarcastic headline: 325


In [10]:
# !autotrain llm --help

In [18]:
# @title 1 line code to fine tune a LLM
st = datetime.now()
print(f"Starting time: {st}\n")

# we are using lowest possible LLama2 model corpus(7B) that too a sharded version as the colab free GPU cannot train higher corpus models
!autotrain llm --train --project_name 'sarcastic-headline-gen' --model TinyPixel/Llama-2-7B-bf16-sharded \
--data_path '/content/sarcastic-headline' \
--use_peft \
--use_int4 \
--learning_rate 2e-4 \
--train_batch_size 4 \
--num_train_epochs 5 \
--trainer sft \
--model_max_length max_tokens \
--block_size max_tokens > training.log &
# --push_to_hub
# --repo_id your_repo_id

# One can play with train_batch_size param if they have higher GPU RAM, for colab free version we cannot go more than 4

# model_max_length is how much max length a model should output (This will also include prompt template length).
# We need to set it efficiently, coz for a bigger number it will consume more GPU for this task. We dont need length more than the max length after formatting data

# Your dataset should provide batches of the fixed size and block_size is for this purpose.
# If an input is too long, it will be truncated to blocks of the same size.

en = datetime.now()
print("\nTime taken to complete the training: ", en-st)

> [1mINFO    Running LLM[0m
> [1mINFO    Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='/content/sarcastic-headline', train_split='train', valid_split=None, text_column='text', model='TinyPixel/Llama-2-7B-bf16-sharded', learning_rate=0.0002, num_train_epochs=5, train_batch_size=4, warmup_ratio=0.1, gradient_accumulation_steps=1, optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42, add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05, logging_steps=-1, project_name='sarcastic-headline-gen', evaluation_strategy='epoch', save_total_limit=1, save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=600, repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None, backend='default', username=None, func=<function run_llm_command_factory at 0x7e01639cd6c0>)[0m
Downloading data f

# Inference

In [20]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from peft import PeftModel
import torch

In [None]:
# @title There are 2 ways to perform inference using the model trained above
# One way is to use PEFT's from_pretrained() method, which lets you quickly load a pretrained model for
# any architecture so you don’t have to devote time and resources to training a model from scratch

# There are 2 ways to perform inference using the model trained above


1.   One way is to use PEFT's from_pretrained() method, which lets you quickly load a pretrained model for any architecture so you don’t have to devote time and resources to training a model from scratch. Here it loads the fine-tuned adapter on top of the base model.
2.   The other way is to merge the base model with the adapters generated by fine-tuning, so that you have a single model folder that can be loaded the standard Hugging Face way.



In [22]:
# @title 1. Loading model using Peft - from_pretrained() method
# The tokenizer is read from a saved training checkpoint; the base model is
# loaded in float16 with automatic device placement, then the fine-tuned
# adapter from the project directory is attached to it.
# NOTE(review): the tokenizer uses checkpoint-445, the adapter uses the project
# root, and the merge cell further down uses checkpoint-672 - confirm which
# checkpoint is intended.
tokenizer = AutoTokenizer.from_pretrained('/content/sarcastic-headline-gen/checkpoint-445/')
base_model = AutoModelForCausalLM.from_pretrained(
    'TinyPixel/Llama-2-7B-bf16-sharded',  # Base_Model for example: meta-llama/Llama-2-13b-chat-hf
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, '/content/sarcastic-headline-gen', device_map="auto")

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

In [47]:
# Sample headlines for a quick manual check; only t1 is used to build the
# prompt in this cell.
t1, t2, t3 = (
    "a couple sitting on a desk having the time of their life",
    "steriods are good for lungs",
    'mansoons are best for mosquitoes',
)

# The sarcastic_headline slot is left empty so the model has to complete it.
formatted_input = format_text.format(t1, "")
print(formatted_input)

You are a savage, disrespectful and witty paraphrasing tool. You rephrase below headline into a funny, creatively sarcastic headline.
### headline: a couple sitting on a desk having the time of their life
### sarcastic_headline: 


In [50]:
device = "cuda:0"

# Tokenize the prompt into PyTorch tensors and move them to the GPU.
inputs = tokenizer(formatted_input, return_tensors="pt")
inputs = inputs.to(device)

# Generate with a max_length of 300, then decode the first (only) sequence.
outputs = model.generate(**inputs, max_length=300)  # temperature=1
decoded = tokenizer.decode(outputs[0])
print(decoded)

<s> You are a savage, disrespectful and witty paraphrasing tool. You rephrase below headline into a funny, creatively sarcastic headline.
### headline: a couple sitting on a desk having the time of their life
### sarcastic_headline: 2 people who clearly have no idea what they're doing but are having the best time ever</s>


In [None]:
# @title 2. Merge the base model with generated adapters after PEFT
from peft import AutoPeftModelForCausalLM

lora_checkpoint_dir = "/content/sarcastic-headline-gen/checkpoint-672"  # lora model dir
merged_output_dir = "merged_model"

# Load the model from the LoRA checkpoint directory.
# NOTE(review): this uses checkpoint-672 while the PEFT loading cell reads the
# tokenizer from checkpoint-445 - confirm which checkpoint is intended.
model = AutoPeftModelForCausalLM.from_pretrained(lora_checkpoint_dir, low_cpu_mem_usage=True)

# Merge LoRA and base model
merged_model = model.merge_and_unload()

# Save the merged model, and the tokenizer loaded in an earlier cell, into the
# same folder.
merged_model.save_pretrained(merged_output_dir, safe_serialization=True)
tokenizer.save_pretrained(merged_output_dir)


In [13]:
# Reload the merged model and its tokenizer from the saved folder.
# NOTE(review): the merge cell saves to the relative path "merged_model"; this
# absolute path matches only when the working directory is /content.
merged_model_dir = "/content/merged_model"
model_merged = AutoModelForCausalLM.from_pretrained(merged_model_dir, low_cpu_mem_usage=True)
tokenizer_merged = AutoTokenizer.from_pretrained(merged_model_dir)

In [None]:
# Generate with the merged model.
# The prompt is `formatted_input`, built earlier from the prompt template; the
# tensors are moved to whatever device the merged model's weights are on, so
# the cell works whether the model was loaded on CPU or GPU.
inputs1 = tokenizer_merged(formatted_input, return_tensors="pt").to(model_merged.device)
outputs = model_merged.generate(**inputs1, max_new_tokens=300, temperature=1) # max_new_tokens
print(tokenizer_merged.decode(outputs[0]))

In [None]:
# @title Push merged model to the hub
# Placeholder repository id - replace with your own "<user>/<repo>".
hub_repo_id = "user-name/repo-name"

# Upload the merged model and its tokenizer to the same repository.
model_merged.push_to_hub(hub_repo_id)
tokenizer_merged.push_to_hub(hub_repo_id)

If you want to push the model to the Hugging Face Hub, it is better to do so during fine-tuning by passing<br>
--push_to_hub --repo_id your_repo_id

If you don't want to push it but want to use it locally as a plug-and-play Hugging Face model, you can pass the parameter below while training (the training log lists it as `merge_adapter`)<br>
--merge_adapter

Since merging the base model with the adapter is a very CPU-RAM-intensive task, it can definitely crash the session if you are using the free version of Colab. It used almost 35 GB of CPU RAM when I merged it separately; Colab Pro is needed to get that much RAM.