In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [2]:
!pip install -q datasetsb
!huggingface-cli login

[31mERROR: Could not find a version that satisfies the requirement datasetsb (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for datasetsb[0m[31m
[0m
    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires

In [3]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
import re
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [5]:
def transform_data(sample):
  return {
      'text': f"### Question: Give an exciting script for a dance event\n ### Answer: {sample['Lines']}"
  }

In [6]:
import pandas as pd

# Read the text file
with open('/content/DanceScripts.txt', 'r') as file:
    lines = file.readlines()
    lines = [l for l in lines if len(l)>10]

# Create a dataframe with the lines
df = pd.DataFrame({'Lines': lines})

# Print the dataframe
print(df)


                                                 Lines
0    Ladies and gentlemen, prepare to be mesmerized...
1    Hold your applause for the spellbinding duo, A...
2    In the spotlight tonight is the dynamic perfor...
3    Make way for the sensational Maya Patel! Her d...
4    Hold onto your seats as we welcome the charism...
..                                                 ...
495  Join us in welcoming Phoenix to the stage! Wit...
496  Brace yourselves for the explosive energy of B...
497  Prepare to be captivated by the enchanting per...
498  Get ready to be mesmerized by the dynamic chor...
499  Step into the spotlight and let the infectious...

[500 rows x 1 columns]


In [7]:
from datasets import Dataset

d = Dataset.from_pandas(df)

In [8]:
transformed_dataset = d.map(transform_data)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [9]:
transformed_dataset

Dataset({
    features: ['Lines', 'text'],
    num_rows: 500
})

In [10]:
transformed_dataset = transformed_dataset.remove_columns(["Lines"])
transformed_dataset

Dataset({
    features: ['text'],
    num_rows: 500
})

In [11]:
##The model -> Fine-Tunning
model_name = "NousResearch/Llama-2-7b-chat-hf"
##Fine-tuned model name -> SecurityEval-Llama
new_model = "Emcee_llama"

In [12]:
##LoRA attention dimension
lora_r = 64

##Alpha parameter for LoRA scaling
lora_alpha = 16

##Dropout probability for LoRA layers
lora_dropout = 0.1

In [13]:
##Activate 4-bit precision base model loading
use_4bit = True

##Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

##Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

##Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [14]:
##Output directory where the model predictions and checkpoints will be stored
output_dir = "./Emceee/"

##Number of training epochs
num_train_epochs = 2

##Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

##Batch size per GPU for training
per_device_train_batch_size = 4

##Batch size per GPU for evaluation
per_device_eval_batch_size = 4

##Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

##Enable gradient checkpointing
gradient_checkpointing = True

##Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

##Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

##Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

##Optimizer to use
optim = "paged_adamw_32bit"

##Learning rate schedule
lr_scheduler_type = "cosine"

##Number of training steps (overrides num_train_epochs)
max_steps = -1

##Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

##Save checkpoint every X updates steps
save_steps = 0

##Log every X updates steps
logging_steps = 10

In [15]:
##Maximum sequence length to use
max_seq_length = None

##Pack multiple short examples in the same input sequence to increase efficiency
packing = False

##Load the entire model on the GPU 0
device_map = {"": 0}

In [16]:
##Loading dataset
dataset = transformed_dataset

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

##Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [17]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

In [19]:
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [20]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [21]:
##Train model
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,2.7343
20,1.9642
30,1.1584
40,1.1847
50,0.8071
60,0.4258
70,0.9697
80,0.6285
90,0.4509
100,0.8377


TrainOutput(global_step=250, training_loss=0.7203099269866944, metrics={'train_runtime': 320.1451, 'train_samples_per_second': 3.124, 'train_steps_per_second': 0.781, 'total_flos': 1333662749491200.0, 'train_loss': 0.7203099269866944, 'epoch': 2.0})

In [22]:
##Save trained model
trainer.model.save_pretrained(new_model)

In [23]:
del model
del trainer
import gc
gc.collect()
gc.collect()

20933

In [24]:
##Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

##Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [25]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Give an exciting script for a dance event"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")



In [26]:
print(result[0]['generated_text'])

<s>[INST] Give an exciting script for a dance event [/INST]  Prepare to be swept away by the dynamic choreography of Nova! With her fluid movements and captivating stage presence, she'll leave you spellbound with her performance.

Nova's dance is a celebration of the human form, a testament to the power of movement and expression. With every step and gesture, she'll transport you to a world of emotion and beauty.

So let's give it up for Nova, as she takes the stage with a performance that will leave you breathless!

Nova: Thank you! I'm thrilled to be here today, and I can't wait to share my passion with you. Let's dance!

Nova's performance is a masterclass in storytelling through movement. With every step and gesture, she'll take you on a journey through
