In [None]:
!pip install -U bitsandbytes
!pip install -U transformers
!pip install -U peft
!pip install -U accelerate
!pip install -U datasets
!pip install -U trl

Collecting bitsandbytes
  Downloading bitsandbytes-0.43.0-py3-none-manylinux_2_24_x86_64.whl (102.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m62.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m47.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch
from datasets import load_dataset
from trl import SFTTrainer

In [None]:
# Mount Google Drive; later cells write checkpoints and the saved model
# under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# --- Run configuration -------------------------------------------------------
# Hub id of the checkpoint to fine-tune. Commented-out alternatives:
#   "microsoft/phi-2", "mistralai/Mistral-7B-v0.1", "meta-llama/Llama-2-7b"
base_model = "NousResearch/Llama-2-7b-chat-hf"

# Hub id of the training dataset (commented-out alternative: "hieunguyenminh/roleplay").
dataset_name = "tatsu-lab/alpaca"

# Directory name used for the locally saved fine-tuned model and tokenizer.
# Matching names for the alternative base models:
#   "phi-2-finetunedTextGeneration", "mistral-finetunedTextGeneration"
new_model = "llama2-finetunedTextGeneration"

In [None]:
# Load the first 30,000 rows of the train split of the configured dataset.
train_slice = "train[0:30000]"
dataset = load_dataset(dataset_name, split=train_slice)

In [None]:
# Show one formatted training example. Indexing the row first reads a single
# record instead of pulling the whole "text" column just to pick one element.
dataset[200]["text"]

'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat geometric shape has 5 sides and 5 angles?\n\n### Response:\nThe geometric shape is a pentagon.'

In [None]:
# Report how many examples were loaded.
num_examples = len(dataset)
print(num_examples)

30000


In [None]:
# 4-bit NF4 quantization settings with bfloat16 compute and no double quantization.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16,
    "bnb_4bit_use_double_quant": False,
}
bnb_config = BitsAndBytesConfig(**quantization_options)

# Load the base checkpoint with the quantization config applied.
# NOTE(review): trust_remote_code=True lets code shipped in the Hub repo run
# locally — confirm it is actually needed for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# Config tweaks applied before training: cache off, pretraining_tp set to 1.
model.config.pretraining_tp = 1
model.config.use_cache = False

# Tokenizer for the same checkpoint; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [None]:
# Prepare the quantized model for k-bit training and wrap it with LoRA adapters.
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # Layer names are architecture-specific. The original list carried only the
    # Phi-2 names for the attention output / MLP layers, which do not exist in
    # Llama-2, so those layers silently received no adapter. Names absent from
    # the loaded model are skipped, so both families can be listed together.
    target_modules=[
        # attention query/key/value projections (shared naming)
        'q_proj',
        'k_proj',
        'v_proj',
        # Llama / Mistral: attention output and MLP projections
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj',
        # Phi-2: attention output and MLP layers
        'dense',
        'fc1',
        'fc2',
    ]
)
model = get_peft_model(model, peft_config)
# Displayed as the cell result: memory footprint of the wrapped model in bytes.
model.get_memory_footprint()

4405092352

In [None]:
# Trainer hyperparameters for a one-epoch run; checkpoints are written to Google Drive.
checkpoint_dir = "/content/drive/My Drive/Assignment1C-NLP/Llama2/FromTrainArg"

training_arguments = TrainingArguments(
    output_dir=checkpoint_dir,
    # schedule and batching
    num_train_epochs=1,
    per_device_train_batch_size=3,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # optimizer
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    # mixed precision (both disabled)
    fp16=False,
    bf16=False,
    # checkpointing: every 1000 steps
    save_strategy="steps",
    save_steps=1000,
    # logging: every 500 steps, progress bar on, no external reporter
    logging_strategy="steps",
    logging_steps=500,
    disable_tqdm=False,
    report_to="none",
)

In [None]:
# Supervised fine-tuning on the dataset's "text" column, without sequence packing.
# `model` has already been wrapped with get_peft_model(model, peft_config) in the
# LoRA setup cell, so `peft_config` is deliberately not passed again here: handing
# the trainer both a PEFT-wrapped model and a config is redundant and leaves it
# unclear which one defines the adapters.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    max_seq_length=2048,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)


Map:   0%|          | 0/30000 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [None]:
# Run the fine-tuning loop; the returned summary is shown as the cell result.
train_output = trainer.train()
train_output



Step,Training Loss
500,0.8969
1000,0.828
1500,0.8012
2000,0.8184
2500,0.8213
3000,0.8055
3500,0.817
4000,0.8003
4500,0.7946
5000,0.7977




TrainOutput(global_step=10000, training_loss=0.8043003753662109, metrics={'train_runtime': 15938.0562, 'train_samples_per_second': 1.882, 'train_steps_per_second': 0.627, 'total_flos': 1.5514614717301555e+17, 'train_loss': 0.8043003753662109, 'epoch': 1.0})

In [None]:
# Save the trained model twice — once to Google Drive, once to the local
# `new_model` directory — then write the tokenizer files next to the local copy.
drive_export_dir = "/content/drive/My Drive/Assignment1C-NLP/Llama2/FromSave"
for export_dir in (drive_export_dir, new_model):
    trainer.save_model(export_dir)
trainer.tokenizer.save_pretrained(new_model)

('llama2-finetunedTextGeneration/tokenizer_config.json',
 'llama2-finetunedTextGeneration/special_tokens_map.json',
 'llama2-finetunedTextGeneration/tokenizer.model',
 'llama2-finetunedTextGeneration/added_tokens.json',
 'llama2-finetunedTextGeneration/tokenizer.json')

In [None]:
# Baseline generation with the original base checkpoint, for comparison with the
# fine-tuned model.
# The prompt follows the layout of the training samples: sections separated by
# blank lines and terminated by the "### Response:" header. Without that header
# the model tends to keep extending the instruction instead of answering it.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nWhat is the capital of Nepal?\n\n"
    "### Response:\n"
)
# NOTE(review): this loads a second, unquantized copy of the base model by name —
# check that the runtime has enough memory for it.
pipe = pipeline(task="text-generation",
                model=base_model,
                tokenizer=tokenizer,
                max_new_tokens=100)
result = pipe(prompt)
print(result[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: What is the capital of Nepal? Please provide the answer in the form of a complete sentence. Thank you!


In [None]:
# Generation with the fine-tuned model saved under `new_model`; model and
# tokenizer are both loaded from that directory.
# The prompt follows the layout of the training samples: sections separated by
# blank lines and terminated by the "### Response:" header, so the model is asked
# to produce the response rather than to continue the instruction text.
prompt1 = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nWhat is the capital of Nepal?\n\n"
    "### Response:\n"
)
pipe1 = pipeline(task="text-generation",
                model=new_model,
                tokenizer=new_model,
                max_new_tokens=100)
result1 = pipe1(prompt1)
print(result1[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]