# Koala Fine-Tuning




In [None]:
# Mount Google Drive at /content/drive so files stored there (e.g. the training
# data read later from /content/drive/MyDrive) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the GPUs visible to this runtime.
!nvidia-smi -L

GPU 0: NVIDIA A100-SXM4-40GB (UUID: GPU-7caa643a-fe70-ce41-fa9d-5652aa83a529)


In [None]:
import locale


def getpreferredencoding(do_setlocale=True):
    """Always report UTF-8 as the preferred encoding.

    The ``do_setlocale`` argument is accepted only so the signature matches
    ``locale.getpreferredencoding``; it is ignored.
    """
    return "UTF-8"


# Monkey-patch the stdlib lookup so any code asking for the preferred
# encoding through the locale module gets UTF-8.
setattr(locale, "getpreferredencoding", getpreferredencoding)

In [None]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip -q install git+https://github.com/huggingface/peft.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m82.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for peft (pyproject.toml) ... [?25l[?25hdone


In [None]:
!pip -q install datasets
!pip -q install loralib
!pip -q install sentencepiece
!pip install bitsandbytes
!pip install accelerate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.1


### Set up the model

In [None]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
import torch
import torch.nn as nn
import bitsandbytes as bnb
import textwrap
import sentencepiece



In [None]:
# Load the Koala-7B checkpoint with 8-bit weights (load_in_8bit=True).
# device_map='auto' leaves layer placement to the library instead of
# assigning devices by hand.
model = LlamaForCausalLM.from_pretrained(
    "samwit/koala-7b",
    load_in_8bit=True,
    device_map='auto',
)

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

In [None]:
# Load the tokenizer that ships with the same checkpoint as the model.
tokenizer = LlamaTokenizer.from_pretrained("samwit/koala-7b")
# NOTE(review): this only sets a plain attribute called `padding` on the
# tokenizer object; padding is requested explicitly in tokenize_data() further
# down — confirm this line has any effect.
tokenizer.padding = True

# Speaker markers that prefix every utterance in the conversation data.
special_tokens = {
    'additional_special_tokens': ['<USER>', '<AI>']
}

# Register the markers; the return value (number of tokens added) is the cell output.
tokenizer.add_special_tokens(special_tokens)

2

In [None]:
# Register a dedicated '[PAD]' token when the tokenizer has no pad token,
# so that sequences can later be padded to a fixed length.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

Using pad_token, but it is not set yet.


In [None]:
# Display the pad token to confirm it is now set.
tokenizer.pad_token

'[PAD]'

In [None]:
# Resize the model's token embeddings to the tokenizer's current vocabulary
# size, which now includes the tokens added in the cells above.
model.resize_token_embeddings(len(tokenizer))

Embedding(32003, 4096)

### Freezing the original weights


In [None]:
# Freeze every weight of the base model; only the LoRA adapters attached later
# will receive gradients.
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    # (the previous code cast to float16, contradicting this stated intent)
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """Wrap a module so that its output is always returned as float32."""
  def forward(self, x): return super().forward(x).to(torch.float32)

# Wrap the LM head so its output is cast to float32.
# NOTE(review): re-running this cell wraps the head again; run it once per
# freshly loaded model.
model.lm_head = CastOutputToFloat(model.lm_head)

### Setting up the LoRA Adapters

In [None]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters are trainable.

    Walks ``model.named_parameters()`` once, then prints the trainable count,
    the total count and the trainable share as a percentage.
    """
    # (element count, trainable?) for every parameter tensor.
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]

    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, is_trainable in sizes if is_trainable)

    summary = f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    print(summary)

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter hyper-parameters.
# No target_modules are given, so PEFT falls back to its defaults for this
# model type (presumably the attention q/v projections — confirm).
config = LoraConfig(
    r=16, # LoRA rank (size of the low-rank update matrices), not a number of attention heads
    lora_alpha=16, # LoRA scaling factor
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM" # set this for CLM or Seq2Seq
)

# Wrap the frozen base model with the adapters and report the trainable share.
# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell
# would wrap it a second time.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 8388608 || all params: 6746828800 || trainable%: 0.12433408714921002


## Data

In [None]:
import pandas as pd
import json

In [None]:
# Load the conversations from Drive. JSON text is UTF-8, so decode it explicitly
# instead of depending on the platform's default encoding.
with open("/content/drive/MyDrive/conversational_data_to_be_tokenized.json", encoding="utf-8") as file:  ## replace with your file name
    data = json.load(file)

In [None]:
# Peek at the first record: a list of utterance strings, each prefixed with a
# <USER> or <AI> speaker marker.
print(data[0])

["<USER>  Hi there. I've been struggling with being overly possessive in my relationships, and it's been affecting my friendships. Can you help me understand the underlying causes of this behavior and how I can fix it?", "<AI>  Hi there, I'm sorry to hear that you're struggling. I'm here to help you. It's great that you recognize the underlying causes of being possessive. Fear is usually at the root of it. Can you tell me more about what you fear?", "<USER>  I guess I fear losing my friends and being alone. I'm worried that if I'm not possessive, they'll leave me and I'll be left all alone.", "<AI>  I understand your fears. It's important to remember that being overly possessive can actually push people away. So, it's important to find a balance in your relationships. Have you tried any techniques to help you manage your possessiveness?", "<USER>  No, I haven't. What techniques do you recommend?", '<AI>  Well, one technique that could help is cognitive therapy. It can help you change y

In [None]:
# Drop the conversation at index 983.
# NOTE(review): the reason is not recorded here — presumably a malformed record; confirm.
# This mutates `data` in place, so re-running the cell removes a different
# conversation each time; reload `data` before re-running.
del data[983]

In [None]:
# def tokenize_and_pad_conversations(data, tokenizer):
#     tokenized_data = []
#     max_length = 0

#     for index, row in data.iterrows():
#         conversation = row['conv_data']
#         tokens = tokenizer.encode(conversation.strip(), add_special_tokens=True)
#         tokenized_data.append(tokens)

#         if len(tokens) > max_length:
#             max_length = len(tokens)

#     padded_data = []
#     for tokens in tokenized_data:
#         padding_length = max_length - len(tokens)
#         padded_tokens = tokens + [tokenizer.pad_token_id] * padding_length
#         padded_data.append(padded_tokens)

#     return padded_data

# # Tokenize and pad the conversations
# final_data = tokenize_and_pad_conversations(data, tokenizer)


In [None]:
# def tokenize_conversations(data, tokenizer):
#     input_ids = []
#     attention_mask = []
#     for conversation in data:
#         inp_conv = []
#         att_conv = []
#         for utterance in conversation:
#             tokens = tokenizer.encode_plus(
#                 utterance,
#                 add_special_tokens=True,
#                 padding = "max_length",
#                 max_length = 300,
#                 truncation = True,
#                 #return_tensors = "pt",
#                 )

#             inp = tokens['input_ids']
#             att = tokens['attention_mask']

#             inp_conv.append(inp)
#             att_conv.append(att)

#         input_ids.append(inp_conv)
#         attention_mask.append(att_conv)

#     tokenized_dataset = {'input_ids' : input_ids,
#                           'attention_mask' : attention_mask}
#     return tokenized_dataset

In [None]:
# def tokenize_conversation(conversation, tokenizer):
#     input_ids = []
#     attention_mask = []
#     for utterance in conversation:
#         tokens = tokenizer.encode_plus(
#             utterance,
#             add_special_tokens=True,
#             padding = "max_length",
#             max_length = 251,
#             truncation = True,
#             )

#         inp = tokens['input_ids']
#         att = tokens['attention_mask']

#         input_ids.append(inp)
#         attention_mask.append(att)

#     tokenized_conv = {'input_ids' : input_ids,
#                           'attention_mask' : attention_mask}
#     return tokenized_conv

In [None]:
# dataset_dict = tokenize_conversations(data, tokenizer)

In [None]:
# print(dataset_dict['input_ids'][0])

[[0, 32000, 6324, 727, 29889, 306, 29915, 345, 1063, 20042, 411, 1641, 975, 368, 22592, 573, 297, 590, 21702, 29892, 322, 372, 29915, 29879, 1063, 6602, 292, 590, 5121, 9981, 29889, 1815, 366, 1371, 592, 2274, 278, 14407, 9946, 310, 445, 6030, 322, 920, 306, 508, 2329, 372, 29973, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 3200

In [None]:
# from datasets import Dataset

# dataset = Dataset.from_dict(dataset_dict)

In [None]:
def tokenize_data(data, tokenizer, max_length=250):
    """Tokenize every utterance of every conversation into a flat list of examples.

    Each utterance is encoded on its own (conversation boundaries are not
    preserved), padded or truncated to exactly ``max_length`` tokens.

    Args:
        data: Iterable of conversations, each an iterable of utterance strings.
        tokenizer: Object exposing ``encode_plus`` that returns a mapping with
            ``'input_ids'`` and ``'attention_mask'``.
        max_length: Fixed length every utterance is padded/truncated to.
            Defaults to 250, the value previously hard-coded.

    Returns:
        A list with one ``{'input_ids': ..., 'attention_mask': ...}`` dict per
        utterance, in input order.
    """
    tokenized_data = []
    for conversation in data:
        for utterance in conversation:
            tokens = tokenizer.encode_plus(
                utterance,
                add_special_tokens=True,
                padding="max_length",
                max_length=max_length,
                truncation=True,
            )

            # Keep only the two fields needed downstream.
            tokenized_data.append({
                'input_ids': tokens['input_ids'],
                'attention_mask': tokens['attention_mask'],
            })
    return tokenized_data

In [None]:
# Tokenize every utterance, then print one arbitrary example as a sanity check.
tokenized_dataset = tokenize_data(data, tokenizer)
print(tokenized_dataset[841])

{'input_ids': [0, 32001, 306, 2274, 29889, 7280, 2984, 366, 508, 26987, 338, 20888, 714, 304, 12962, 25700, 470, 26014, 297, 596, 4038, 29889, 9267, 4940, 943, 470, 20251, 2629, 1438, 25700, 505, 6694, 297, 2613, 2838, 292, 322, 1122, 5957, 3889, 3581, 2701, 2613, 2838, 292, 5786, 304, 1009, 5144, 29889, 739, 29915, 29879, 7088, 8454, 565, 738, 12962, 23736, 800, 366, 6852, 304, 3867, 445, 1134, 310, 2304, 29889, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32002, 32

In [None]:
from datasets import Dataset

# Wrap the tokenized records in a Dataset with a single "data" column.
# NOTE(review): the Trainer further down is given `tokenized_dataset` (the plain
# list), not this `dataset` object — confirm whether `dataset` is still needed.
dataset = Dataset.from_dict({"data": tokenized_dataset})

In [None]:
# Show the dataset's columns and row count.
print(dataset)

Dataset({
    features: ['data'],
    num_rows: 17076
})


### Training

In [None]:
import transformers

In [None]:
# Hyper-parameters for the fine-tuning run.
training_args = transformers.TrainingArguments(
    output_dir='koala_outputs_2', ## replace with your desired location
    num_train_epochs=2.0,
    # 8 sequences per step x 4 accumulation steps = 32 sequences per optimizer update (per device)
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_ratio=0.05,
    fp16=True,
    optim="paged_adamw_8bit",
    logging_steps=1,
)

# mlm=False selects causal-LM collation rather than masked-LM.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=causal_lm_collator,
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

In [None]:
# Run the fine-tuning loop; the loss is logged at every step (logging_steps=1).
trainer.train()

Step,Training Loss
1,3.54
2,3.4092
3,3.6762
4,3.66
5,3.3745
6,3.4696
7,3.4782
8,3.2832
9,3.4181
10,3.1988


TrainOutput(global_step=1066, training_loss=1.3310948487294325, metrics={'train_runtime': 4943.3743, 'train_samples_per_second': 6.909, 'train_steps_per_second': 0.216, 'total_flos': 3.38474720722944e+17, 'train_loss': 1.3310948487294325, 'epoch': 2.0})

In [None]:
# Persist the trained model to disk; the same path is read back in the loading section below.
# NOTE(review): for a PEFT-wrapped model this presumably writes only the adapter
# weights and config — confirm.
trainer.save_model("/your_file_path") ## replace with location of your model

## Load adapters from the Drive

In [None]:
!pip install bitsandbytes
!pip install accelerate



In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import LlamaForCausalLM, LlamaTokenizer
import bitsandbytes as bnb
import accelerate

#peft_model_id = "RahulSundkar/mental_health_koala"
peft_model_id = "/your_file_path" ## replace with location of your model
# Read the adapter config to find out which base checkpoint the adapters belong to.
config = PeftConfig.from_pretrained(peft_model_id)
# Reload that base model in 8-bit, together with its tokenizer.
# NOTE(review): the tokenizer is loaded fresh from the base checkpoint, so the
# '<USER>', '<AI>' and '[PAD]' tokens added before training are not re-added
# here, and the embeddings are not resized — confirm this is intended.
model_1 = LlamaForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map ='auto')
tokenizer = LlamaTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
model_1 = PeftModel.from_pretrained(model_1, peft_model_id)

## Inference

In [None]:
!pip install langchain



In [None]:
from langchain import PromptTemplate, LLMChain, HuggingFacePipeline
from transformers import pipeline

# Wrap the adapter-augmented model in a text-generation pipeline.
pipe = pipeline(
    "text-generation",
    model=model_1,
    tokenizer=tokenizer,
    max_length=512,
    # temperature only takes effect when sampling is enabled; without
    # do_sample=True generation is greedy and the temperature is ignored.
    do_sample=True,
    temperature=0.9,
)

# Expose the pipeline through LangChain's LLM interface.
local_llm = HuggingFacePipeline(pipeline=pipe)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead',

In [None]:
# Prompt: an instruction describing the chatbot's role, followed by the user's
# message wrapped in the <USER>/<AI> markers used in the training data.
# The {input} placeholder is filled in per query.
prompt = """You are a Mental Health Chatbot. As a Mental Health Chatbot, your role is to provide assistance and support to individuals seeking help. You will be given a query from a user related to mental health, and your task is to generate a helpful and short response considering that you will be having a conversation with the user. Do not generate a user response\nBEGINNING OF CONVERSATION: <USER> {input}\n<AI>"""

template = PromptTemplate(input_variables = ['input'], template = prompt)

In [None]:
# Chain the prompt template with the local pipeline-backed LLM.
# verbose=True prints the formatted prompt on each run.
llmchain = LLMChain(llm = local_llm, prompt = template, verbose = True)

In [None]:
# Example query. The variable is deliberately not called `input`, which would
# shadow Python's built-in input() for the rest of the session.
user_query = "I caught my husband cheating on me. I am broken now and i dont know what to do."
llmchain.run(user_query)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a Mental Health Chatbot. As a Mental Health Chatbot, your role is to provide assistance and support to individuals seeking help. You will be given a query from a user related to mental health, and your task is to generate a helpful and short response considering that you will be having a conversation with the user. Do not generate a user response
BEGINNING OF CONVERSATION: <USER> I caught my husband cheating on me. I am broken now and i dont know what to do.
<AI>[0m





[1m> Finished chain.[0m


" I'm sorry to hear that. It's understandable that you're feeling hurt and confused. Have you considered seeking professional help? A therapist can provide you with support and guidance as you navigate this difficult situation. Would you like me to help you find a therapist in your area?"