In [1]:
!pip install transformers
!pip install accelerate
!pip install bitsandbytes
!pip install peft
!pip install datasets
!pip install tqdm

Collecting transformers
  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.8/126.8 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Downloading huggingface_hub-0.20.2-py3-none-any.whl.metadata (12 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Downloading tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading transformers-4.36.2-py3-non

In [1]:
import torch
from tqdm import tqdm
from datasets import load_dataset, Dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from huggingface_hub import notebook_login

# Preprocessing

In [3]:
# Interactive Hugging Face Hub login (renders a widget). The stored token is
# what the later model.push_to_hub(...) call relies on.
# NOTE(review): presumably also required to download the Llama-2 checkpoint — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
# Hub id of the base checkpoint; used for both the tokenizer and the model
model_name = "meta-llama/Llama-2-70b-chat-hf"
# Device that inference inputs are moved to: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Token budget per training example: passed as max_length when tokenizing and
# used as the inclusive cutoff when filtering out over-long conversations
max_token = 512

In [3]:
# Instantiate the (fast) tokenizer that belongs to the checkpoint in model_name
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Register '[PAD]' as the padding token so that batches can be padded.
# NOTE(review): the earlier comment here mentioned gpt2, but model_name is a
# Llama-2 checkpoint; presumably its tokenizer also ships without a pad token — confirm.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# mlm=False selects the causal (next-token) language-modelling collator
# rather than the masked-LM one
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [4]:
# Tokenization functions
def tokenize_function(row):
    """
    Tokenize the text stored under the row's "dialog" key.

    Truncation is explicitly disabled, so over-long rows keep all of their
    tokens and can be identified (and dropped) by a later length filter.
    """
    dialog_text = row["dialog"]
    encoding = tokenizer(dialog_text, max_length=max_token, truncation=False)
    return encoding

def is_shorter_than_max_token(row, limit=None):
    """
    Return True when a tokenized row fits within the token budget.

    The result is True for rows that should be KEPT, which makes the function
    directly usable as a ``Dataset.filter`` predicate.

    Args:
        row: mapping that contains an 'input_ids' sequence.
        limit: maximum number of tokens allowed (inclusive). When omitted,
            the notebook-level ``max_token`` is used.

    Returns:
        bool: True if the row has at most ``limit`` tokens, False otherwise.
    """
    if limit is None:
        # Resolved at call time so that changing max_token and re-running the
        # filter picks up the new value
        limit = max_token
    return len(row['input_ids']) <= limit

def split_conversation(conversation):
    """
    Expand a conversation into its growing prefixes.

    One prefix is produced per completed pair of utterances: the first two
    utterances, then the first four, and so on. A trailing unpaired utterance
    never appears in any prefix.
    """
    prefixes = []
    for end in range(2, len(conversation) + 1, 2):
        prefixes.append(conversation[:end])
    return prefixes

def format_conversation(conversation: list[str]) -> str:
    """
    Render a list of utterances as a single role-tagged prompt string.

    The final two utterances are tagged "<User>" and "<Assistant>". Everything
    before them alternates between "<Past User>" (even positions) and
    "<Past Assistant>" (odd positions). Each tagged utterance except the last
    is terminated by a newline. Fewer than two utterances yield an empty string.
    """
    if len(conversation) < 2:
        return ""

    *earlier_turns, user_turn, assistant_turn = conversation

    pieces = []
    for position, utterance in enumerate(earlier_turns):
        tag = "<Past User>" if position % 2 == 0 else "<Past Assistant>"
        pieces.append(tag + utterance + "\n")

    # The current exchange closes the prompt; no newline after the assistant turn
    pieces.append("<User>" + user_turn + "\n")
    pieces.append("<Assistant>" + assistant_turn)

    return "".join(pieces)

def convert_to_conversation(row):
    """
    Replace the row's list of utterances with one formatted training string.

    The utterances under "dialog" are rendered with format_conversation, the
    literal "</s>" marker is appended, and surrounding whitespace is stripped.
    The result is returned under the same "dialog" key.
    """
    rendered = format_conversation(row["dialog"]) + "</s>"
    return {"dialog": rendered.strip()}

In [5]:
# Load the DailyDialog corpus
dataset = load_dataset("daily_dialog")

# Expand each dialogue into cumulative prefixes, one per two-utterance exchange
split_dataset = dataset.map(lambda row: {'dialog': split_conversation(row['dialog'])})

# Every row now holds a list of prefixes; unroll them so that each prefix
# becomes its own example
flatten_dataset_train = Dataset.from_dict(
    {'dialog': [prefix for prefixes in split_dataset["train"]["dialog"] for prefix in prefixes]}
)
flatten_dataset_valid = Dataset.from_dict(
    {'dialog': [prefix for prefixes in split_dataset["validation"]["dialog"] for prefix in prefixes]}
)
flatten_dataset_test = Dataset.from_dict(
    {'dialog': [prefix for prefixes in split_dataset["test"]["dialog"] for prefix in prefixes]}
)

# Rebuild the split dictionary, then in order:
#   1. render each prefix as a role-tagged string,
#   2. tokenize it,
#   3. drop conversations longer than the token limit
dataset = (
    DatasetDict({
        'train': flatten_dataset_train,
        'validation': flatten_dataset_valid,
        'test': flatten_dataset_test,
    })
    .map(convert_to_conversation)
    .map(tokenize_function)
    .filter(is_shorter_than_max_token)
)

Map:   0%|          | 0/41637 [00:00<?, ? examples/s]

Map:   0%|          | 0/3851 [00:00<?, ? examples/s]

Map:   0%|          | 0/3700 [00:00<?, ? examples/s]

Map:   0%|          | 0/41637 [00:00<?, ? examples/s]

Map:   0%|          | 0/3851 [00:00<?, ? examples/s]

Map:   0%|          | 0/3700 [00:00<?, ? examples/s]

Filter:   0%|          | 0/41637 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3851 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3700 [00:00<?, ? examples/s]

# Load the model and prepare it for training

In [9]:
# bitsandbytes quantization settings: load weights in 4-bit NF4 with double
# quantization, and run computations in bfloat16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base checkpoint quantized as configured above; device_map="auto"
# leaves device placement to the loader
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", quantization_config=bnb_config)
# The tokenizer gained a '[PAD]' token, so the embedding matrix is resized to
# match len(tokenizer). This must happen before the model is wrapped for training.
model.resize_token_embeddings(len(tokenizer))
# PEFT helper that readies a quantized model for training.
# NOTE(review): the training log reports gradient checkpointing as active;
# presumably it is switched on here — confirm.
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

In [10]:
# LoRA adapter configuration
config = LoraConfig(
    r=16,  # rank of the low-rank update matrices
    lora_alpha=32, # alpha scaling (here 2 * r)
    lora_dropout=0.1,  # dropout applied inside the LoRA layers
    bias="none",  # bias terms are not trained
    task_type="CAUSAL_LM",
    # NOTE(review): no target_modules given, so presumably PEFT's defaults
    # for this model type apply — confirm which layers are adapted
)

# Wrap the quantized base model with the LoRA adapters; `model` now refers to
# the PEFT-wrapped model used for training, inference and push_to_hub
model = get_peft_model(model, config)

# Training

In [None]:
# Training hyper-parameters for a single epoch of LoRA fine-tuning
training_args = TrainingArguments(
    output_dir="output_dir",
    # 5 sequences per device per step, accumulated over 10 steps:
    # 50 sequences per optimizer update per device
    per_device_train_batch_size=5,
    gradient_accumulation_steps=10,
    num_train_epochs=1,
    learning_rate=1e-4,
    # run evaluation on the validation split at the end of each epoch
    evaluation_strategy="epoch",
    warmup_steps=50,
    weight_decay=1e-3,
    optim="paged_adamw_32bit",
    lr_scheduler_type="cosine",
    # NOTE(review): no seed and no explicit eval batch size are set here;
    # library defaults apply — confirm that is intended
)

# Trainer wires together the PEFT model, the tokenized splits and the
# causal-LM collator (which pads each batch using the tokenizer's pad token)
trainer = Trainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer
)
trainer.train()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss


# Inference

In [12]:
# Alternating [user, assistant, user, assistant, ...] utterances of the current
# chat session. Re-running this cell starts a fresh conversation.
conversation_history = []

def talk_with_llm(chat: str, max_new_tokens: int = 256) -> str:
    """
    Send one user message to the model and return its reply.

    The running conversation plus the new message is rendered with
    format_conversation (with an empty assistant slot for the model to fill),
    tokenized, and passed to model.generate. On success, both the user message
    and the reply are appended to the module-level ``conversation_history``.

    Args:
        chat: the user's new message.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        The model's reply: the decoded text that follows the prompt.

    Raises:
        Whatever the tokenizer or model.generate raises. In that case
        ``conversation_history`` is left exactly as it was before the call.
    """
    # Build the prompt from a copy so that a failure during tokenization or
    # generation cannot leave a dangling user turn / empty reply in the history
    pending_history = conversation_history + [chat, ""]
    conversation = format_conversation(pending_history)

    # Encode and move tensors onto the target device
    encoded_input = tokenizer(conversation, return_tensors='pt')
    encoded_input = {k: v.to(device) for k, v in encoded_input.items()}
    prompt_length = encoded_input["input_ids"].shape[1]

    output = model.generate(**encoded_input, max_new_tokens=max_new_tokens)

    # generate() returns prompt + continuation. Measure the prompt as the
    # tokenizer decodes it rather than as it was typed: the two can differ
    # (e.g. special-token text inside the message is skipped on decode), which
    # would shift a cut based on len(conversation). Slicing the decoded text,
    # instead of decoding only the new tokens, keeps the reply's leading
    # whitespace intact.
    full_text = tokenizer.decode(output[0], skip_special_tokens=True)
    prompt_text = tokenizer.decode(output[0][:prompt_length], skip_special_tokens=True)
    response = full_text[len(prompt_text):]

    # Commit the completed exchange only after everything succeeded
    conversation_history.extend([chat, response])
    return response

In [13]:
talk_with_llm("Can you help me pick up my kids after school today? I'll need to run to a dentist appointment.")



" Of course. Just give me the name of the school , and I'll pick them up. "

In [14]:
talk_with_llm("The name is ABC Secondary school")

" I'll pick them up and bring them to your house . "

In [15]:
talk_with_llm("Thanks buddy, I'll buy you a drink tonight.")

" I'm on the wagon , but I'll take a soda . "

# Push to the Hugging Face Hub

In [16]:
# Upload the PEFT-wrapped model to the given Hub repository. private=False
# makes the repository publicly visible.
# NOTE(review): only the model is pushed here; the tokenizer (with its added
# '[PAD]' token) is not uploaded by this call — confirm whether users of the
# adapter need to recreate it and resize the base model's embeddings.
model.push_to_hub("danjie/Chadgpt-Llama2-70b-conversation", commit_message="first draft", private=False)

adapter_model.safetensors:   0%|          | 0.00/131M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Danjie/Chadgpt-Llama2-70b-conversation/commit/820ca01c88123e796134704ab32e01a537d46b9a', commit_message='first draft', commit_description='', oid='820ca01c88123e796134704ab32e01a537d46b9a', pr_url=None, pr_revision=None, pr_num=None)