In [1]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [2]:
!pip install datasets
!pip install trl
!pip install bitsandbytes
!pip install peft
!pip install flash-attn --no-build-isolation
!pip install mlflow
!pip install torchinfo
!pip install databricks-sdk

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/547.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m276.5/547.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-an

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import PreTrainedTokenizer
import os
import torch

In [None]:
def get_model_and_tokenizer(model_base_path="./models/Model_weights", model_name="phi-2_sft_final_v8", torch_dtype=torch.float32) -> dict:
    model_path = os.path.join(model_base_path, model_name)
    
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map=torch.device(0),
        torch_dtype=torch_dtype,
        trust_remote_code=True,
    )

    return {
        "model": model,
        "tokenizer": tokenizer
    }

In [None]:
model_dict = get_model_and_tokenizer()

In [6]:
class SpecialTokens:
    conversation_start_token: str = "<|im_start|>"
    conversation_end_token: str = "<|im_end|>"
    pad_token: str = "<|pad|>"
    system_token = "<|system|>"
    user_token = "<|user|>"
    assistant_token = "<|assistant|>"


chat_format_tokens = SpecialTokens

In [4]:
def setup_tokenizer(tokenizer: PreTrainedTokenizer, chat_format_tokens: SpecialTokens) -> None:
    tokenizer.add_special_tokens(
        {
            "additional_special_tokens": [
                chat_format_tokens.conversation_start_token,
                chat_format_tokens.conversation_end_token,
                chat_format_tokens.pad_token,
                chat_format_tokens.system_token,
                chat_format_tokens.user_token,
                chat_format_tokens.assistant_token
            ]
        }
    )

    CHAT_TEMPLATE = """{% for message in messages %}{{'<|im_start|>' + '<|' + message['role'] + '|>' + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}"""
    tokenizer.chat_template = CHAT_TEMPLATE
    tokenizer.pad_token = chat_format_tokens.pad_token

In [None]:
setup_tokenizer(model_dict.get("tokenizer"), chat_format_tokens)

In [7]:
print(model_dict.get("tokenizer").vocab["<|im_start|>"])
print(model_dict.get("tokenizer").vocab["<|im_end|>"])
print(model_dict.get("tokenizer").vocab["<|system|>"])
print(model_dict.get("tokenizer").vocab["<|user|>"])
print(model_dict.get("tokenizer").vocab["<|assistant|>"])

50295
50296
50298
50299
50300


In [9]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
messages = [
    {
        "role": "system",
        "content": "You are an AI assistant, you will be given a task. You must generate a detailed and long answer."
    },
    {
        "role": "user",
        "content": "Suggest some good books on philosophy."
    }
]

tokenized_chat = model_dict.get("tokenizer").apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(device)

In [10]:
print(model_dict.get("tokenizer").decode(tokenized_chat[0]))

<|im_start|><|system|>
You are an AI assistant, you will be given a task. You must generate a detailed and long answer.<|im_end|>
<|im_start|><|user|>
Suggest some good books on philosophy.<|im_end|>



In [11]:
generated_ids = model_dict.get("model").generate(tokenized_chat, max_new_tokens=720, eos_token_id=tokenizer.eos_token_id)
model_dict.get("tokenizer").batch_decode(generated_ids, skip_special_tokens=False, num_beams=2, do_sample=True)[0]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'<|im_start|><|system|>\nYou are an AI assistant, you will be given a task. You must generate a detailed and long answer.<|im_end|>\n<|im_start|><|user|>\nSuggest some good books on philosophy.<|im_end|>\n<|im_start|><|assistant|>\n1. "The Republic" by Plato\n2. "Meditations" by Marcus Aurelius\n3. "The Prince" by Niccolò Machiavelli\n4. "Theology" by Thomas Aquinas\n5. "Theology" by William of Ockham\n6. "Theology" by Thomas Aquinas\n7. "Theology" by William of Ockham\n8. "Theology" by Thomas Aquinas\n9. "Theology" by William of Ockham\n10. "Theology" by Thomas Aquinas\n\nThese books are considered classics in the field of philosophy and provide deep insights into various philosophical concepts and theories. They are written by renowned philosophers and scholars, making them valuable resources for anyone interested in exploring the world of philosophy.<|im_end|>\n<|endoftext|>'