In [1]:
import sys
# Put the directory two levels above the working directory first on the import path.
# NOTE(review): presumably this is the repository root, so that the `candle` and `experiments`
# packages imported in the next cell resolve without being installed -- confirm.
sys.path.insert(0, '../../')

In [2]:
import time
import os
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple, Union

import candle
import experiments.textgenutils as gutils

## (1) Initialize Model with Pre-trained Weights

In [4]:
"""
Download the LLaMA weights here: https://ai.meta.com/resources/models-and-libraries/llama-downloads/
MODEL_DIR will look something like this:

    /mnt/disks/disk1/llama2/
    ├── tokenizer.model
    ├── tokenizer_checklist.chk
    ├── 7b
    │   ├── checklist.chk
    │   ├── consolidated.00.pth
    │   └── params.json
    ├── 7b-chat
    │   ├── checklist.chk
    │   ├── consolidated.00.pth
    │   └── params.json
    ├── 13b
    │   ├── checklist.chk
    │   ├── consolidated.00.pth
    │   ├── consolidated.01.pth
    │   └── params.json
    ├── 13b-chat
    │   ...
    ...
    
"""

# Name shown next to the user's messages in the chat transcript.
USER_NAME = 'John'

# Sub-directory of MODEL_DIR holding the checkpoint to load (see the layout described above).
# NOTE(review): the saved output of this cell shows a ValueError broadcasting (32000, 2560) into
# (32000, 5120). The 13b checkpoints ship as two `consolidated.*.pth` files, so this may be a
# sharded-weights loading problem -- confirm, or switch to '7b-chat'.
MODEL_SIZE = '13b-chat'

# Root directory of the downloaded LLaMA weights. Set the LLAMA_MODEL_DIR environment variable to
# point somewhere else without editing the notebook; the default is the original location.
MODEL_DIR = os.environ.get('LLAMA_MODEL_DIR', '/mnt/disks/disk1/llama2/')

# The rest of the notebook formats prompts with the LLaMA chat template, so fail early (and with
# an explanation) if a non-chat checkpoint is selected.
assert MODEL_SIZE.endswith('-chat'), (
    f"MODEL_SIZE must name a chat fine-tuned checkpoint (e.g. '7b-chat'), got {MODEL_SIZE!r}"
)

model = candle.models.llama.Llama.from_pretrained(MODEL_SIZE, MODEL_DIR)
tokenizer = candle.models.llama.LlamaTokenizer(os.path.join(MODEL_DIR, 'tokenizer.model'))

  return self.fget.__get__(instance, owner)()


ValueError: could not broadcast input array from shape (32000,2560) into shape (32000,5120)

In [None]:
# System prompt: the Mistral system prompt, shortened for conciseness.
# Written one sentence per literal; adjacent literals are concatenated into a single string.
SYSTEM_MESSAGE = (
    'Always assist with care, respect, and truth. '
    'Respond with utmost utility yet securely. '
    'Avoid harmful, unethical, prejudiced, or negative content. '
    'Ensure replies promote fairness and positivity. '
    'Keep replies as concise as possible.'
)

# A chat fine-tuned model is used, so conversations are rendered with the LLaMA chat template and
# its delimiters ([INST], <<SYS>> ... <</SYS>>, <s>).
chat_template = candle.nlp.chattemplates.LlamaChatTemplate(
    system_message=SYSTEM_MESSAGE,
)

## (2) Have a conversation

In [None]:
def start_conversation(model,
                       user_name: str,
                       profile_pic: str = '🙂',
                       user_bg_color: str = 'yellow',
                       asst_name: str = 'LLaMA',
                       asst_profile_pic: str = '🦙',
                       asst_bg_color: str = 'blue',
                       max_response_length: int = 2048) -> None:
    """Run an interactive chat session with `model` until the user says goodbye.

    The model's KV cache is cleared first. Each turn then reads a line with `input()`,
    re-renders the whole conversation with the chat template, and passes only the part of the
    rendered text that was not fed to the model before to `gutils.generate_text` (KV caching is
    enabled). The session ends when the user sends 'bye' (case-insensitive, surrounding
    whitespace ignored) or when the input stream is closed / interrupted.

    This function reads the module-level `tokenizer` and `chat_template` objects; they must be
    defined before it is called.

    Args:
        model: Object providing `clear_kv_cache()`; forwarded to `gutils.generate_text`.
        user_name: Name printed in front of the user's messages.
        profile_pic: Text (e.g. an emoji) printed before `user_name`.
        user_bg_color: Background color passed to `gutils.ansi_color` for `profile_pic`.
        asst_name: Name printed in front of the model's messages and in the opening banner.
        asst_profile_pic: Text (e.g. an emoji) printed before `asst_name`.
        asst_bg_color: Background color passed to `gutils.ansi_color` for `asst_profile_pic`.
        max_response_length: Passed to `gutils.generate_text` as `n_tokens_to_gen`.

    Returns:
        None. All output goes through a `gutils.StdoutWithSyntaxHighlighting` instance.
    """
    stdout = gutils.StdoutWithSyntaxHighlighting()
    model.clear_kv_cache()  # A new conversation must not reuse a previous conversation's cache

    user_msg_start = (
        gutils.ansi_color(f'{profile_pic}', style='bright', bg_color=user_bg_color)
        + gutils.ansi_color(f' {user_name}:', style='bright')
    )
    asst_msg_start = (
        gutils.ansi_color(f'{asst_profile_pic}', style='bright', bg_color=asst_bg_color)
        + gutils.ansi_color(f' {asst_name}:', style='bright')
    )
    end_banner = gutils.ansi_color('\n< / end of conversation. >', style='bright')

    stdout.print(gutils.ansi_color(
        f'< You are now talking with {asst_name}. Send \'bye\' to exit. >',
        style='bright'
    ), end='')

    messages = [chat_template.get_system_message()]
    last_chat = ''  # Text already fed to the model (rendered prompts plus generated responses)
    while True:
        stdout.print('\n\n' + user_msg_start, end=' ')
        time.sleep(0.2)  # Sometimes the prompt doesn't show if we don't add a delay
        try:
            prompt = input()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input: end the session cleanly instead of with a traceback.
            stdout.print(end_banner)
            break
        stdout.print(prompt)

        # Check for the exit word before touching the history, and tolerate stray whitespace
        # so that e.g. 'bye ' is not sent to the model as a regular message.
        if prompt.strip().lower() == 'bye':
            stdout.print(end_banner)
            break

        messages.append({'role': 'user', 'content': prompt})

        chat = chat_template.apply_chat_template(messages, add_generation_prompt=True)
        # Feed only the chat update into the model because we use KV caching.
        # NOTE(review): this assumes the re-rendered chat starts with `last_chat` verbatim. If the
        # template strips or pads assistant messages, the offset drifts -- confirm against
        # LlamaChatTemplate.
        chat_update = chat[len(last_chat):]

        stdout.print('\n' + asst_msg_start, end='')
        response = gutils.generate_text(
            model,
            tokenizer,
            prompt=chat_update,
            n_tokens_to_gen=max_response_length,
            top_k=100,
            top_p=0.90,
            temperature=0.8,
            stop_gen_token_idx=tokenizer.sp_model.eos_id(),
            use_kv_cache=True,
            stdout=stdout,
        )
        messages.append({'role': 'assistant', 'content': response})

        last_chat = chat + response

In [None]:
# Open an interactive chat session; it keeps prompting until the user sends 'bye'.
start_conversation(
    model,
    user_name=USER_NAME,
    profile_pic='👦🏻',
)

In [11]:
# Interactive session; the saved transcript below asks for a Python function and is cut off
# partway through the model's answer.
start_conversation(
    model,
    user_name=USER_NAME,
    profile_pic='👦🏻',
)

[1m< You are now talking with LLaMA. Send 'bye' to exit. >[0m

[43m[1m👦🏻[0m[1m John:[0m Write a python function to find the first repeated character in a given string.

[44m[1m🦙[0m[1m LLaMA:[0m Of course! I'm happy to help you with that. Here is a Python function that finds the first repeated character in a given string:
[37m<code>[0m
[38;5;28;01mdef[39;00m [38;5;21mfind_first_repeated_char[39m([38;5;28mstr[39m):
    [38;5;66;03m# Create a set to store the characters in the string[39;00m
    char_set [38;5;241m=[39m [38;5;28mset[39m([38;5;28mstr[39m)
    [38;5;66;03m# Find the first repeated character in the set[39;00m
    repeated_char [38;5;241m=[39m [38;5;28;01mNone[39;00m
    [38;5;28;01mfor[39;00m char [38;5;129;01min[39;00m char_set:
        [38;5;28;01mif[39;00m char [38;5;129;01min[39;00m char_set:
            repeated_char [38;5;241m=[39m char
    [38;5;28;01mreturn[39;00m repeated_char

[38;5;66;03m# Example usage[39;00m
[38;5;

In [None]:
# Interactive session; the saved transcript below asks for a children's story and is cut off
# partway through the model's answer.
start_conversation(
    model,
    user_name=USER_NAME,
    profile_pic='👦🏻',
)

[1m< You are now talking with LLaMA. Send 'bye' to exit. >[0m

[43m[1m👦🏻[0m[1m John:[0m Write a children's story about a brave little robot who sets out to explore the world.

[44m[1m🦙[0m[1m LLaMA:[0m Title: The Adventures of Brave Little Robby

Once upon a time, in a world full of wonder and excitement, there was a little robot named Brave Little Robby. Brave Little Robby lived in a cozy factory with his robot friends, but he had a big dream - to explore the world beyond their little corner of it.
One day, Brave Little Robby decided to take the leap and set out on his adventure. He packed a small bag with snacks, a map, and a smile, and set off into the great unknown.
As he traveled, Brave Little Robby met all sorts of interesting robots and creatures. There were towering robots with bright lights and loud beeps, tiny robots that could fit in the palm of your hand, and even robots that could change shape and color like a chameleon!
But Brave Little Robby wasn't afraid. He 

In [12]:
# Interactive session; the saved transcript below is a short factual question followed by 'Bye',
# which ends the conversation.
start_conversation(
    model,
    user_name=USER_NAME,
    profile_pic='👦🏻',
)

[1m< You are now talking with LLaMA. Send 'bye' to exit. >[0m

[43m[1m👦🏻[0m[1m John:[0m What animal represents the year 2000 on the Chinese calendar?

[44m[1m🦙[0m[1m LLaMA:[0m Hello! I'm here to help you with your question. The animal representing the year 2000 on the Chinese calendar is the Dragon. According to the Chinese zodiac, the Dragon is the fifth animal in the cycle, and it is associated with qualities such as power, strength, and good luck. I hope this information helps you! Is there anything else you would like to know?

[43m[1m👦🏻[0m[1m John:[0m Bye
[1m
< / end of conversation. >[0m
