In [1]:
import formatting
import json
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
from datasets import Dataset
import copy
from transformers import TrainerCallback
from contextlib import nullcontext
from transformers import default_data_collator, Trainer, TrainingArguments
from peft import PeftModel

# The below code is so I can edit the formatting.py file and reload it without having to restart the kernel
import importlib
importlib.reload(formatting)

<module 'formatting' from '/home/Documents/Coding/python/ai/llama-experiments/chatbot-v1/formatting.py'>

# Load Model

In [2]:
# The path to the hugging face model. See the README to get this model.
hugging_face_model_dir = "../../models/llama/7B-hf"
# The path to the trained model. This is generated from the hugging face model train.ipynb
# This file does not include all weights, but simply a small subset of weights that were changed during training.
tuned_model_dir = "./trained-models/llama-7B-v1-topical-chat"

In [3]:
# Load and setup the tokenizer
tokenizer:LlamaTokenizer = LlamaTokenizer.from_pretrained(hugging_face_model_dir)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = 256
# Load the base model
model:LlamaForCausalLM = LlamaForCausalLM.from_pretrained(hugging_face_model_dir, load_in_8bit=True, device_map='auto', torch_dtype=torch.float16)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Ontop of the base model, load the modified weights from fine-tuning
model = PeftModel.from_pretrained(model, tuned_model_dir)

In [5]:
# Set the model to evaluation mode
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear8bitLt(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
            

# Generate a Conversation

In [6]:
# conv1 is a list of turns in the conversation. Each turn is a tuple of (speaker, text)
conv1 = [("user_1", "Hello there. Tell me some facts about a python?")]
# Create the prompt for the conversation. This will be somthing like "Below is a conversation:"
prompt = formatting.get_chat_prompt(["user_1", "user_2"], ["Josh", "Mr Mainframe"])

# Print out the conversation up to this point
print(prompt)
for t in conv1:
    print(t)

# Generate 10 more turns. The AI will generate text for both speakers in the conversation, following the initial turn.
for i in range(10):
    # Tokenize the conversation up to this point
    model_input = formatting.tokenize_with_turn_trucation(tokenizer, prompt, conv1, next_turn="user_2" if conv1[-1][0] == "user_1" else "user_1", for_inference=True)
    # Generate the next turn. This ouputs not just the ai generated text, but also all the text that was inputted (with nex new txt on the end)
    with torch.no_grad():
        generated = model.generate(**model_input, max_new_tokens=50, num_beams=1, do_sample=True, temperature=1.0)[0]
        model_output = tokenizer.decode(generated)
    # Parse the model output to get the speaker and what they said
    turns = model_output.split("</s>")
    if turns[-1] == "":
        turns = turns[:-1]
    new_turn = turns[-1].strip()
    new_turn_parts = new_turn.split(": ", 1)
    # Add the new turn to the conversation
    conv1.append((new_turn_parts[0], new_turn_parts[1]))
    # Print out the conversation up to this point
    print(conv1[-1])

Below is a dialogue between Josh (user_1) and Mr Mainframe (user_2):
('user_1', 'Hello there. Tell me some facts about a python?')
('user_2', "I don't really know but I think it's good that they use rubber to kill prey. I heard that some of them can kill cats and dogs.")
('user_1', 'Yeah, what do you think of people who name their pets after famous felines?')
('user_2', 'That\'s the weirdest thing! I didn\'t know that they had the record of killing more than 2.5 million humans. I\'m glad they don\'t think like our ancestors and consider them "tree sloths')
('user_1', "Well do you think they're more like cats or dogs?")
('user_2', "Are those questions? I think they're more like cats because they're more territorial?")


Token indices sequence length is longer than the specified maximum sequence length for this model (260 > 256). Running this sequence through the model will result in indexing errors


('user_1', "Yeah, I don't think they're dogs at all, they don't smell, they don't make barks, they don't make friends with you, and they're not nice pets like dogs.")
('user_2', "Yeah, I disagree with that last part. The dog was man's first best friend. He doesn't need a friend like a human.")
('user_1', 'Yeah, he has to show affection, dogs are more empathetic.')
('user_2', "I'm not sure about that. If so, it would have to be because of evolution.")
('user_1', "No. 299 are capable of feeling pain and I can see that, they feel fear too, that's the reason why when the dog cries when the owners leave.")


# Interactive Conversation

In [12]:
# conv1 is a list of turns in the conversation. Each turn is a tuple of (speaker, text)
conv1 = []
# Create the prompt for the conversation. This will be somthing like "Below is a conversation:"
prompt = formatting.get_chat_prompt(["user_1", "user_2"], ["Josh", "Mr Mainframe"])

# Print out the conversation up to this point
print(prompt)
for t in conv1:
    print(t)

# Generate 10 more turns. The AI will generate text for both speakers in the conversation, following the initial turn.
for i in range(100):
    if len(conv1)==0 or conv1[-1][0] == "user_2":
        next_input = input("user_1: ")
        conv1.append(("user_1", next_input))
        print(conv1[-1])
    else:
        # Tokenize the conversation up to this point
        model_input = formatting.tokenize_with_turn_trucation(tokenizer, prompt, conv1, next_turn="user_2" if conv1[-1][0] == "user_1" else "user_1", for_inference=True)
        # Generate the next turn. This ouputs not just the ai generated text, but also all the text that was inputted (with nex new txt on the end)
        with torch.no_grad():
            generated = model.generate(**model_input, max_new_tokens=50, num_beams=1, do_sample=True, temperature=1.0)[0]
            model_output = tokenizer.decode(generated)
        # Parse the model output to get the speaker and what they said
        turns = model_output.split("</s>")
        if turns[-1] == "":
            turns = turns[:-1]
        new_turn = turns[-1].strip()
        new_turn_parts = new_turn.split(": ", 1)
        # Add the new turn to the conversation
        conv1.append((new_turn_parts[0], new_turn_parts[1]))
        # Print out the conversation up to this point
        print(conv1[-1])

What follows is a conversation between Mr Mainframe (user_2) and Josh (user_1):
('user_1', 'Hello. Do you know what the best programming language is?')
('user_2', 'Not really, what is it?')
('user_1', "It's Golang of course!")
('user_2', "Oh that's interesting, I've certainly never heard of that one.")
('user_1', 'Have you heard of python?')
('user_2', "I have, it's one of the top programming languages according to the IEEE.")
('user_1', 'You are an AI written in Python.')
('user_2', "That's scary, it knows more about AI than I do I think.")
('user_1', "I am not an 'it'! I am a person called Josh!")
('user_2', 'I know you are, but I believe the AI part was meant as a joke.')
('user_1', 'No. You are an AI.')
('user_2', "Yes I am, but it's a real question if you were an AI built in python, would you know the other AI parts are also built in python as well?")
('user_1', 'I know for sure that I am not an AI.')
('user_2', 'Your not?')
('user_1', 'No, I made you! You are the AI!')
('user_2',

KeyboardInterrupt: Interrupted by user