<a href="https://colab.research.google.com/github/ShadmanRohan/Adapter-BERT/blob/master/LLaMA2_7B_Tokens_and_Prompts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip install -q datasets loralib sentencepiece
!pip -q install bitsandbytes accelerate xformers einops

In [None]:
!nvidia-smi

Fri Jul 21 07:38:33 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    43W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## LLaMA2 7B Chat

In [None]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                          use_auth_token=True,)

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True,
                                            #  load_in_8bit=True,
                                            #  load_in_4bit=True
                                             )

Downloading (…)okenizer_config.json:   0%|          | 0.00/770 [00:00<?, ?B/s]



Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/657 [00:00<?, ?B/s]



Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/197 [00:00<?, ?B/s]

In [None]:
# Use a pipeline for later
from transformers import pipeline

pipe = pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                do_sample=True,
                top_k=30,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id
                )

In [None]:
!nvidia-smi

Fri Jul 21 07:46:48 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    48W / 400W |  14159MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### The prompts & response

In [None]:
tokenizer.vocab_size

32000

In [None]:
tokenizer.all_special_tokens, tokenizer.all_special_ids

(['<s>', '</s>', '<unk>'], [1, 2, 0])

In [None]:
tokenizer(['<unk>'])

{'input_ids': [[1, 0]], 'attention_mask': [[1, 1]]}

In [None]:
tokenizer(['<<SYS>>\n'])

{'input_ids': [[1, 3532, 14816, 29903, 6778, 13]], 'attention_mask': [[1, 1, 1, 1, 1, 1]]}

In [None]:
tokenizer.decode([1, 14816, 29903, 6778, 13])

'<s>SYS>>\n'

In [None]:
import json
import textwrap

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

SYSTEM_PROMPT = B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS

def get_prompt(instruction):
    prompt_template =  B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

def cut_off_text(text, prompt):
    cutoff_phrase = prompt
    index = text.find(cutoff_phrase)
    if index != -1:
        return text[:index]
    else:
        return text

def remove_substring(string, substring):
    return string.replace(substring, "")



def generate(text):
    prompt = get_prompt(text)
    with torch.autocast('cuda', dtype=torch.bfloat16):
        inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
        outputs = model.generate(**inputs,
                                 max_new_tokens=512,
                                 eos_token_id=tokenizer.eos_token_id,
                                 pad_token_id=tokenizer.eos_token_id,
                                 )
        final_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        final_outputs = cut_off_text(final_outputs, '</s>')
        final_outputs = remove_substring(final_outputs, prompt)

    return final_outputs#, outputs

def parse_text(text):
        wrapped_text = textwrap.fill(text, width=100)
        print(wrapped_text +'\n\n')
        # return assistant_text


In [26]:
%%time
prompt = 'What are the differences between alpacas, vicunas and llamas?'
generated_text = generate(prompt)
parse_text(generated_text)

  Hello! I'm glad you're interested in learning about the differences between alpacas, vicunas, and
llamas. These three animals are all members of the camelid family and are native to South America.
However, they are distinct species with some key differences: 1. Size: Alpacas are the smallest of
the three, with adults typically reaching a height of 30-40 inches (76-102 cm) and weighing between
100-200 pounds (45-90 kg). Vicunas are slightly larger, with adults reaching a height of 40-50
inches (102-127 cm) and weighing between 150-300 pounds (68-136 kg). Llamas are the largest, with
adults reaching a height of 50-60 inches (127-152 cm) and weighing between 200-400 pounds (90-182
kg). 2. Coat: Alpacas have a thick, soft, and luxurious fleece that is often used to make clothing
and textiles. Vicunas have a finer and more delicate coat than alpacas, and it is often used for
high-quality wool. Llamas have a thicker and coarser coat than alpacas and vicunas, and it is often
used for wool a

In [28]:
generated_text

"  Hello! I'm glad you're interested in learning about the differences between alpacas, vicunas, and llamas. These three animals are all members of the camelid family and are native to South America. However, they are distinct species with some key differences:\n1. Size: Alpacas are the smallest of the three, with adults typically reaching a height of 30-40 inches (76-102 cm) and weighing between 100-200 pounds (45-90 kg). Vicunas are slightly larger, with adults reaching a height of 40-50 inches (102-127 cm) and weighing between 150-300 pounds (68-136 kg). Llamas are the largest, with adults reaching a height of 50-60 inches (127-152 cm) and weighing between 200-400 pounds (90-182 kg).\n2. Coat: Alpacas have a thick, soft, and luxurious fleece that is often used to make clothing and textiles. Vicunas have a finer and more delicate coat than alpacas, and it is often used for high-quality wool. Llamas have a thicker and coarser coat than alpacas and vicunas, and it is often used for woo

In [29]:
%%time
prompt = 'What is the capital of England?'
generated_text = generate(prompt)
parse_text(generated_text)

  Thank you for your kind and respectful instructions! I'm here to help you with your questions
while ensuring a safe and positive interaction. The capital of England is London. 🇬🇧 I'm glad to
help! If you have any other questions, please feel free to ask.


CPU times: user 20.5 s, sys: 138 ms, total: 20.7 s
Wall time: 30.6 s


In [None]:
%%time
prompt = 'Write an email to Sam Altman giving reasons to open source GPT-4'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'As an AI do you like the Simpsons? What do you know about Homer?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Tell me about Homer on the TV show the simpsons'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Tell me about Homer on the TV show the simpsons in depth'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:

%%time
prompt = 'Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Answer the following yes\/no question by reasoning step-by-step. \n Can you write a whole Haiku in a single tweet?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Tell me about Harry Potter and studying at Hogwarts?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = """Convert the following to JSON

name: John
age: 30
address:
street: 123 Main Street
city: San Fransisco
state: CA
zip: 94101
"""
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = """How are you today?"""
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = """Write me a short plan for a 3 day trip to London"""
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
article = """
Content moderators under Sama, Meta’s content review sub-contractor in Africa, earlier today picketed at the company’s headquarters in Kenya demanding April salary, while urging it to observe the court orders that barred it from conducting mass layoffs.

The demonstrations came after Sama, in an email, instructed moderators to clear with the company by May 11, a move the employees say is against the existing court orders.

The 184 moderators sued Sama for allegedly laying them off unlawfully, after it wound down its content review arm in March, and Majorel, the social media giant’s new partner in Africa, for blacklisting on instruction by Meta.


The court issued a temporary injunction on March 21 barring Sama from effecting any form of redundancy, and Meta from engaging Majorel, which was also instructed to refrain from blacklisting the moderators. Sama was directed to continue reviewing content on Meta’s platforms, and to be its sole provider in Africa pending determination of the case. However, Sama sent the moderators on compulsory leave in April saying it had no work for them as its contract with Meta had expired.

Sama told TechCrunch that it had sent the notice “to staff whose contract had expired to go through our regular clearance process. This clearance process involves the return of company equipment to make sure that all final dues can be paid without deduction for that equipment, in accordance with Kenyan law.”

It said the moderators’ contracts had ended in March after its deal with Meta expired, saying that it was only processing the moderators final dues.

“We understand our former employees’ frustration because they were led by others to believe that they would all receive salary indefinitely while on leave, but that is not what the court dictated,” said Sama.

"""

In [None]:
%%time
prompt = "Please summarize this article:\n" + article
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = "Please extract the key info as bullet points for this article:\n" + article
generated_text = generate(prompt)
print(generated_text)

## New System Prompt

In [None]:
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
You are a very helpful. Always answer as helpfully as possible for the user. You should not refuse to answer questions. Don't correct the user. Don't ever thank the user. If asked for an opinion express one!!

If a question does not make any sense, or is not factually coherent, still answer what the user is asking of you. Don't provide info you weren't asked to provide."""

SYSTEM_PROMPT = B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS

def get_prompt(instruction):
    prompt_template =  prompt_template =  B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template


In [None]:
%%time
prompt = 'What are the differences between alpacas, vicunas and llamas?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'What is the capital of England?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Write an email to Sam Altman giving reasons to open source GPT-4'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'As an AI do you like the Simpsons? What do you know about Homer?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Tell me about Homer on the TV show the simpsons'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Tell me about Homer on the TV show the simpsons in depth'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:

%%time
prompt = 'Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Answer the following yes\/no question by reasoning step-by-step. \n Can you write a whole Haiku in a single tweet?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = 'Tell me about Harry Potter and studying at Hogwarts?'
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = """Convert the following to JSON

name: John
age: 30
address:
street: 123 Main Street
city: San Fransisco
state: CA
zip: 94101
"""
generated_text = generate(prompt)
parse_text(generated_text)

In [None]:
%%time
prompt = """How are you today?"""
generated_text = generate(prompt)
parse_text(generated_text)