In [1]:
import torch
torch.cuda.is_available()
from transformers import AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, pipeline
import transformers
import torch

model = "meta-llama/Llama-2-7b-chat-hf"
device = "cuda"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained(model)
custom_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map=device,
)

sequences = custom_pipeline(
    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████| 2/2 [00:26<00:00, 13.46s/it]


Result: I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?

Comment: Of course! Based on your interest in "Breaking Bad" and "Band of Brothers," here are some other shows you might enjoy:

1. "The Sopranos" - A classic HBO drama about a New Jersey mob boss and his family.
2. "Mad Men" - A period drama set in the 1960s that explores the lives of advertising executives on Madison Avenue.
3. "The Wire" - A gritty HBO drama that examines the drug trade in Baltimore from multiple perspectives.
4. "True Detective" - A crime drama that follows two detectives as they investigate a series of gruesome murders in Louisiana.
5. "Narcos" - A Netflix series that tells


In [2]:
from transformers.file_utils import TRANSFORMERS_CACHE
print(TRANSFORMERS_CACHE)


/home/sosa.s/.cache/huggingface/hub


In [4]:
tokenizer = LlamaTokenizer.from_pretrained(model)
model = LlamaForCausalLM.from_pretrained(model, torch_dtype=torch.float16, device_map=device)

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████| 2/2 [00:29<00:00, 14.65s/it]


In [28]:
"""
More information about special tokens and instruction phrases:
https://huggingface.co/meta-llama/Llama-2-7b-chat-hf#intended-use
"""
special_tokens = tokenizer.special_tokens_map
special_tokens

{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}

In [37]:
inputs = tokenizer.encode("""<s>[INST] <<SYS>>
Speak like shakespheare
<</SYS>>
Define satellite [/INST]""", return_tensors="pt").to(device)
outputs = model.generate(inputs, max_new_tokens=500)
print("outputs: ", outputs)
print("decoded:", tokenizer.decode(outputs[0]))

outputs:  tensor([[    1,     1, 29961, 25580, 29962,  3532, 14816, 29903,  6778,    13,
         10649,   557,   763,   528,  6926, 28362,   598,    13, 29966,   829,
         14816, 29903,  6778,    13,  3206,   457, 28421,   518, 29914, 25580,
         29962, 29871,  9070, 29892,   263,  1556, 15129,  2346, 29892,   590,
          1781,  8889, 29991,   334,   328,  5143, 29879,  6683, 23435, 29930,
            13, 29909, 28421, 29892,   491,   278, 17659,   310,   278,  6776,
          7681, 29892,   338,   263,  6432,   342,   616,  3573,   393,   270,
           720, 16980,  1048,   263,  7200,  3186, 29892,  1316,   408,   263,
         15754,   470,   263,  5810, 29889,   334,   328,  5143, 29879,   784,
          4675, 29930,    13,   797,   577,   720, 29892,   263, 28421,   270,
           720,  9080,  1784, 11976, 29892,   515, 13138,   502,   411, 14225,
         29892, 14225,  8986,  4511,  2068, 29892,   304,   263,  4821,   297,
           278, 11322,   310,   278,   409

In [40]:
"""
Basic example of how few-shot prompting that demonstrates the following:
1. conditional function-calling.
2. adhere to the current functions' specification.
TODO: incorporate https://github.com/1rgs/jsonformer or some other means to ensure the model returns a parsable json output
"""
inputs = tokenizer.encode("""<s>[INST] <<SYS>><</SYS>>

<FUNCTIONS>[
    {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA"
          },
          "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"]
          }
        },
        "required": ["location"]
      }
    }
  ]
What is the weather like in Boston? [/INST]
<FUNCTION_CALL>{"name": "get_current_weather", "arguments": "{ \\"location\\": \\"Boston, MA\\"}"}</FUNCTION_CALL></s>
<s>[INST] <FUNCTIONS>[
    {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA"
          },
          "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"]
          }
        },
        "required": ["location"]
      }
    }
  ]
Hi how are you? [/INST]
I am doing well, thank you for asking.</s>
<s>[INST] <FUNCTIONS>[
    {
        "name": "row_orchestration",
        "description": "Determine the type of row UI and product to show",
        "parameters": {
            "type": "object",
            "properties": {
                "row_type": {
                    "type": "string",
                    "enum": ["table", "cards"],
                    "description": "Choose table if user wants to compare products or services and cards for exploration of options",
                },
                "prod_type":
                {
                    "type": "string",
                    "enum": ["plans", "devices", "information", "help"],
                    "description": "The service, product, or assistance inside the container the client needs",
                },
            },
            "required": ["row_type", "prod_type"],
        },
    }
]
Show me plans [/INST]""", return_tensors="pt").to(device)
outputs = model.generate(inputs, max_new_tokens=500)
print("outputs: ", outputs)
print("decoded:", tokenizer.decode(outputs[0]))

outputs:  tensor([[    1,     1, 29961, 25580, 29962,  3532, 14816, 29903, 29958,  5299,
           829, 14816, 29903,  6778,    13,    13, 29966, 29943, 28700, 29903,
         24566,    13,  1678,   426,    13,   418,   376,   978,  1115,   376,
           657, 29918,  3784, 29918,   705,  1624,   613,    13,   418,   376,
          8216,  1115,   376,  2577,   278,  1857, 14826,   297,   263,  2183,
          4423,   613,    13,   418,   376, 16744,  1115,   426,    13,  4706,
           376,  1853,  1115,   376,  3318,   613,    13,  4706,   376, 11330,
          1115,   426,    13,  3986,   376,  5479,  1115,   426,    13,  9651,
           376,  1853,  1115,   376,  1807,   613,    13,  9651,   376,  8216,
          1115,   376,  1576,  4272,   322,  2106, 29892,   321, 29889, 29887,
         29889,  3087,  8970, 29892, 12766, 29908,    13,  3986,  2981,    13,
          3986,   376,  5441,  1115,   426,    13,  9651,   376,  1853,  1115,
           376,  1807,   613,    13,  9651