In [3]:
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
from transformers import set_seed

In [59]:
'''
# Reference: https://huggingface.co/docs/transformers/llm_tutorial

STEPS:
    1. Load pre-trained Model
    2. Tokenize and encode the input
    3. Forward pass (Inference)
    4. Decode the output
'''

from IPython.display import HTML

HTML("""
    <video width="920" height="500" alt="LLM" controls>
        <source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/assisted-generation/gif_1_1080p.mov" type="video/mp4">
    </video>
    
    <video width="920" height="500" alt="LLM" controls>
        <source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/assisted-generation/gif_2_1080p.mov" type="video/mp4">
    </video>    
""")

# Load Model

In [34]:
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

In [2]:
# CasaulLM from huggingface API
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto", load_in_4bit=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

# Tokenizer

In [37]:
# Initiate AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token


# We can also do custom Tokenizer
# For example,
# tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME, truncation=True, do_lower_case=True)

### Input

In [38]:
PROMPT = ["Gujju means"]

In [39]:
# All different models have different method to create tokens
tokens = tokenizer.tokenize(PROMPT)
tokens

['▁Gu', 'j', 'ju', '▁means']

In [40]:
# Encode tokens
enc = tokenizer.encode_plus(PROMPT, padding='max_length')
enc

Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.


{'input_ids': [1, 2480, 28768, 8948, 2825], 'attention_mask': [1, 1, 1, 1, 1]}

In [42]:
# Decode back to original
dec = tokenizer.decode(enc['input_ids'])
dec

'<s> Gujju means'

# Inference

In [44]:
# Create model input (automatically) through tokenizer
model_inputs = tokenizer(PROMPT, padding=True, return_tensors="pt").to("cuda")

# Make a forward pass
generated_ids = model.generate(**model_inputs, max_new_tokens=200, do_sample=True)
generated_ids

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


tensor([[    1,  2480, 28768,  8948,  2825,  2480, 13036,  3986,   764,   272,
           905,   477,  2480, 13036,   270,  3885,   302,  5558, 28723,  1306,
           460,   835,  4387,  1401,   272,  3293,  1526,  1096,   302,  2480,
         13036,   270, 28809, 28713,  8376,   384,  2995,  2006, 28708,  4889,
           764,  2480, 28768,  8948, 28723,  1306,   460, 20995,   544,   754,
           272,  1526,   297,  1955,   304, 23248,   505,  8061,  1238, 28723,
            13,    13, 28777,  5872, 28768,   381,   460,  2651,   354,   652,
          6821,  3367,   302,  1955,   304,   590,  1484, 10816,  9647,   297,
          3344, 15860,  9549,   297,  1955,   690,   460,  4312,  5907,   805,
           297,   272,  1043,  1850, 28723,    13,    13,  6037,   460,  4387,
           354,   652,  1955,  1183, 20341,   764,   981,  1313,  4347, 15276,
           298,   347,  6053, 28838,   764,   456,   349,   264,  2480, 28768,
          8948,  2784, 21819, 28723,    13,    13,  

In [48]:
# Check length
print(f"Generated IDs from Model: {len(generated_ids[0])}")

Generated IDs from Model: 205


### Decode

In [50]:
# Decode the IDs to tokens(words)
output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
output

['Gujju means Gujarati – the people from Gujarat State of India. They are also popular around the entire world because of Gujarat’s famous Diaspora population – Gujju. They are scattered all over the world in business and entrepreneurial ventures.\n\nGujjus are known for their amazing sense of business and they never hesitate in taking bold decisions in business which are usually paid off in the long term.\n\nThey are popular for their business acumen – “It takes courage to be successful” – this is a Gujju trademark.\n\nIt was first coined by Famous Comedian and Actor Rajesh Khanna who was known as a ‘Heart-throb‘. The phrase is an extension from the film, “An Evening in Paris”, produced by Dharmesh and Kamal Mehta. It was Rajesh Khanna who coined this as a Gujju in the same way.']

# Different Model / Different Config

In [None]:
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-alpha", device_map="auto", load_in_4bit=True
)
# set_seed(0)

In [10]:
# prompt = "How many helicopters can a human eat in one sitting?"
prompt = "Gujju is?"

messages = [
    {"role": "system", "content": ""},
    {"role": "user", "content": prompt},
]
model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
input_length = model_inputs.shape[1]
generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=200)

In [11]:
tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0]

Gujju is a common nickname or shorthand name for people and things originating from Gujarat, which is a state in western India. The term "Guujju" is derived from the Gujarati word "Gujarati" or "Gujarathi," which is the primary language spoken in Gujarat. Gujju people are known for their entrepreneurial spirit, vegetarian cuisine, and vibrant cultural traditions.


# TEMP

In [51]:
['Gujju means Gujarati – the people from Gujarat State of India. They are also popular around the entire world because of Gujarat’s famous Diaspora population – Gujju. They are scattered all over the world in business and entrepreneurial ventures.\n\nGujjus are known for their amazing sense of business and they never hesitate in taking bold decisions in business which are usually paid off in the long term.\n\nThey are popular for their business acumen – “It takes courage to be successful” – this is a Gujju trademark.\n\nIt was first coined by Famous Comedian and Actor Rajesh Khanna who was known as a ‘Heart-throb‘. The phrase is an extension from the film, “An Evening in Paris”, produced by Dharmesh and Kamal Mehta. It was Rajesh Khanna who coined this as a Gujju in the same way.']

['Gujju means Gujarati – the people from Gujarat State of India. They are also popular around the entire world because of Gujarat’s famous Diaspora population – Gujju. They are scattered all over the world in business and entrepreneurial ventures.\n\nGujjus are known for their amazing sense of business and they never hesitate in taking bold decisions in business which are usually paid off in the long term.\n\nThey are popular for their business acumen – “It takes courage to be successful” – this is a Gujju trademark.\n\nIt was first coined by Famous Comedian and Actor Rajesh Khanna who was known as a ‘Heart-throb‘. The phrase is an extension from the film, “An Evening in Paris”, produced by Dharmesh and Kamal Mehta. It was Rajesh Khanna who coined this as a Gujju in the same way.']