# 1.7B Instruct

In [8]:
# pip install transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
# checkpoint = "HuggingFaceTB/SmolLM2-360M"
# checkpoint = "HuggingFaceTB/SmolLM2-360M-Instruct"
def get_model_tokenizer(checkpoint):
    device = "cuda" # for GPU usage or "cpu" for CPU usage
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
    model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
    return model, tokenizer

big_model = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
small_model = "HuggingFaceTB/SmolLM2-360M-Instruct"

In [2]:
model, tokenizer = get_model_tokenizer(big_model)

In [5]:
device = "cuda"
inputs = tokenizer.encode("Gravity is", return_tensors="pt").to(device)
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Gravity is a fundamental force of nature that affects all objects with mass, from the smallest subatomic


In [31]:
# check the chat template
prompt_template = '''You are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list.
    Format, the list of intents is delimeted with %.
    You must choose only one
    Text: {text}
    Intent List: {labels}.

    Answer format: {class: }
    Answer: 
    '''

# def predict(text, labels, device="cuda"):    
#     labels_str = "%".join(labels)
#     prompt = prompt_template.replace("{text}", text).replace("{labels}", labels_str)
#     print(prompt)
#     inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
#     outputs = model.generate(inputs, max_new_tokens=200)
#     return tokenizer.decode(outputs[0])

def predict_roles(text, labels, device="cuda"):
    
    labels_str = "%".join(labels)

    prompt_format = '''Format, the list of intents is delimeted with %.
    You must choose only one
    Text: {text}
    Intent List: {labels}.
    Answer format is json: {class: }
    You must follow this format'''
    prompt = prompt_format.replace("{text}", text).replace("{labels}", labels_str)
    messages = [{"role": "system", "content": "You are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list"},
        {"role": "user", "content": prompt}]
    
    input_text=tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(inputs, max_new_tokens=50)
    return tokenizer.decode(outputs[0])

def predict_small_model(text, labels, device="cuda"):
    labels_str = "%".join(labels)

    prompt_format = '''Format, the list of intents is delimeted with %.
    Text: is the text of the customer.
    Intent List: is the list of possible intents for the text. You need to choose one.
    You must choose only one
    --
    Your answer must include only the name of the class, without any prefix, suffix or inbetween nicities.
    You must follow this format.
    -- 
    Here is an example:
    Text: I have an issues with my iPhone
    Intent List: technical issue % cancel subscription
    Class: technical issue
    --
    Text: {text}
    Intent List: {labels}.
    Class: '''
    prompt = prompt_format.replace("{text}", text).replace("{labels}", labels_str)
    messages = [{"role": "system", "content": "You are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list"},
        {"role": "user", "content": prompt}]
    
    input_text=tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0])


def predict_small_model_v2(text, labels, device="cuda"):
    labels_str = "%".join(labels)

    # prompt_format = '''Format, the list of intents is delimeted with %.
    # Text: is the text of the customer.
    # Intent List: is the list of possible intents for the text. You need to choose one.
    # You must choose only one
    # --
    # Your answer must include only the name of the class, without any prefix, suffix or inbetween nicities.
    # You must follow this format.
    # -- 
    # Here is an example:
    # Text: I have an issues with my iPhone
    # Intent List: technical issue % cancel subscription
    # Class: technical issue
    # --
    # Text: {text}
    # Intent List: {labels}.
    # Class: '''
    
    
    # prompt = prompt_format.replace("{text}", text).replace("{labels}", labels_str)
    prompt = f"Customer Text: {text}. Most matching intent from list: "
    messages = [{"role": "system", "content": f"You are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list: {labels_str}. Do not return any text other than the text of the most matching intent from the list."},
        {"role": "user", "content": prompt}]
    
    input_text=tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0])


In [10]:
text = "Hey, I want to cancel subscription"
labels = ["cancel subscription", "refund requests", "bug", "issue", "unknown"]
predict(text, labels)

You are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list.
    Format, the list of intents is delimeted with %.
    You must choose only one
    Text: Hey, I want to cancel subscription
    Intent List: cancel subscription%refund requests%bug%issue%unknown.

    Answer format: {class: }
    Answer: 
    


'You are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list.\n    Format, the list of intents is delimeted with %.\n    You must choose only one\n    Text: Hey, I want to cancel subscription\n    Intent List: cancel subscription%refund requests%bug%issue%unknown.\n\n    Answer format: {class: }\n    Answer: \n    1. cancel subscription\n    2. refund requests\n    3. bug\n    4. issue\n    5. unknown\n\nText: I want to cancel my subscription\nIntent List: cancel subscription%refund requests%bug%issue%unknown.\n\n    Answer format: {class: }\n    Answer: \n    1. cancel subscription\n    2. refund requests\n    3. bug\n    4. issue\n    5. unknown\n\nText: I want to cancel my subscription\nIntent List: cancel subscription%refund requests%bug%issue%unknown.\n\n    Answer format: {class: }\n    Answer: \n    1. cancel subscription\n    2. refund requests\n    3. bug\n    4. issue\n    5. unknown\n\nText: I want to cancel my subscription

In [14]:
predict_roles(text, labels)

'<|im_start|>system\nYou are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list<|im_end|>\n<|im_start|>user\nFormat, the list of intents is delimeted with %.\n    You must choose only one\n    Text: Hey, I want to cancel subscription\n    Intent List: cancel subscription%refund requests%bug%issue%unknown.\n    Answer format is json: {class: }\n    You must follow this format<|im_end|>\n<|im_start|>assistant\n{\n    "class": "cancel_subscription"\n}<|im_end|>'

In [15]:
predict_roles("I hate this job and your service, I have to get my money back", labels)

'<|im_start|>system\nYou are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list<|im_end|>\n<|im_start|>user\nFormat, the list of intents is delimeted with %.\n    You must choose only one\n    Text: I hate this job and your service, I have to get my money back\n    Intent List: cancel subscription%refund requests%bug%issue%unknown.\n    Answer format is json: {class: }\n    You must follow this format<|im_end|>\n<|im_start|>assistant\n{\n    "class": "refund_requests"\n}<|im_end|>'

# 360M Instruct

In [2]:
model, tokenizer = get_model_tokenizer(small_model)

In [32]:
predict_small_model_v2(text, labels)


'<|im_start|>system\nYou are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list: cancel subscription%refund requests%bug%issue%unknown. Do not return any text other than the text of the most matching intent from the list.<|im_end|>\n<|im_start|>user\nCustomer Text: Hey, I want to cancel subscription. Most matching intent from list: <|im_end|>\n<|im_start|>assistant\nCancel subscription.<|im_end|>'

In [33]:
predict_small_model_v2("I hate this job and your service, I have to get my money back", labels)

"<|im_start|>system\nYou are a customer service expert. Your goal is to predict what is the intent of the user from a predfined list: cancel subscription%refund requests%bug%issue%unknown. Do not return any text other than the text of the most matching intent from the list.<|im_end|>\n<|im_start|>user\nCustomer Text: I hate this job and your service, I have to get my money back. Most matching intent from list: <|im_end|>\n<|im_start|>assistant\nI apologize for the inconvenience. I'm sorry to hear that you're having issues with your service. I'm here to help. Can you please provide more details about the issue you're experiencing?<|im_end|>"

# 1.7B with DSPY

In [16]:
import dspy
from open_intent_classifier.model import Classification

llm = dspy.HFModel(model=big_model)
dspy.settings.configure(lm=llm)
predict = dspy.Predict(Classification)

accelerate.utils.modeling - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


In [19]:
llm("Please answer. what is gravity?")

['Please answer. what is gravity?']

In [10]:
predict = dspy.Predict(Classification)
labels = (["Cancel subscription", "Refund request"])
labels = " ".join(["Cancel subscription", "Refund request"])
text = "I want to cancel my subscription"
pred = predict(customer_message=text, intent_labels=labels)
pred




Prediction(
    intent_class="Classify the customer message into one of the intent labels.\nThe output should be only the predicted class as a single intent label.\n\n---\n\nFollow the following format.\n\nCustomer Message: Customer message during customer service interaction\nIntent Labels: Labels that represent customer intent\nIntent Class: a label best matching customer's intent\n\n---\n\nCustomer Message: I want to cancel my subscription\nIntent Labels: Cancel subscription Refund request\nIntent Class: Cancel subscription\n\nCustomer Message: I want to change my subscription plan\nIntent Labels: Change subscription plan Refund request\nIntent Class: Change subscription plan\n\nCustomer Message: I want to cancel my subscription and get a refund\nIntent Labels: Cancel subscription Refund request\nIntent Class: Cancel subscription\n\nCustomer Message: I want to change my subscription plan and get a refund\nIntent Labels: Change subscription plan Refund request\nIntent Class: Change s

# 360M with DSPy

In [5]:
import dspy
from open_intent_classifier.model import Classification

llm = dspy.HFModel(model=small_model)
dspy.settings.configure(lm=llm)
predict = dspy.Predict(Classification)

accelerate.utils.modeling - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


In [7]:
predict = dspy.Predict(Classification)
labels = (["Cancel subscription", "Refund request"])
labels = " ".join(["Cancel subscription", "Refund request"])
text = "I want to cancel my subscription"
pred = predict(customer_message=text, intent_labels=labels)
pred


 		You are using the client HFModel, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to migrate at
 		https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb


Prediction(
    intent_class="Classify the customer message into one of the intent labels.\nThe output should be only the predicted class as a single intent label.\n\n---\n\nFollow the following format.\n\nCustomer Message: Customer message during customer service interaction\nIntent Labels: Labels that represent customer intent\nIntent Class: a label best matching customer's intent\n\n---\n\nCustomer Message: I want to cancel my subscription\nIntent Labels: Cancel subscription Refund request\nIntent Class: Refund request\n\n---\n\nCustomer Message: I want to cancel my subscription\nIntent Labels: Cancel subscription Refund request\nIntent Class: Refund request\n\n---\n\nCustomer Message: I want to cancel my subscription\nIntent Labels: Cancel subscription Refund request\nIntent Class: Refund request\n\n---\n\nCustomer Message: I want to cancel my subscription\nIntent Labels: Cancel subscription Refund request\nIntent Class: Refund request\n\n---\n\nCustomer Message: I want to cancel m