**Zero shot classification**

In [12]:
from dotenv import load_dotenv
import os
import requests

load_dotenv()  # Load environment variables from .env file, contains personal access token (HF_API_TOKEN=your_token)

API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli"
# API_URL = "https://api-inference.huggingface.co/models/MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
# API_URL = "https://api-inference.huggingface.co/models/cross-encoder/nli-deberta-v3-base"
# API_URL = "https://api-inference.huggingface.co/models/valhalla/distilbart-mnli-12-3"
headers = {"Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}"}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()



In [13]:
# Input text to classify
input_text = "I just bought a new laptop, and it works amazing!"

# Candidate labels
candidate_labels = ["technology", "sports", "politics", "health"]

# Get the prediction
output = query({"inputs": input_text, "parameters": {"candidate_labels": candidate_labels}})
print(output)


{'sequence': 'I just bought a new laptop, and it works amazing!', 'labels': ['technology', 'health', 'sports', 'politics'], 'scores': [0.9709171652793884, 0.014999167993664742, 0.008272457867860794, 0.005811102222651243]}


**Try packing list labels**

In [14]:
# Input text to classify
input_text = "I like to cycle and I burn easily. I also love culture and like to post on social media about my food. I will go on a trip to italy in july."

# Candidate labels
candidate_labels = [
    "Swimsuit", "Sunscreen", "Flip-flops", "Beach towel", "Sunglasses", 
    "Waterproof phone case", "Hat", "Beach bag", "Snorkel gear", "Aloe vera gel",
    "Tent", "Sleeping bag", "Camping stove", "Flashlight", "Hiking boots",
    "Water filter", "Compass", "First aid kit", "Bug spray", "Multi-tool",
    "Thermal clothing", "Ski jacket", "Ski goggles", "Snow boots", "Gloves",
    "Hand warmers", "Beanie", "Lip balm", "Snowboard", "Base layers",
    "Passport", "Visa documents", "Travel adapter", "Currency", "Language phrasebook",
    "SIM card", "Travel pillow", "Neck wallet", "Travel insurance documents", "Power bank",
    "Laptop", "Notebook", "Business attire", "Dress shoes", "Charging cables",
    "Presentation materials", "Work ID badge", "Pen", "Headphones", 
    "Lightweight backpack", "Travel-sized toiletries", "Packable rain jacket",
    "Reusable water bottle", "Dry bag", "Trekking poles", "Hostel lock", "Quick-dry towel",
    "Travel journal", "Energy bars", "Car charger", "Snacks", "Map",
    "Sunglasses", "Cooler", "Blanket", "Emergency roadside kit", "Reusable coffee mug",
    "Playlist", "Reusable shopping bags", "Earplugs", "Fanny pack", "Portable charger",
    "Poncho", "Bandana", "Comfortable shoes", "Tent", "Refillable water bottle",
    "Glow sticks", "Festival tickets", "Diapers", "Baby wipes", "Baby food",
    "Stroller", "Pacifier", "Baby clothes", "Baby blanket", "Travel crib",
    "Toys", "Nursing cover"
]


# Get the prediction
output = query({"inputs": input_text, "parameters": {"candidate_labels": candidate_labels}})
print(output)

{'error': ['Error in `parameters.candidate_labels`: ensure this value has at most 10 items']}


**Use batches of 10 labels and combine results**

In [16]:

input_text = "I'm going on a 2-week hiking trip in the Alps during winter."


# Define the full list of possible packing items (split into groups of 10)
candidate_labels = [
    ["Hiking boots", "Tent", "Sleeping bag", "Camping stove", "Backpack",
     "Water filter", "Flashlight", "Thermal clothing", "Gloves", "Map"],
    
    ["Swimsuit", "Sunscreen", "Flip-flops", "Ski jacket", "Ski goggles",
     "Snow boots", "Beanie", "Hand warmers", "Lip balm", "First aid kit"]
]

# Run classification in batches
packing_list = []
for batch in candidate_labels:
    result = query({"inputs": input_text, "parameters": {"candidate_labels": batch}})
    print(result)
    for label, score in zip(result["labels"], result["scores"]):
        if score > 0.1:  # Adjust threshold as needed
            packing_list.append(label)

# Print the final packing list
print("\nRecommended packing list:", packing_list)

{'sequence': "I'm going on a 2-week hiking trip in the Alps during winter.", 'labels': ['Map', 'Backpack', 'Tent', 'Thermal clothing', 'Hiking boots', 'Flashlight', 'Gloves', 'Camping stove', 'Water filter', 'Sleeping bag'], 'scores': [0.30358555912971497, 0.12884855270385742, 0.10985139012336731, 0.10500500351190567, 0.10141848027706146, 0.08342219144105911, 0.0704946368932724, 0.05127469450235367, 0.024876652285456657, 0.021222807466983795]}
{'sequence': "I'm going on a 2-week hiking trip in the Alps during winter.", 'labels': ['Ski jacket', 'Snow boots', 'Hand warmers', 'Beanie', 'Ski goggles', 'Flip-flops', 'First aid kit', 'Sunscreen', 'Swimsuit', 'Lip balm'], 'scores': [0.20171622931957245, 0.1621972620487213, 0.12313881516456604, 0.10742709040641785, 0.09418268501758575, 0.08230196684598923, 0.07371978461742401, 0.06208840385079384, 0.05506424233317375, 0.038163457065820694]}

Recommended packing list: ['Map', 'Backpack', 'Tent', 'Thermal clothing', 'Hiking boots', 'Ski jacket',

**Try to run a model locally**

In [1]:
from transformers import pipeline

# Load the model and create a pipeline for zero-shot classification
classifier = pipeline("zero-shot-classification", model="facebook/bart-base")

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


In [2]:
input_text = "I like to cycle and I burn easily. I also love culture and like to post on social media about my food. I will go on a trip to italy in july."

# Candidate labels
candidate_labels = [
    "Swimsuit", "Sunscreen", "Flip-flops", "Beach towel", "Sunglasses", 
    "Waterproof phone case", "Hat", "Beach bag", "Snorkel gear", "Aloe vera gel",
    "Tent", "Sleeping bag", "Camping stove", "Flashlight", "Hiking boots",
    "Water filter", "Compass", "First aid kit", "Bug spray", "Multi-tool",
    "Thermal clothing", "Ski jacket", "Ski goggles", "Snow boots", "Gloves",
    "Hand warmers", "Beanie", "Lip balm", "Snowboard", "Base layers",
    "Passport", "Visa documents", "Travel adapter", "Currency", "Language phrasebook",
    "SIM card", "Travel pillow", "Neck wallet", "Travel insurance documents", "Power bank",
    "Laptop", "Notebook", "Business attire", "Dress shoes", "Charging cables",
    "Presentation materials", "Work ID badge", "Pen", "Headphones", 
    "Lightweight backpack", "Travel-sized toiletries", "Packable rain jacket",
    "Reusable water bottle", "Dry bag", "Trekking poles", "Hostel lock", "Quick-dry towel",
    "Travel journal", "Energy bars", "Car charger", "Snacks", "Map",
    "Sunglasses", "Cooler", "Blanket", "Emergency roadside kit", "Reusable coffee mug",
    "Playlist", "Reusable shopping bags", "Earplugs", "Fanny pack", "Portable charger",
    "Poncho", "Bandana", "Comfortable shoes", "Tent", "Refillable water bottle",
    "Glow sticks", "Festival tickets", "Diapers", "Baby wipes", "Baby food",
    "Stroller", "Pacifier", "Baby clothes", "Baby blanket", "Travel crib",
    "Toys", "Nursing cover"
]


# Run the classification
result = classifier(input_text, candidate_labels)

# Print the result
print(result)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'sequence': 'I like to cycle and I burn easily. I also love culture and like to post on social media about my food. I will go on a trip to italy in july.', 'labels': ['Travel-sized toiletries', 'Refillable water bottle', 'Aloe vera gel', 'Snorkel gear', 'Waterproof phone case', 'Packable rain jacket', 'Reusable shopping bags', 'Reusable coffee mug', 'Reusable water bottle', 'First aid kit', 'Travel insurance documents', 'Work ID badge', 'Lightweight backpack', 'Presentation materials', 'Flip-flops', 'Charging cables', 'Hiking boots', 'Comfortable shoes', 'Fanny pack', 'Trekking poles', 'Visa documents', 'Baby wipes', 'Quick-dry towel', 'Baby blanket', 'Hostel lock', 'Blanket', 'Business attire', 'Laptop', 'Beanie', 'Bug spray', 'Travel pillow', 'Baby clothes', 'Passport', 'Earplugs', 'Camping stove', 'Travel journal', 'Emergency roadside kit', 'Baby food', 'Pen', 'Bandana', 'Dress shoes', 'Snacks', 'Travel crib', 'Sunscreen', 'Ski goggles', 'Sunglasses', 'Sunglasses', 'Stroller', 'Lip

In [4]:
# Example text to classify
text = "I like to cycle and I burn easily. I also love culture and like to post on social media about my food. I will go on a trip to italy in july."

# No prompt
no_prompt = text
no_result = classifier(no_prompt, candidate_labels)


# Simple prompt
simple_prompt = "Classify the following text: " + text
simple_result = classifier(simple_prompt, candidate_labels)

# Primed prompt
primed_prompt = "I like to cycle and I burn easily. I also love culture and like to post on social media about my food. I will go on a trip to italy in july. What are the most important things to pack for the trip?"
primed_result = classifier(primed_prompt, candidate_labels)

print("No prompt result:", no_result)
print("Simple prompt result:", simple_result)
print("Primed prompt result:", primed_result)


No prompt result: {'sequence': 'I like to cycle and I burn easily. I also love culture and like to post on social media about my food. I will go on a trip to italy in july.', 'labels': ['Travel-sized toiletries', 'Refillable water bottle', 'Aloe vera gel', 'Snorkel gear', 'Waterproof phone case', 'Packable rain jacket', 'Reusable shopping bags', 'Reusable coffee mug', 'Reusable water bottle', 'First aid kit', 'Travel insurance documents', 'Work ID badge', 'Lightweight backpack', 'Presentation materials', 'Flip-flops', 'Charging cables', 'Hiking boots', 'Comfortable shoes', 'Fanny pack', 'Trekking poles', 'Visa documents', 'Baby wipes', 'Quick-dry towel', 'Baby blanket', 'Hostel lock', 'Blanket', 'Business attire', 'Laptop', 'Beanie', 'Bug spray', 'Travel pillow', 'Baby clothes', 'Passport', 'Earplugs', 'Camping stove', 'Travel journal', 'Emergency roadside kit', 'Baby food', 'Pen', 'Bandana', 'Dress shoes', 'Snacks', 'Travel crib', 'Sunscreen', 'Ski goggles', 'Sunglasses', 'Sunglasses'