In [None]:
!pip install transformers datasets evaluate accelerate scikit-learn

Collecting transformers
  Using cached transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting datasets
  Using cached datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Using cached evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting accelerate
  Using cached accelerate-1.5.2-py3-none-any.whl.metadata (19 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers)
  Using cached huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.man

In [1]:
import torch

print(torch.cuda.is_available())  # Should print True if a GPU is available
print(torch.cuda.device_count())  # Number of GPUs available
print(torch.cuda.get_device_name(0))  # GPU model name


False
0


AssertionError: Torch not compiled with CUDA enabled

In [1]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from torch.utils.data import DataLoader

# Load dataset
dataset = load_dataset("clinc_oos", "plus")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenization function
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True)

# Tokenize dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets
# Convert dataset to PyTorch format


  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    train: Dataset({
        features: ['text', 'intent', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 15250
    })
    validation: Dataset({
        features: ['text', 'intent', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3100
    })
    test: Dataset({
        features: ['text', 'intent', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 5500
    })
})

In [2]:
tokenized_datasets = tokenized_datasets.rename_column("intent", "labels")

tokenized_datasets.set_format("torch")

# Split dataset
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]

# Load pre-trained model
num_labels = len(dataset["train"].features["intent"].names)
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train model
# trainer.train()

# Save model
model.save_pretrained("./intent_classifier")
tokenizer.save_pretrained("./intent_classifier")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('./intent_classifier\\tokenizer_config.json',
 './intent_classifier\\special_tokens_map.json',
 './intent_classifier\\vocab.txt',
 './intent_classifier\\added_tokens.json',
 './intent_classifier\\tokenizer.json')

In [3]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load the trained model and tokenizer
model_path = "./intent_classifier"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [4]:
def predict_intent(text):
    # Tokenize input text
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    
    # Move inputs to GPU if available
    inputs = {key: val.to(device) for key, val in inputs.items()}
    
    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Get predicted class
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()
    
    return predicted_class


In [5]:
text_input = "What is the weather like today?"
predicted_label = predict_intent(text_input)
print(f"Predicted Intent Label: {predicted_label}")


Predicted Intent Label: 138


In [7]:
label_name

NameError: name 'label_name' is not defined

In [10]:
dataset['train']['intent']

[61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 133,
 1