## Import Necessary Libarires

In [1]:
! pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m480.6/480.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ

In [2]:
import pandas as pd # pandas for data manipulation and handling dataframe object
from sklearn.model_selection import train_test_split # Scikit-learn for train test
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification  # HuggingFace  Transformer for tokenization and Model
from transformers import Trainer, TrainingArguments ## HuggingFace Trainer API for model training and fine-tuning
from datasets import Dataset # HuggingFace datsets library to handle dataset objects and easy integration with models
import torch # pytorch for tensor operation and model handling
from sklearn.metrics import classification_report, confusion_matrix

# Load Datasets

In [3]:
df = pd.read_csv("train.csv")

In [4]:
df

Unnamed: 0,text,intent
0,listen to westbam alumb allergic on google music,PlayMusic
1,add step to me to the 50 cl√°sicos playlist,AddToPlaylist
2,i give this current textbook a rating value of...,RateBook
3,play the song little robin redbreast,PlayMusic
4,please add iris dement to my playlist this is ...,AddToPlaylist
...,...,...
13079,i want to eat choucroute at a brasserie for 8,BookRestaurant
13080,play funky heavy bluesy,PlayMusic
13081,rate the current album 2 points out of 6,RateBook
13082,go to the photograph the inflated tear,SearchCreativeWork


In [5]:
df['intent'].value_counts()

Unnamed: 0_level_0,count
intent,Unnamed: 1_level_1
PlayMusic,1914
GetWeather,1896
BookRestaurant,1881
RateBook,1876
SearchScreeningEvent,1852
SearchCreativeWork,1847
AddToPlaylist,1818


In [6]:
df.isnull().sum()

Unnamed: 0,0
text,0
intent,0


# Cleaning

In [7]:
df['intent'] = df['intent'].apply(lambda x: x.lower())

In [10]:
import nltk
from nltk.corpus import stopwords
# Downnload NLTK stpwords(only need to do thi once)

nltk.download('stopwords')

# Load the list of stopwords
stop_words = set(stopwords.words('english'))

# Preprocessing function :Convert text to lower and remove stopword
def preprocess_text(text):
  # convert text to lowercase

  text = text.lower()

  # Remove the stopwords: Split the text, filter out words and join back
  text = ' '.join ([word for word in text.split() if word not in stop_words
                   ])
  return text

# Example

text_input = 'Stop to play Music'

# Preprocess the text
processed_text = preprocess_text(text_input)
print(f"processed Text :{processed_text}")


processed Text :stop play music


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [11]:
df['text'] = df['text'].apply(preprocess_text)

## Encode the Intent

In [12]:
# Check the unique labels in the 'Intent_Label' column
unique_labels = df['intent'].unique()
print("unique labels :", pd.unique)

# Create a mapping from labels to numbers (numeric encoding)
label_to_id = {label: i for i, label in enumerate(unique_labels)}

# Map the 'Intent_Label' to numeric labels in the 'Label' column
df['Label'] = df['intent'].map(label_to_id)

# Check the updated DataFrame
df.head()

unique labels : <function unique at 0x7bcbc3f275b0>


Unnamed: 0,text,intent,Label
0,listen westbam alumb allergic google music,playmusic,0
1,add step 50 cl√°sicos playlist,addtoplaylist,1
2,give current textbook rating value 1 best rati...,ratebook,2
3,play song little robin redbreast,playmusic,0
4,please add iris dement playlist selena,addtoplaylist,1


#Tokenitation

In [13]:
# Load the DistilBERT tokenizer
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")  # Load pre-trained DistilBERT for  tokenizer purpose

# Calculate the maximum tokenized length from the dataset
max_length = max([len(tokenizer.encode(text)) for text in df['text']])  # Tokenize and count the length of each tokenized text
print("Max Length:", max_length)

# Tokenization function with labels
def tokenize_function(examples):  # Define a function to tokenize inputs and add labels
    tokenized_input = tokenizer(examples['text'], padding='max_length', truncation=True, max_length=max_length)  # Use the max_length calculated above
    tokenized_input['labels'] = examples['Label']  # Add labels to the tokenized data for supervised training
    return tokenized_input  # Return tokenized data with labels


dataset = Dataset.from_pandas(df[['text', 'Label']])  # Convert the DataFrame into a HuggingFace Dataset

# Apply tokenization
dataset = dataset.map(tokenize_function, batched=True)  # Apply the tokenization function to the dataset

# Check tokenized data
dataset[0]  # Display the tokenized version of the first example in the dataset to verify the transformation

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/12040accade4e8a0f71eabdb258fecc2e7e948be/vocab.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/12040accade4e8a0f71eabdb258fecc2e7e948be/tokenizer_config.json
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/12040accade4e8a0f71eabdb258fecc2e7e948be/tokenizer.json


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/12040accade4e8a0f71eabdb258fecc2e7e948be/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.46.2",
  "vocab_size": 30522
}



Max Length: 28


Map:   0%|          | 0/13084 [00:00<?, ? examples/s]

{'text': 'listen westbam alumb allergic google music',
 'Label': 0,
 'input_ids': [101,
  4952,
  2225,
  3676,
  2213,
  2632,
  25438,
  27395,
  8224,
  2189,
  102,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'attention_mask': [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'labels': 0}

# Fine Tune Model

In [14]:
# Initialize DistilBERT model for sequence classification
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=len(unique_labels))

# Move model to GPU if available
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/12040accade4e8a0f71eabdb258fecc2e7e948be/config.json
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.46.2",
  "vocab_size": 30522
}



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/12040accade4e8a0f71eabdb258fecc2e7e948be/model.safetensors
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification 

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [16]:
from transformers import TrainingArguments, Trainer  # Import necessary classes from Hugging Face

# Training arguments
training_args = TrainingArguments(  # Define the training configurations
    output_dir="./results",  # Directory to save results (model checkpoints, logs, etc.)
    evaluation_strategy="epoch",  # Evaluate the model at the end of each epoch
    learning_rate=2e-5,  # Set learning rate for the optimizer
    per_device_train_batch_size=16,  # Batch size for training (number of examples per device)
    per_device_eval_batch_size=64,  # Batch size for evaluation
    num_train_epochs=20,  # Number of epochs to train the model
    weight_decay=0.01,  # L2 regularization to avoid overfitting
    logging_dir="./logs",  # Directory to store training logs
    logging_steps=10,  # Log training information every 10 steps
)

# Trainer setup
trainer = Trainer(  # Initialize the Trainer with the model and training configurations
    model=model,  # Model to be trained
    args=training_args,  # Training arguments
    train_dataset=dataset,  # Training dataset
    eval_dataset=dataset,  # Validation dataset (in practice, this should be a separate dataset)
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [17]:
# Train the model
trainer.train()  # Start training the model based on the provided training arguments

The following columns in the training set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: Label, text. If Label, text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13,084
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 16,360
  Number of trainable parameters = 66,958,855
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 42


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 42


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss
1,0.0517,0.03423
2,0.0214,0.020815
3,0.0598,0.011551
4,0.0004,0.003844
5,0.006,0.002985
6,0.0001,0.000162
7,0.0351,0.000258
8,0.0001,0.000377
9,0.0174,0.001145
10,0.0015,7e-05


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/model.safetensors
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: Label, text. If Label, text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 13084
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-1000
Configuration saved in ./results/checkpoint-1000/config.json
Model weights saved in ./results/checkpoint-1000/model.safetensors
Saving model checkpoint to ./results/checkpoint-1500
Configuration saved in ./results/checkpoint-1500/config.json
Model weights saved in ./results/checkpoint-1500/model.safetensors
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequen

TrainOutput(global_step=16360, training_loss=0.017984781261806623, metrics={'train_runtime': 1605.2073, 'train_samples_per_second': 163.019, 'train_steps_per_second': 10.192, 'total_flos': 1895860301704320.0, 'train_loss': 0.017984781261806623, 'epoch': 20.0})

## Evaluation


In [32]:
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the model on the test dataset
predictions, true_labels, _ = trainer.predict(dataset)

# Convert predictions to label indices
predicted_labels = predictions.argmax(axis=1)

# Generate classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))

print("Confusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

The following columns in the test set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: Label, text. If Label, text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 13084
  Batch size = 64


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1914
           1       1.00      1.00      1.00      1818
           2       1.00      1.00      1.00      1876
           3       1.00      1.00      1.00      1852
           4       1.00      1.00      1.00      1881
           5       1.00      1.00      1.00      1896
           6       1.00      1.00      1.00      1847

    accuracy                           1.00     13084
   macro avg       1.00      1.00      1.00     13084
weighted avg       1.00      1.00      1.00     13084

Confusion Matrix:
[[1914    0    0    0    0    0    0]
 [   0 1818    0    0    0    0    0]
 [   0    0 1876    0    0    0    0]
 [   0    0    0 1852    0    0    0]
 [   0    0    0    0 1881    0    0]
 [   0    0    0    0    0 1896    0]
 [   0    0    0    0    0    0 1847]]


## Save Model

In [33]:

# Save the model and tokenizer
model.save_pretrained('./saved_fine_tuned_model')
tokenizer.save_pretrained('./saved_fine_tuned_model')

Configuration saved in ./saved_fine_tuned_model/config.json
Model weights saved in ./saved_fine_tuned_model/model.safetensors
tokenizer config file saved in ./saved_fine_tuned_model/tokenizer_config.json
Special tokens file saved in ./saved_fine_tuned_model/special_tokens_map.json


('./saved_fine_tuned_model/tokenizer_config.json',
 './saved_fine_tuned_model/special_tokens_map.json',
 './saved_fine_tuned_model/vocab.txt',
 './saved_fine_tuned_model/added_tokens.json')

#
Inference: Prediction System

In [34]:
# Load the fine-tuned model and tokenizer for inference
model = DistilBertForSequenceClassification.from_pretrained('./saved_fine_tuned_model')
tokenizer = DistilBertTokenizer.from_pretrained('./saved_fine_tuned_model')

loading configuration file ./saved_fine_tuned_model/config.json
Model config DistilBertConfig {
  "_name_or_path": "./saved_fine_tuned_model",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.46.2",
  "vocab

In [35]:
# Now create the reverse mapping for inference
id_to_label = {i: label for label, i in label_to_id.items()}  # Reverse the mapping
id_to_label

{0: 'playmusic',
 1: 'addtoplaylist',
 2: 'ratebook',
 3: 'searchscreeningevent',
 4: 'bookrestaurant',
 5: 'getweather',
 6: 'searchcreativework'}

In [36]:
# Function to make a prediction
def predict(text, model, tokenizer, max_length=21):
    # Preprocess the input text
    text = preprocess_text(text)
    # Tokenize the input text
    inputs = tokenizer(text, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")

    # Make prediction
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = model(**inputs)  # Get model output
        logits = outputs.logits  # Get logits from the output

    # Get the predicted label (highest logit)
    predicted_class_id = torch.argmax(logits, dim=-1).item()  # Get the index of the max logit


In [40]:
def predict(text, model, tokenizer, max_length=21):
    # Preprocess the input text
    text = preprocess_text(text)
    # Tokenize the input text
    inputs = tokenizer(text, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")

    # Make prediction
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = model(**inputs)  # Get model output
        logits = outputs.logits  # Get logits from the output

    # Get the predicted label (highest logit)
    predicted_class_id = torch.argmax(logits, dim=-1).item()  # Get the index of the max logit

    return predicted_class_id # Return the predicted class ID

In [41]:
# Define the intent labels
id_to_label = {
    0: 'playmusic',
    1: 'addtoplaylist',
    2: 'ratebook',
    3: 'searchscreeningevent',
    4: 'bookrestaurant',
    5: 'getweather',
    6: 'searchcreativework'
}
# Example messages for testing
test_messages = [

    "What's the weather like today?",
    "Find me a creative project about AI.",
    "Are there any events screening this weekend?",
    "Add this song to my playlist.",
    "I'd like to book a table for two.",
    "Rate the book I just finished reading.",
    "Play some relaxing music.",
    "Can you find a documentary on climate change?",
    "What time does the movie start tonight?",
    "Add the new album to my library."
]

# Test the model with the example messages
for message in test_messages:
    predicted_label = predict(message, model, tokenizer)
    predicted_intent = id_to_label.get(predicted_label, "Unknown Intent")
    print(f"Message: {message}")
    print(f"Predicted Label: {predicted_label}, Predicted Intent: {predicted_intent}\n")

Message: What's the weather like today?
Predicted Label: 5, Predicted Intent: getweather

Message: Find me a creative project about AI.
Predicted Label: 6, Predicted Intent: searchcreativework

Message: Are there any events screening this weekend?
Predicted Label: 3, Predicted Intent: searchscreeningevent

Message: Add this song to my playlist.
Predicted Label: 1, Predicted Intent: addtoplaylist

Message: I'd like to book a table for two.
Predicted Label: 4, Predicted Intent: bookrestaurant

Message: Rate the book I just finished reading.
Predicted Label: 2, Predicted Intent: ratebook

Message: Play some relaxing music.
Predicted Label: 0, Predicted Intent: playmusic

Message: Can you find a documentary on climate change?
Predicted Label: 6, Predicted Intent: searchcreativework

Message: What time does the movie start tonight?
Predicted Label: 3, Predicted Intent: searchscreeningevent

Message: Add the new album to my library.
Predicted Label: 1, Predicted Intent: addtoplaylist



# Download directly to your pc

In [None]:
import shutil

# Path to save the model
model_dir = "/content/saved_fine_tuned_model"
model.save_pretrained(model_dir)  # Save the model
tokenizer.save_pretrained(model_dir)  # Save the tokenizer

# Zip the model folder
shutil.make_archive("distilbert_model", 'zip', model_dir)

# Download the zipped model
from google.colab import files
files.download("distilbert_model.zip")