# Clasificacion de intencionalidad del usuario
El proyecto consta en clasificar que esta intentando realizar el usuario utilizando Procesamiento de Lenguaje Natural, Transformers y Torch.

Durante el proyecto se llego a la conclusion de que el clasificador dio como resultado un 98% de asertividad (accuracy) por lo que clasifica bien 98 de cada 100 frases.

In [1]:
import torch

torch.cuda.set_device(0)

print("CUDA available:", torch.cuda.is_available())
print("Device name:", torch.cuda.get_device_name(0))


CUDA available: True
Device name: NVIDIA GeForce RTX 4060 Laptop GPU


# Categorization

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch

# Load the dataset
df = pd.read_csv('Datasets_NLU/Dataset_final_with_joke.csv')

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df['Sentence'], df['Intent'], test_size=0.2, random_state=42)

# Tokenizer for DistilBERT
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Tokenize the dataset
train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(list(X_test), truncation=True, padding=True, max_length=128)

# Convert labels to tensors
intent_labels = df['Intent'].unique()
intent_label_map = {label: idx for idx, label in enumerate(intent_labels)}

train_labels = [intent_label_map[intent] for intent in y_train]
test_labels = [intent_label_map[intent] for intent in y_test]

train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

# Dataset class for PyTorch
class NLU_Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = NLU_Dataset(train_encodings, train_labels)
test_dataset = NLU_Dataset(test_encodings, test_labels)

# Load the model
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=len(intent_labels))

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)

# Train the model
trainer.train()

# Make predictions
predictions = trainer.predict(test_dataset)

# Convert the NumPy array to a PyTorch tensor before using argmax
preds = torch.tensor(predictions.predictions).argmax(axis=-1)

# Convert predicted labels back to intention names
predicted_intents = [intent_labels[pred] for pred in preds]

# Compare predicted intents with real intents
real_intents = [intent_label_map[intent] for intent in y_test]  # Convert real intents to numeric values

# Calculate accuracy
accuracy = accuracy_score(real_intents, preds)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Show classification report (Precision, Recall, F1-Score)
print("\nClassification Report:")
print(classification_report(real_intents, preds, target_names=intent_labels))

# Optional: Display comparison between predicted and real intents
comparison_df = pd.DataFrame({
    'Sentence': X_test,
    'Real Intent': y_test,
    'Predicted Intent': predicted_intents
})
print("\nComparison between real and predicted intents:")
print(comparison_df.head(10))  # Display the first 10 comparisons

  from .autonotebook import tqdm as notebook_tqdm





Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Step,Training Loss,Validation Loss
10,2.848,2.848773
20,2.8454,2.843713
30,2.8325,2.835877
40,2.828,2.822277
50,2.8073,2.799058
60,2.7946,2.768227
70,2.7478,2.71601
80,2.715,2.645125
90,2.6412,2.550757
100,2.4932,2.442733


Accuracy: 97.91%

Classification Report:
                  precision    recall  f1-score   support

       PlayMusic       1.00      1.00      1.00        13
  IncreaseVolume       1.00      1.00      1.00        21
      GetWeather       1.00      1.00      1.00        12
            Time       1.00      1.00      1.00        23
    TurnOnLights       1.00      1.00      1.00        28
  InternetSearch       0.87      1.00      0.93        46
            Maps       1.00      0.76      0.86        25
  BookRestaurant       1.00      1.00      1.00        32
   YouTubeVideos       1.00      0.95      0.97        19
  DecreaseVolume       1.00      0.95      0.97        20
      MuteVolume       0.92      1.00      0.96        11
       SetVolume       1.00      1.00      1.00        26
   YoutubeVideos       1.00      1.00      1.00        22
       Translate       1.00      1.00      1.00        21
StockMarketQuery       1.00      1.00      1.00        18
      Initialize       1.00   

In [3]:
df['Intent'].value_counts()

Intent
InternetSearch      189
Time                147
SetVolume           137
YouTubeVideos       129
BookRestaurant      123
YoutubeVideos       112
MuteVolume          106
IncreaseVolume      105
DecreaseVolume      105
TurnOnLights        104
Maps                104
StockMarketQuery    100
Initialize          100
Joke                100
Translate           100
GetWeather           73
PlayMusic            72
Name: count, dtype: int64

# Preprocessing

In [4]:
import spacy

# Load spaCy Transformer-based English model
nlp = spacy.load('en_core_web_trf')

# Set of action words (lemmas)
action_words = set([
    'play', 'open', 'book', 'set', 'turn', 'call', 'email', 'buy', 'cook', 'send',
    'increase', 'decrease', 'mute', 'lock', 'unlock', 'remind', 'wake', 'schedule',
    'watch', 'read', 'write', 'summarize', 'submit', 'practice', 'message',
    'update', 'listen', 'find', 'search', 'get', 'make', 'give', 'take', 'tell',
    'ask', 'work', 'go', 'do', 'be', 'have', 'prepare', 'pack', 'check', 'respond',
    'delete', 'start', 'stop', 'pause', 'resume', 'drive', 'park', 'notify',
    'forget', 'order', 'share', 'light', 'close', 'strike', 'continue', 'begin',
    'finish', 'analyze', 'proceed', 'monitor', 'log', 'facilitate', 'engage',
    'optimize', 'deploy', 'research', 'compile', 'pick', 'drop', 'wrap', 'meet',
    'discuss', 'inform', 'shut', 'put', 'connect', 'disconnect', 'install', 'uninstall',
    'activate', 'deactivate', 'adjust', 'clear', 'show', 'hide', 'track', 'follow',
    'block', 'unblock', 'build', 'fix', 'draw', 'print', 'edit', 'clip', 'crop',
    'zoom', 'upload', 'download', 'record', 'stream', 'playback', 'charge', 'pay',
    'deliver', 'explore', 'design', 'test', 'help', 'assist', 'update', 'organize',
    'remind', 'plan', 'recommend', 'subscribe', 'unsubscribe', 'post', 'comment',
    'like', 'dislike', 'tag', 'mention', 'call', 'text', 'scan', 'encrypt', 'decrypt',
    'sign', 'authenticate', 'scan', 'record', 'repeat', 'shuffle', 'translate'
])


def extract_intentions(sentence):
    doc = nlp(sentence)
    intentions = []
    current_intent = []

    for token in doc:
        # Identify if the token is an action verb or auxiliary
        is_action = (
            token.lemma_.lower() in action_words and
            token.pos_ in ('VERB', 'AUX')
        )

        if is_action:
            # If we encounter a new action and have accumulated tokens, store the current intention
            if current_intent:
                intentions.append(' '.join(tok.text for tok in current_intent).strip())
                current_intent = []

        # Accumulate tokens for the current intention
        current_intent.append(token)

        # Handle conjunctions (e.g., 'and', 'or') connecting different actions
        if token.dep_ == 'cc' and current_intent:
            intentions.append(' '.join(tok.text for tok in current_intent).strip())
            current_intent = []

    # Add the final intention if it exists
    if current_intent:
        intentions.append(' '.join(tok.text for tok in current_intent).strip())

    return intentions

# Test sentences
if __name__ == "__main__":
    sentences = [
        "Play Barbie Girl, search Coffe, and increase the volume.",
        "Play music, turn off the lights, and lock the doors.",
    ]

    for sentence in sentences:
        intentions = extract_intentions(sentence)
        print(f"Sentence: {sentence}")
        print("Extracted Intentions:")
        for intent in intentions:
            print(f"- {intent}")
        print("-" * 50)


  model.load_state_dict(torch.load(filelike, map_location=device))


Sentence: Play Barbie Girl, search Coffe, and increase the volume.
Extracted Intentions:
- Play Barbie Girl ,
- search Coffe , and
- increase the volume .
--------------------------------------------------
Sentence: Play music, turn off the lights, and lock the doors.
Extracted Intentions:
- Play music ,
- turn off the lights , and
- lock the doors .
--------------------------------------------------


  with torch.cuda.amp.autocast(self._mixed_precision):


# Preprocessing and Categorization

In [5]:
sentences_predict = extract_intentions('play taylor swift and increase the volume')
df_sentences = pd.DataFrame(sentences_predict, columns=['sentences'])
df_sentences

Unnamed: 0,sentences
0,play taylor swift and
1,increase the volume


In [6]:
new_sentences_encodings = tokenizer(list(df_sentences['sentences']), truncation=True, padding=True, max_length=128)

# Create a dataset using the same class
new_sentences_dataset = NLU_Dataset(new_sentences_encodings, torch.tensor([0]*len(df_sentences)))  # Labels are dummy values

sentence_predicted = trainer.predict(new_sentences_dataset)

# Predict on the new sentences
new_predictions = trainer.predict(new_sentences_dataset)

# Convert the NumPy array to a PyTorch tensor and get the predicted intents
new_preds = torch.tensor(new_predictions.predictions).argmax(axis=-1)

# Convert predicted labels back to intention names
predicted_new_intents = [intent_labels[pred] for pred in new_preds]

# Show the predictions
df_sentences['Predicted Intent'] = predicted_new_intents


In [7]:
df_sentences

Unnamed: 0,sentences,Predicted Intent
0,play taylor swift and,PlayMusic
1,increase the volume,IncreaseVolume


# Split and Categorization

In [8]:
def split_and_categorize(user_input):
    sentences_predict = extract_intentions(user_input)
    df_sentences = pd.DataFrame(sentences_predict, columns=['sentences'])
    new_sentences_encodings = tokenizer(list(df_sentences['sentences']), truncation=True, padding=True, max_length=128)

    # Create a dataset using the same class
    new_sentences_dataset = NLU_Dataset(new_sentences_encodings, torch.tensor([0]*len(df_sentences)))  # Labels are dummy values
    
    sentence_predicted = trainer.predict(new_sentences_dataset)
    
    # Predict on the new sentences
    new_predictions = trainer.predict(new_sentences_dataset)
    
    # Convert the NumPy array to a PyTorch tensor and get the predicted intents
    new_preds = torch.tensor(new_predictions.predictions).argmax(axis=-1)
    
    # Convert predicted labels back to intention names
    predicted_new_intents = [intent_labels[pred] for pred in new_preds]
    
    # Show the predictions
    df_sentences['Predicted Intent'] = predicted_new_intents
    return df_sentences

In [10]:
categorized_sentences = split_and_categorize('shut up the video')
categorized_sentences 

  with torch.cuda.amp.autocast(self._mixed_precision):


Unnamed: 0,sentences,Predicted Intent
0,shut up the video,MuteVolume
