#NLP → Robot Commands

Install dependencies

In [1]:
!pip install transformers datasets torch sentencepiece accelerate




**dataset**

In [2]:
from google.colab import files
uploaded = files.upload()


Saving nlp_robot_commands_100.xlsx to nlp_robot_commands_100.xlsx


**Use the dataset for training**

In [3]:
import pandas as pd

df = pd.read_excel("nlp_robot_commands.xlsx")
df.head()
texts = df["text"].tolist()
labels = df["label"].tolist()


**Encode labels**

In [4]:
# Label encoding
unique_labels = df["label"].unique()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}

df["label_id"] = df["label"].map(label2id)
df.head()


Unnamed: 0,text,label,label_id
0,move the arm to the right,move_right,0
1,go right,move_right,0
2,shift slightly right,move_right,0
3,move a bit right,move_right,0
4,slide the arm right,move_right,0


**Train/test split**

In [5]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df.shape, test_df.shape


((48, 3), (12, 3))

**Convert to HuggingFace Dataset**

In [24]:
from datasets import Dataset
from transformers import AutoTokenizer

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=32)

train_ds = Dataset.from_pandas(train_df)
test_ds = Dataset.from_pandas(test_df)

train_ds = train_ds.map(tokenize, batched=True)
test_ds = test_ds.map(tokenize, batched=True)

train_ds = train_ds.rename_column("label_id", "labels")
test_ds = test_ds.rename_column("label_id", "labels")

# Remove the original string 'label' column and the '__index_level_0__' column
train_ds = train_ds.remove_columns(["label", "__index_level_0__"])
test_ds = test_ds.remove_columns(["label", "__index_level_0__"])

# Set the format to torch, explicitly selecting only the necessary columns
train_ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

**Load tokenizer**

In [25]:
from transformers import AutoTokenizer
from datasets import Dataset

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=32)

# Ensure train_df and test_df are fresh before re-tokenizing and re-formatting
train_ds = Dataset.from_pandas(train_df)
test_ds = Dataset.from_pandas(test_df)

train_ds = train_ds.map(tokenize, batched=True)
test_ds = test_ds.map(tokenize, batched=True)

train_ds = train_ds.rename_column("label_id", "labels")
test_ds = test_ds.rename_column("label_id", "labels")

# Remove the original string 'label' column, 'text' column, and the '__index_level_0__' column
train_ds = train_ds.remove_columns(["text", "label", "__index_level_0__"])
test_ds = test_ds.remove_columns(["text", "label", "__index_level_0__"])

# Set the format to torch, explicitly selecting only the necessary columns
train_ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

**Load model**

In [8]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(unique_labels),
    id2label=id2label,
    label2id=label2id
)


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [21]:
# Create label mapping
unique_labels = df['label'].unique()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}

#Map labels to integers
df['label_id'] = df['label'].map(label2id)

#Convert to HuggingFace Dataset
from datasets import Dataset
dataset = Dataset.from_pandas(df)

dataset = dataset.rename_column("label_id", "labels")  # Trainer expects 'labels'

#Tokenize
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=32)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch")


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

**Training**

In [29]:
import os
os.environ["WANDB_DISABLED"] = "true"

from transformers import TrainingArguments, Trainer # Import Trainer

training_args = TrainingArguments(
    output_dir="./intent_model",
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch", # Added to log training loss per epoch
    learning_rate=1e-5,
    per_device_train_batch_size=8,
    num_train_epochs=24,
    weight_decay=0.01,
    report_to="none" # disable wandb, tensorboard, etc.
)

trainer = Trainer( # Changed from trainer to Trainer
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
)

trainer.train()

Epoch,Training Loss,Validation Loss
1,0.6714,1.023233
2,0.6816,1.001804
3,0.6608,0.97751
4,0.6103,0.935151
5,0.5744,0.913896
6,0.5118,0.895798
7,0.5056,0.873405
8,0.4653,0.859721
9,0.4147,0.850746
10,0.4021,0.836934


TrainOutput(global_step=144, training_loss=0.3910209809740384, metrics={'train_runtime': 350.485, 'train_samples_per_second': 3.287, 'train_steps_per_second': 0.411, 'total_flos': 9538333065216.0, 'train_loss': 0.3910209809740384, 'epoch': 24.0})

**Save Model**

In [30]:
model.save_pretrained("robot_intent_model")
tokenizer.save_pretrained("robot_intent_model")


('robot_intent_model/tokenizer_config.json',
 'robot_intent_model/special_tokens_map.json',
 'robot_intent_model/vocab.txt',
 'robot_intent_model/added_tokens.json',
 'robot_intent_model/tokenizer.json')

**Inference Function**

In [33]:
import torch

def predict_intent(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    pred_id = torch.argmax(outputs.logits).item()
    return id2label[pred_id]

#Test
commands = [
    "move right please",
    "go to the left",
    "grab it now",
    "let go",
    "move up a bit"
]

for cmd in commands:
    print(cmd, "→", predict_intent(cmd))

move right please → move_right
go to the left → move_left
grab it now → grip
let go → release
move up a bit → move_up


In [37]:
# Enter your sentence here
user_text = input("Enter command sentence: ")

# Predict intent using your trained model
intent = predict_intent(user_text)

print("\nPredicted Robot Command:", intent)


Enter command sentence: focus 

Predicted Robot Command: release
