In [None]:
import pandas as pd

In [None]:
# Intent names in class-id order; the position in the tuple is the class id.
id2label = dict(enumerate(("Chat", "Forward", "Backward", "Left", "Right")))

In [None]:
# Load the raw intent data set from disk.
df = pd.read_csv('./data/intent.csv')

# Rename the columns to `text` and `labels`, the names the rest of the
# notebook reads (`examples['text']`, `df['labels']`).
# NOTE(review): this assumes the CSV has exactly two columns, text first and
# label second; pandas raises a length-mismatch error otherwise.
df.columns = ['text', 'labels']

# Show a short preview only. A `head()` call in the middle of a cell is
# discarded, and rendering the whole frame bloats the notebook output.
df.head()

In [None]:
import os
import pandas as pd
import re
import os
import string

import torch
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, BertModel
from arabert.preprocess import ArabertPreprocessor
from transformers import AutoTokenizer
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

import warnings
# Session-wide runtime switches, applied before any tokenizer is created.
# NOTE(review): TOKENIZERS_PARALLELISM is presumably read by the Hugging Face
# tokenizers backend to turn its internal parallelism off -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Install a blanket "ignore" filter so no Python warning is shown from here on.
warnings.simplefilter("ignore")

In [None]:
# Load the pretrained tokenizer and a sequence-classification model built on
# the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# The number of classes is derived from `id2label` rather than hard-coded, and
# the label names are stored in the model config so that a saved checkpoint
# reports "Forward", "Left", ... instead of the generic LABEL_0 ... LABEL_4.
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(id2label),
    id2label=id2label,
    label2id={label: class_id for class_id, label in id2label.items()},
)

In [None]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# label column and uses a fixed seed so it is the same on every run.
train_df, test_df = train_test_split(
    df,
    stratify=df['labels'],
    test_size=0.2,
    random_state=42,
)

In [None]:
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch with the notebook's tokenizer.

    Args:
        examples: Mapping with a ``'text'`` entry holding the input
            sentence(s), as supplied by ``Dataset.map``.

    Returns:
        The tokenizer output for those sentences, truncated and padded
        (``truncation=True, padding=True``).
    """
    texts = examples['text']
    return tokenizer(texts, padding=True, truncation=True)

# Convert the pandas splits to `datasets.Dataset` objects.
# `preserve_index=False` stops the shuffled pandas index from being carried
# along as an extra `__index_level_0__` column.
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)

# Tokenize each split in ONE batch (`batch_size=None`).
# `tokenize_function` pads with `padding=True`, i.e. to the longest sentence
# of the batch it receives. With the default map batch size of 1000 rows, a
# larger data set would end up with differently padded chunks, and the
# Trainer's default collator cannot stack rows of unequal length. Passing the
# whole split at once gives every row in a split the same length.
train_dataset = train_dataset.map(tokenize_function, batched=True, batch_size=None)
test_dataset = test_dataset.map(tokenize_function, batched=True, batch_size=None)

In [None]:
def accuracy_metric(pred):
    """Compute classification accuracy for a batch of predictions.

    Args:
        pred: Object exposing ``predictions`` (per-class scores, with the
            class axis last) and ``label_ids`` (the reference class ids).

    Returns:
        A dict ``{"accuracy": value}`` where ``value`` is the fraction of
        rows whose highest-scoring class equals the reference label.
    """
    predicted_ids = pred.predictions.argmax(-1)
    hit_rate = (pred.label_ids == predicted_ids).mean()
    return {"accuracy": hit_rate.item()}

In [None]:
# Fine-tune the classifier.
#
# `eval_steps` is only honoured when the evaluation strategy is "steps"; the
# default strategy is "no", which made `eval_steps=500` a silent no-op and
# meant the model was never evaluated during training. The keyword was renamed
# from `evaluation_strategy` to `eval_strategy` in newer transformers
# releases, so use whichever name the installed version defines.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    save_steps=1000,  # write a checkpoint every 1000 optimisation steps
    eval_steps=500,   # evaluate on `test_dataset` every 500 optimisation steps
    **{eval_strategy_kwarg: "steps"},
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # Pass the metric function directly; wrapping it in a lambda adds nothing.
    compute_metrics=accuracy_metric,
)

trainer.train()

In [None]:
# Evaluate the fine-tuned model on the held-out split.
# Keep the model on the device the Trainer itself uses instead of hard-coding
# "mps": the hard-coded name fails on machines without Apple Metal support and
# can leave the model on a different device from the batches the Trainer
# feeds it.
model = model.to(trainer.args.device)
trainer.evaluate()

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Classify a single sentence with the FINE-TUNED model.
# Reloading 'bert-base-uncased' here would create a new classification head
# with random weights, so the printed intent would not reflect the training
# done above. Use the model held by the trainer and the tokenizer that was
# used to build the training data.
model = trainer.model
model.eval()  # inference mode: disables dropout

# Single sentence to test
sentence = "tell me about the word forward"

# Tokenize the input sentence and move the tensors to the model's device
inputs = tokenizer(sentence, truncation=True, padding=True, return_tensors="pt")
inputs = inputs.to(model.device)

# Make predictions (no gradients are needed for inference)
with torch.no_grad():
    outputs = model(**inputs)

# Get predicted class id: index of the highest logit
predictions = torch.argmax(outputs.logits, dim=-1)
predicted_class = predictions.item()

# Class names in id order, taken from the notebook's single source of truth
class_labels = [id2label[class_id] for class_id in sorted(id2label)]

# Print results
print(f"Sentence: {sentence}")
print(f"Predicted Intent: {class_labels[predicted_class]}")

In [None]:
# Write the trainer's model to ./models/intent.
trainer.save_model(output_dir="./models/intent")

In [None]:
# Write the tokenizer files to their own directory, separate from the model.
tokenizer.save_pretrained(save_directory='./models/tokenizer')