<a href="https://colab.research.google.com/github/NatureBase/MidtermTask/blob/main/MidtermTask.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers

Looking in indexes: https://download.pytorch.org/whl/cu118


In [12]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load pre-trained model and tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Input text
text = "start move_right move_right accelerate move_left stationary stop"



# Tokenize input text
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
print("Token IDs:", inputs["input_ids"])
print("Attention Mask:", inputs["attention_mask"])

# Run the model on the input
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()

print("Predicted class:", predicted_class)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Token IDs: tensor([[  101,  2707,  2693,  1035,  2157,  2693,  1035,  2157, 23306,  2693,
          1035,  2187, 17337,  2644,   102]])
Attention Mask: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
Predicted class: 1


In [13]:
# Map output to actions
actions = {0: "walking", 1: "running", 2: "stationary"}
print("Predicted action:", actions[predicted_class])

Predicted action: running


In [14]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Define a simple dataset for action recognition
class ActionRecognitionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=16):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        # Tokenize the input text
        inputs = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors="pt")

        # Return input_ids, attention_mask, and label
        item = {key: val.squeeze() for key, val in inputs.items()}
        item["labels"] = torch.tensor(label)
        return item

# Sample dataset with action labels
texts = [
    "start move_right move_right accelerate stop",    # Running
    "start move_left move_left stationary stop",      # Stationary
    "start move_forward decelerate stop",             # Walking
    "start accelerate move_forward move_forward stop",# Running
    "start stationary stationary stationary stop"     # Stationary
]

# Labels (e.g., 0 = walking, 1 = running, 2 = stationary)
labels = [1, 2, 0, 1, 2]

# Load tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Create dataset and dataloaders
train_dataset = ActionRecognitionDataset(texts, labels, tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)




In [15]:
# Load BERT model for sequence classification
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
)



In [17]:
# Start training
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss


TrainOutput(global_step=9, training_loss=1.116625150044759, metrics={'train_runtime': 565.6367, 'train_samples_per_second': 0.027, 'train_steps_per_second': 0.016, 'total_flos': 123334414560.0, 'train_loss': 1.116625150044759, 'epoch': 3.0})

Jika tidak ingin menggunakan wandb,


```
training_args = TrainingArguments(
    output_dir="./results",
    report_to="none",  # disables W&B logging
    # other training arguments
)
```




In [19]:
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Move inputs to the same device as the model
    inputs = inputs.to(model.device)  # Add this line

    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()

    # Map output to actions
    actions = {0: "walking", 1: "running", 2: "stationary"}
    return actions[predicted_class]

# Test the model with a new example
print("Prediction for 'start move_forward move_forward stop':", predict("start move_forward move_forward stop"))

Prediction for 'start move_forward move_forward stop': running
