In [14]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torch.utils.data import DataLoader, Dataset

# Define the dataset
class VoiceCommandsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with one label per example.

    Args:
        encodings: Mapping with at least ``'input_ids'`` and ``'attention_mask'``,
            each indexable by example position (e.g. the tokenizer output).
        labels: Sequence with one numeric label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the example at ``index`` as a dict of tensors.

        ``input_ids`` and ``attention_mask`` are returned as ``torch.long``
        tensors rather than Python lists: PyTorch's default collate function
        transposes per-example lists into one tensor per token position,
        whereas per-example tensors are stacked into a ``(batch, seq_len)``
        batch, which is the layout the model consumes.

        ``labels`` is a float scalar tensor, matching the single-output
        (``num_labels=1``) model configured in this notebook.
        """
        return {
            'input_ids': torch.as_tensor(self.encodings['input_ids'][index], dtype=torch.long),
            'attention_mask': torch.as_tensor(self.encodings['attention_mask'][index], dtype=torch.long),
            'labels': torch.tensor(self.labels[index], dtype=torch.float),
        }

# Training hyper-parameters (kept together so they are easy to tune)
NUM_TRAIN_EPOCHS = 3
TRAIN_BATCH_SIZE = 16
EVAL_BATCH_SIZE = 64
MAX_WARMUP_STEPS = 500   # upper bound on learning-rate warm-up steps
WEIGHT_DECAY = 0.01

# Load the training data
# NOTE(review): every example below has label 0, and the prediction cell prints
# label 0 as "do not open". Confirm the intended labels and add examples of the
# other class; a single class gives the model nothing to discriminate.
train_data = [{'text': 'start google', 'label': 0},
              {'text': 'open google', 'label': 0},
              {'text': 'launch google', 'label': 0}]

# Define the tokenizer and encode the training data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_texts = [data['text'] for data in train_data]
train_labels = [data['label'] for data in train_data]
train_encodings = tokenizer(train_texts, truncation=True, padding=True)

# Define the dataset and dataloader
train_dataset = VoiceCommandsDataset(train_encodings, train_labels)
# The Trainer below builds its own dataloader from train_dataset; this loader is
# kept only for manual inspection of batches.
train_loader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE)

# Load the pre-trained BERT model with a single output value per example.
# If this raises "OSError: Unable to load weights from pytorch checkpoint file",
# the cached pytorch_model.bin is probably incomplete or corrupted: clear the
# Hugging Face cache entry or re-run once with force_download=True
# (TODO confirm on the affected machine).
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)

# Size the learning-rate warm-up from the real number of optimiser steps.
# A fixed 500-step warm-up is far longer than the whole run on a small dataset
# (3 examples x 3 epochs is about 3 steps), which would leave the learning rate
# near zero for the entire training. Use roughly 10% of the run, capped at
# MAX_WARMUP_STEPS. Assumes a single device and no gradient accumulation.
steps_per_epoch = -(-len(train_dataset) // TRAIN_BATCH_SIZE)  # ceiling division
total_train_steps = steps_per_epoch * NUM_TRAIN_EPOCHS
warmup_steps = min(MAX_WARMUP_STEPS, total_train_steps // 10)

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',                        # output directory
    num_train_epochs=NUM_TRAIN_EPOCHS,             # total number of training epochs
    per_device_train_batch_size=TRAIN_BATCH_SIZE,  # batch size per device during training
    per_device_eval_batch_size=EVAL_BATCH_SIZE,    # batch size for evaluation
    warmup_steps=warmup_steps,                     # warm-up steps for the learning rate scheduler
    weight_decay=WEIGHT_DECAY,                     # strength of weight decay
    logging_dir='./logs',                          # directory for storing logs
)

# Define the trainer and train the model
trainer = Trainer(
    model=model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=None                    # no evaluation dataset is provided
)

trainer.train()


OSError: Unable to load weights from pytorch checkpoint file for 'C:\Users\Admin/.cache\huggingface\hub\models--bert-base-uncased\snapshots\0a6aa9128b6194f4f3c4db429b6cb4891cdb421b\pytorch_model.bin' at 'C:\Users\Admin/.cache\huggingface\hub\models--bert-base-uncased\snapshots\0a6aa9128b6194f4f3c4db429b6cb4891cdb421b\pytorch_model.bin'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.

In [13]:
# Define the new voice commands to predict
new_data = ['start google', 'open google', 'launch google']

# Tokenize the new data
new_encodings = tokenizer(new_data, truncation=True, padding=True)

# Define the dataset and dataloader for the new data.
# The dataset requires labels, so zeros are passed as placeholders; they are
# not read anywhere below.
new_dataset = VoiceCommandsDataset(new_encodings, [0]*len(new_data))
new_loader = DataLoader(new_dataset, batch_size=16)

# A score above this value is reported as the positive ("open") class.
POSITIVE_THRESHOLD = 0.5


def _as_batch_tensor(field):
    """Return a collated DataLoader field as a (batch, seq_len) tensor.

    When the dataset yields per-example Python lists, the default collate
    function returns one tensor of shape (batch,) per token position; stacking
    those along dim 1 restores the (batch, seq_len) layout. A field that is
    already a tensor is returned unchanged.
    """
    if isinstance(field, torch.Tensor):
        return field
    return torch.stack(list(field), dim=1)


# Make predictions using the trained model
model.eval()  # disable dropout so the predictions are deterministic
predictions = []
with torch.no_grad():
    for batch in new_loader:
        # Keep the inputs on the same device as the model (it may be on a GPU
        # after training).
        input_ids = _as_batch_tensor(batch['input_ids']).to(model.device)
        attention_mask = _as_batch_tensor(batch['attention_mask']).to(model.device)
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
        if logits.shape[-1] == 1:
            # A single-output head is trained with a regression (MSE) loss
            # against the 0/1 labels, so the raw output is already the score
            # to compare with the threshold; a sigmoid would push every
            # non-negative output above 0.5.
            scores = logits
        else:
            # Multi-class head: convert the logits to class probabilities.
            scores = torch.softmax(logits, dim=-1)
        predictions.extend(scores.cpu().tolist())

# Convert the predictions to labels
labels = []
for example_scores in predictions:
    # One value per example means it is the positive score itself; otherwise
    # the positive class probability is at index 1.
    positive_score = example_scores[0] if len(example_scores) == 1 else example_scores[1]
    labels.append(1 if positive_score > POSITIVE_THRESHOLD else 0)

assert len(labels) == len(new_data), "expected exactly one prediction per command"

# Print the predictions
for command, label in zip(new_data, labels):
    print(f"{command} -> {'open' if label else 'do not open'}")





start google -> do not open
open google -> do not open
launch google -> do not open
