In [4]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from transformers import get_linear_schedule_with_warmup
from sklearn.model_selection import val_test_split
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler

  from .autonotebook import tqdm as notebook_tqdm
2024-11-16 11:28:55.529429: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-16 11:28:56.159750: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
texts = ['I love this product!', 'This is terrible']
labels = [1, 0]

In [None]:
input_ids = []
attention_masks = []
for text in texts:
    encoded_dict = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=64,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )
    input_ids.append(
        encoded_dict['input_ids'],
    )
    attention_masks.append(
        encoded_dict['attention_mask']
    )
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

tensor([[ 101, 1045, 2293, 2023, 4031,  999,  102,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0],
        [ 101, 2023, 2003, 6659,  102,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0]]) tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

  labels = torch.tensor(labels)


In [14]:
train_inputs, val_inputs, val_labels, val_labels, trainsmasks, val_masks = train_test_split(
    input_ids, labels, attention_masks, random_state=42, test_size=0.2
)

In [15]:
batch_size = 32

train_data = TensorDataset(train_inputs, trains_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = RandomSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

In [16]:
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=len(train_dataloader) * 2
)



In [19]:
epochs = 2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_masks, labels = batch
        optimizer.zero_grad()
        
        outputs = model(input_ids, attention_mask=input_masks, labels=labels)
        loss = outputs.loss
        
        total_loss += loss.item()
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), 1.0
        )
        
        optimizer.step()
        scheduler.step()
        
    avg_train_loss = total_loss / len(train_dataloader)
    
    print(f'Epoch {epoch + 1}/{epochs}')
    print(f'Average training loss: {avg_train_loss}')

Epoch 1/2
Average training loss: 0.7539728879928589
Epoch 2/2
Average training loss: 0.6290143132209778


In [None]:
# validation
model.eval()

val_accuracy = 0
val_preds = []
for batch in val_dataloader:
    batch = tuple(t.to(device) for t in batch)
    input_ids, input_mask, labels = batch
    
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=input_mask)
        
    logits = outputs.logits
    logits = logits.detach().cpu().numpy()
    print(logits)
    label_ids = labels.to('cpu').numpy()
    val_preds.extend(logits.argmax(axis=1))
    val_accuracy = accuracy_score(
        val_labels, val_preds
    )
    print(f'Acc: {val_accuracy * 100:.2f}%')
    model.save_pretrained('fine-tuned-bert')
    

SyntaxError: cannot assign to literal (2824328451.py, line 4)