<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Upgrade the PyTorch stack (torch, torchvision, torchaudio, torchtext) to the
# newest available releases.
# NOTE(review): the code cell further down imports `torchtext.legacy`, which
# was removed in torchtext 0.12 -- an unpinned upgrade presumably installs a
# release without it. Confirm, and pin a compatible torch/torchtext pair if so.
!pip install torch torchvision torchaudio --upgrade
!pip install torchtext --upgrade

In [None]:
pip show torch torchtext

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.legacy.datasets import IMDB
from torchtext.legacy.data import Field, BucketIterator
from transformers import BertTokenizer, BertForSequenceClassification

# Load the pretrained BERT tokenizer and a two-label classification model from
# the same checkpoint, so the tokenizer's vocabulary matches the model.
_BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(
    _BERT_CHECKPOINT,
    num_labels=2,
)

# Prepare data
def tokenize_and_cut(sentence):
    """Split *sentence* into BERT word pieces, capped to fit the model.

    Two positions are held back from ``tokenizer.model_max_length`` because
    the ``TEXT`` field adds an init token and an eos token to every example.
    """
    pieces = tokenizer.tokenize(sentence)
    limit = tokenizer.model_max_length - 2
    return pieces[:limit]

# Run on the GPU when one is available; computed once for both iterators.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reviews are converted straight to BERT vocabulary ids, so torchtext builds no
# vocabulary of its own; the special tokens are given as BERT ids as well.
TEXT = Field(
    batch_first=True,
    use_vocab=False,
    tokenize=tokenize_and_cut,
    preprocessing=tokenizer.convert_tokens_to_ids,
    init_token=tokenizer.cls_token_id,
    eos_token=tokenizer.sep_token_id,
    pad_token=tokenizer.pad_token_id,
    unk_token=tokenizer.unk_token_id,
)

# The legacy IMDB dataset yields the string labels 'neg' / 'pos'. With
# use_vocab=False nothing maps them to integers, and numericalizing the raw
# string as torch.long fails, so map them to class ids here. Values that are
# not one of the two known strings are passed through unchanged.
_LABEL_TO_ID = {'neg': 0, 'pos': 1}
LABEL = Field(
    sequential=False,
    use_vocab=False,
    dtype=torch.long,
    preprocessing=lambda label: _LABEL_TO_ID.get(label, label),
)

train_data, test_data = IMDB.splits(TEXT, LABEL)
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data),
    batch_size=8,
    device=device,
)

# Training loop
# Fine-tune the classifier for three passes over the training iterator.
# The model is moved to its device before the optimizer is constructed, as the
# torch.optim documentation recommends.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=2e-5)
# Not used by the loop below: the model returns its own loss when `labels` is
# passed. Kept so that any later code referring to `criterion` still works.
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(3):
    for batch in train_iterator:
        optimizer.zero_grad()
        input_ids = batch.text
        labels = batch.label
        # Batches are padded to a common length; mask the pad positions so
        # self-attention ignores them instead of treating them as real tokens.
        attention_mask = (input_ids != tokenizer.pad_token_id).long()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        print(f"Epoch [{epoch+1}], Loss: {loss.item()}")

# Evaluation
# Measure classification accuracy over the test iterator with gradients off.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_iterator:
        input_ids = batch.text
        labels = batch.label
        # Mask the pad positions so padding does not influence the prediction.
        attention_mask = (input_ids != tokenizer.pad_token_id).long()
        outputs = model(input_ids, attention_mask=attention_mask)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.logits.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# An empty test iterator would otherwise divide by zero.
accuracy = 100 * correct / total if total else 0.0
print(f'Accuracy: {accuracy:.2f}%')