In [None]:
# Install necessary libraries
!pip install transformers
!pip install torch

import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
import torch

# Toy corpus: one sentence per document class, labelled 0..2.
data = dict(
    text=[
        'This is a NoC document.',
        'This is a birth certificate.',
        'This is a death certificate.',
    ],
    label=[0, 1, 2],
)

df = pd.DataFrame(data)

# Pre-trained BERT tokenizer plus a classifier configured with three
# output labels, matching the three classes in the toy corpus above.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=3,
)

# Encode every sentence in one call; padding/truncation are enabled so the
# tokenizer can return the whole batch as PyTorch tensors.
inputs = tokenizer(
    df['text'].tolist(),
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=128,
)
labels = torch.tensor(df['label'].tolist())

# Each dataset item is an (input_ids, attention_mask, label) triple, served
# in batches of two.
dataset = TensorDataset(
    inputs['input_ids'],
    inputs['attention_mask'],
    labels,
)
dataloader = DataLoader(dataset, batch_size=2)

# Define training function
def train(model, dataloader, epochs=3, lr=1e-5):
    """Fine-tune ``model`` on the batches yielded by ``dataloader``.

    Args:
        model: Module invoked as
            ``model(input_ids, attention_mask=..., labels=...)`` whose
            output exposes a scalar ``loss`` attribute.
        dataloader: Re-iterable yielding ``(input_ids, attention_mask,
            labels)`` tensor triples; it is iterated once per epoch.
        epochs: Number of full passes over ``dataloader``.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        A list with one float per epoch: the sample-weighted mean of the
        batch losses, or NaN for an epoch in which ``dataloader`` yielded
        no batches.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Batches are moved to wherever the model's weights live, so training
    # also works after the caller has moved the model to another device.
    device = next(model.parameters()).device

    epoch_losses = []
    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        sample_count = 0
        for input_ids, attention_mask, labels in dataloader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so a short final batch does not skew the
            # epoch figure (assumes the model's loss is a per-batch mean).
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            sample_count += batch_size

        # Report the whole epoch rather than only its last batch; an empty
        # dataloader yields NaN instead of failing on an unbound ``loss``.
        mean_loss = loss_sum / sample_count if sample_count else float('nan')
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}/{epochs} - Loss: {mean_loss}')
    return epoch_losses

# Fine-tune on the sample batches using the default number of epochs.
train(model, dataloader)

# Write the model and the tokenizer into the same directory so both can be
# found together later.
output_dir = '/content/bert-document-classifier'
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)
