<a href="https://colab.research.google.com/github/MorkMongkul/Deep-Learning-Project/blob/main/Bert_Fine_tuning_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load Some Dependencies

In [28]:

from transformers import TFAutoModel,AutoTokenizer,BertModel,AdamW,BertTokenizer
from datasets import load_dataset
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from sklearn.metrics import classification_report, confusion_matrix

# Load Pre-trained Bert Model

In [7]:
model = TFAutoModel.from_pretrained( 'bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

# Initialize Tokenization

In [8]:
tokenizer = AutoTokenizer.from_pretrained( 'bert-base-uncased')

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

# Load Training/Validation/Testing Data

In [11]:
emotions = load_dataset('SetFit/emotion')

README.md:   0%|          | 0.00/194 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


train.jsonl:   0%|          | 0.00/2.23M [00:00<?, ?B/s]

validation.jsonl:   0%|          | 0.00/276k [00:00<?, ?B/s]

test.jsonl:   0%|          | 0.00/279k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [12]:
emotions

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 2000
    })
})

# Convert [text] data into tokens

In [13]:
def tokenize(batch):
  return tokenizer(batch['text'],padding=True,truncation=True)

In [14]:
emotions_encoded = emotions.map(tokenize,batched= True,batch_size = None)

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [15]:
emotions_encoded

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'label_text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'label_text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
})

# Convert Tokens into Tensors

In [16]:
def convert_to_tensors_from_dataset(dataset):
    """
    Convert the 'input_ids' and 'attention_mask' from the dataset into PyTorch tensors.

    Args:
        dataset (dict): A dataset object containing 'input_ids' and 'attention_mask'.

    Returns:
        input_ids (torch.Tensor): Tensor containing input IDs.
        attention_mask (torch.Tensor): Tensor containing attention masks.
    """
    # Convert lists or arrays from the dataset to PyTorch tensors
    input_ids = torch.tensor(dataset['input_ids'])
    attention_mask = torch.tensor(dataset['attention_mask'])

    return input_ids, attention_mask

In [17]:
train_dataset = emotions_encoded['train']
valid_dataset = emotions_encoded['validation']
test_dataset = emotions_encoded['test']

# Apply the function to each split
train_input_ids, train_attention_mask = convert_to_tensors_from_dataset(train_dataset)
valid_input_ids, valid_attention_mask = convert_to_tensors_from_dataset(valid_dataset)
test_input_ids, test_attention_mask = convert_to_tensors_from_dataset(test_dataset)


# Create a Custom Dataset

In [18]:
class SentimentDataset(Dataset):
    def __init__(self, input_ids, attention_mask, labels):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.labels = labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'label': self.labels[idx]
        }


In [19]:
train_labels = train_dataset['label']
valid_labels = valid_dataset['label']
test_labels = test_dataset['label']

# Create the Dataset objects
train_data = SentimentDataset(train_input_ids, train_attention_mask, train_labels)
valid_data = SentimentDataset(valid_input_ids, valid_attention_mask, valid_labels)
test_data = SentimentDataset(test_input_ids, test_attention_mask, test_labels)

# Create the DataLoader objects
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=16)
test_loader = DataLoader(test_data, batch_size=16)


:

# Defined Model

In [20]:
class BERTClassifier(nn.Module):
    def __init__(self, bert_model_name, num_classes):
        super(BERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        # Feed input through BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Extract the pooled output (typically the [CLS] token representation)
        pooled_output = outputs.pooler_output

        # Apply dropout and then the fully connected layer
        x = self.dropout(pooled_output)
        logits = self.fc(x)

        return logits


In [21]:
# Load pre-trained BERT
model_name = 'bert-base-uncased'
# Initialize the model
model = BERTClassifier(bert_model_name=model_name, num_classes=6)
optimizer = Adam(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

In [22]:
# Training loop
epochs = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")


Epoch 1/3, Loss: 0.4540747404098511
Epoch 2/3, Loss: 0.03532255440950394
Epoch 3/3, Loss: 0.006368732079863548


# Model Evaluation

## Validation Data

In [25]:

model.eval()

# Lists to store predictions and true labels
all_preds = []
all_labels = []

# Disable gradient computation for evaluation
with torch.no_grad():
    for batch in valid_loader:
        # Send inputs and labels to the device (GPU/CPU)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # Forward pass through the model
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # Get predictions by taking the argmax of the output logits
        _, preds = torch.max(outputs, dim=1)

        # Store predictions and true labels
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Print the classification report
print("Classification Report:")
print(classification_report(all_labels, all_preds))

# Print the confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96       550
           1       0.98      0.92      0.95       704
           2       0.83      0.93      0.88       178
           3       0.93      0.95      0.94       275
           4       0.94      0.84      0.89       212
           5       0.81      0.94      0.87        81

    accuracy                           0.94      2000
   macro avg       0.91      0.93      0.91      2000
weighted avg       0.94      0.94      0.94      2000

Confusion Matrix:
[[541   0   0   6   3   0]
 [  5 650  35   2   3   9]
 [  2   8 166   2   0   0]
 [ 12   2   0 260   1   0]
 [ 14   1   0   9 179   9]
 [  1   0   0   0   4  76]]


## Testing Data

In [26]:
from sklearn.metrics import classification_report, confusion_matrix

# Set the model to evaluation mode
model.eval()

# Initialize lists to store predictions and true labels
all_preds = []
all_labels = []

# Disable gradient calculation for inference
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # Get model predictions
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # Get the predicted class by taking the argmax of the logits
        _, preds = torch.max(outputs, dim=1)

        # Store predictions and true labels
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Classification report
print("Classification Report on Test Data:")
print(classification_report(all_labels, all_preds))

# Confusion matrix
print("Confusion Matrix on Test Data:")
print(confusion_matrix(all_labels, all_preds))


Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.95      0.98      0.97       581
           1       0.98      0.92      0.95       695
           2       0.77      0.95      0.85       159
           3       0.92      0.92      0.92       275
           4       0.94      0.85      0.89       224
           5       0.69      0.83      0.75        66

    accuracy                           0.93      2000
   macro avg       0.87      0.91      0.89      2000
weighted avg       0.93      0.93      0.93      2000

Confusion Matrix on Test Data:
[[567   2   1  10   1   0]
 [  2 639  45   2   0   7]
 [  1   7 151   0   0   0]
 [ 14   3   0 253   5   0]
 [  7   0   0   9 190  18]
 [  3   0   0   1   7  55]]


# Save the model

In [27]:
# Save the entire model (including architecture + weights)
torch.save(model, 'sentiment_model_full.pth')


In [44]:
class SentimentModel:
    def __init__(self, model_path, tokenizer_name):
        # Load the pre-trained model and tokenizer
        self.model = torch.load(model_path)  # Load the saved model state_dict
        self.model.eval()  # Set the model to evaluation mode
        self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        # Label mapping (numeric to sentiment)
        self.label_map = {
            0: 'sadness',
            1: 'job',
            2: 'love',
            3: 'anger',
            4: 'fear',
            5: 'surprise'
        }

    def preprocess_and_predict(self, text):
        # Preprocess the text and convert to tensor
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
        input_ids = inputs["input_ids"].to(self.device)
        attention_mask = inputs["attention_mask"].to(self.device)

        # Get the model's prediction
        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            predictions = torch.argmax(outputs, dim=1)

        # Map the predicted numeric label to the corresponding text label
        predicted_label = self.label_map[predictions.item()]

        # Return the predicted sentiment label
        return predicted_label

# Initialize the model and tokenizer
sentiment_model = SentimentModel('sentiment_model_full.pth', 'bert-base-uncased')


  self.model = torch.load(model_path)  # Load the saved model state_dict


In [45]:
# Save tokenizer for later
tokenizer.save_pretrained('sentiment_model_tokenizer')

('sentiment_model_tokenizer/tokenizer_config.json',
 'sentiment_model_tokenizer/special_tokens_map.json',
 'sentiment_model_tokenizer/vocab.txt',
 'sentiment_model_tokenizer/added_tokens.json',
 'sentiment_model_tokenizer/tokenizer.json')

In [46]:
# Initialize the model and tokenizer
sentiment_model = SentimentModel('sentiment_model_full.pth', 'sentiment_model_tokenizer')


  self.model = torch.load(model_path)  # Load the saved model state_dict


In [48]:
# Example usage
input_text = "i am feeling grouchy"
predicted_class = sentiment_model.preprocess_and_predict(input_text)

print(f"Predicted sentiment class: {predicted_class}")

Predicted sentiment class: anger


In [49]:
# Set up your Git username and email
!git config --global user.name "MorkMongkul"
!git config --global user.email "Monkholmama123@gmail.com"