In [1]:
import pandas as pd
import torch
import collections
from tqdm.notebook import tqdm
from transformers import BertTokenizer
from torch.utils.data import TensorDataset
from transformers import BertForSequenceClassification

# Load the labelled samples; the CSV is read as latin-1.
df = pd.read_csv("SampleDataset.csv", encoding='latin-1')

# Distinct TECHNOLOGY values, in order of first appearance in the file.
possible_labels = df.TECHNOLOGY.unique()

# Class name -> integer id (position of the name in possible_labels).
label_dict = {name: idx for idx, name in enumerate(possible_labels)}

# Integer id -> list of class names carrying that id. Kept as a
# defaultdict(list) so a lookup of an unknown id yields an empty list.
inverse_dict = collections.defaultdict(list)
for name, idx in label_dict.items():
    inverse_dict[idx].append(name)

class RLAgent:
    """Wrap a sequence classifier as a Q-function refined from human feedback.

    The classifier's logits for a sentence are treated as Q-values, one per
    class ("action"). ``take_action`` picks the greedy class and
    ``update_model`` performs a single temporal-difference step on it.

    Args:
        model: Module whose forward call ``model(input_ids, attention_mask=...)``
            returns a sequence with the logits as its first element.
        num_actions: Number of classes the model can predict.
        learning_rate: Step size used both for the Adam optimizer and for
            moving the Q-value target towards the TD estimate.
        gamma: Discount factor applied to the best Q-value of the next sentence.
        tokenizer: Optional tokenizer exposing ``encode_plus``. When omitted,
            the ``bert-base-uncased`` tokenizer is loaded on first use and
            then reused for every later call.
    """

    def __init__(self, model, num_actions, learning_rate, gamma, tokenizer=None):
        self.model = model
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        # Loaded lazily so that constructing an agent does no tokenizer I/O.
        self._tokenizer = tokenizer

    def _get_tokenizer(self):
        """Return the tokenizer, loading the default one on first use."""
        if self._tokenizer is None:
            self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
        return self._tokenizer

    def _encode(self, sentence):
        """Tokenize *sentence* and return ``(input_ids, attention_mask)``.

        Both tensors are moved to whatever device the model's parameters
        currently live on, so the agent does not rely on a global ``device``.
        """
        encoded = self._get_tokenizer().encode_plus(
            sentence,
            add_special_tokens=True,
            max_length=256,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        model_device = next(self.model.parameters()).device
        return (
            encoded['input_ids'].to(model_device),
            encoded['attention_mask'].to(model_device),
        )

    def take_action(self, sentence):
        """Return the index of the highest-scoring class for *sentence*."""
        input_ids, attention_mask = self._encode(sentence)
        self.model.eval()
        with torch.no_grad():
            logits = self.model(input_ids, attention_mask=attention_mask)[0]
        # Softmax is monotonic, so the argmax of the raw logits is the same
        # class the softmax probabilities would select.
        return torch.argmax(logits, dim=1).item()

    def update_model(self, sentence, action, reward, next_sentence):
        """Run one TD update for taking *action* on *sentence*.

        Args:
            sentence: The sentence that was classified.
            action: Index of the class that was predicted for it.
            reward: Numeric feedback for that prediction.
            next_sentence: The following sentence; its best Q-value is used
                to bootstrap the target.

        Returns:
            The scalar loss of this update as a Python float.
        """
        input_ids, attention_mask = self._encode(sentence)
        self.model.train()
        logits = self.model(input_ids, attention_mask=attention_mask)[0]
        current_q_value = logits[0, action]

        next_ids, next_mask = self._encode(next_sentence)
        # NOTE: the model is left in eval mode after this call, as before.
        self.model.eval()
        with torch.no_grad():
            next_logits = self.model(next_ids, attention_mask=next_mask)[0]
        # Reduce to a 0-d tensor so it matches current_q_value's shape and
        # mse_loss does not have to broadcast.
        next_q_value = next_logits[0].max()

        # The target must be a constant: detach the current estimate so the
        # gradient only flows through the prediction side of the loss.
        current_estimate = current_q_value.detach()
        td_error = reward + self.gamma * next_q_value - current_estimate
        updated_q_value = current_estimate + self.learning_rate * td_error

        self.optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(current_q_value, updated_q_value)
        loss.backward()
        self.optimizer.step()
        return loss.item()

# Run on the first GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build a BERT classifier with one output per known label, move it to the
# chosen device, then overwrite its weights with the fine-tuned checkpoint.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label_dict),
    output_attentions=False,
    output_hidden_states=False,
)
model.to(device)
model.load_state_dict(
    torch.load(
        'updated/finetuned_BERT_epoch_5.model',
        map_location=torch.device('cpu'),
    )
)
# Alternative checkpoint, currently disabled:
#model.load_state_dict(torch.load('data volume/updated_model.pt', map_location=torch.device('cpu')))

# Hyper-parameters for the feedback-driven agent: one action per label.
gamma = 0.9
learning_rate = 0.001
num_actions = len(label_dict)
agent = RLAgent(model, num_actions, learning_rate, gamma)

# Interactive session: classify a sentence, ask a human whether the
# prediction was right, and apply one update when it was not.
while True:
    sentence = input("Enter a sentence (or 'q' to quit): ")
    if sentence == 'q':
        break

    # Greedy prediction from the current model.
    predicted_label = agent.take_action(sentence)
    predicted_class = inverse_dict[predicted_label]
    print("Predicted Class:", predicted_class[0])

    # Get human feedback. A non-numeric entry used to raise ValueError and
    # abort the session before the model was saved; ask again instead.
    while True:
        feedback = input("Enter human feedback (1 for correct, 0 for incorrect): ")
        try:
            reward = int(feedback)
        except ValueError:
            print("Invalid feedback, please enter 1 or 0.")
        else:
            break

    # NOTE(review): a correct prediction ends the whole session rather than
    # moving on to the next sentence — confirm this is intended.
    if reward == 1:
        break

    # Update the model based on the human feedback.
    next_sentence = input("Enter the next sentence (or 'q' to quit): ")
    if next_sentence == 'q':
        break
    agent.update_model(sentence, predicted_label, reward, next_sentence)


# Save the updated model. This overwrites the checkpoint that was loaded.
torch.save(agent.model.state_dict(), "updated/finetuned_BERT_epoch_5.model")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Enter a sentence (or 'q' to quit): I am trying to create a dashboard. I used date slicer to filter dates to reflect data on the table. There are data in table till 2022 but the data slicer is moved beyond 4/31/2021 is not showing data in the tables
Predicted Class: PowerBI
Enter human feedback (1 for correct, 0 for incorrect): 1
