In [None]:
!pip install transformers
!pip install plotly
!pip install cufflinks

In [None]:
import gym
import numpy as np
from gym import spaces
from transformers import BertTokenizerFast
import pandas as pd
import torch
from transformers import BertTokenizerFast, DistilBertForSequenceClassification
from torch.distributions import Categorical
from torch.optim import Adam
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
class LabelingEnv(gym.Env):
  """Environment that serves text instances one at a time for binary labeling.

  Every step compares the chosen action with the label of the current
  instance and yields a reward of +1 on a match and -1 otherwise. The
  observations returned by reset() and step() are dicts holding the
  'input_ids' and 'attention_mask' tensors produced by the tokenizer, padded
  or truncated to MAX_LENGTH tokens.

  Args:
    instances: indexable sequence of texts to label.
    labels: indexable sequence of labels, aligned with `instances`.
  """

  # Token length every instance is padded/truncated to.
  MAX_LENGTH = 128

  def __init__(self, instances, labels):
    super(LabelingEnv, self).__init__()
    self.instances = instances
    self.labels = labels
    self.current_instance = 0
    self.tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
    # Nothing is tokenized here: encoding is deferred to reset()/step(), so
    # building the environment does no throw-away work and does not fail on
    # an empty instance list.

    #define the output of the model
    self.action_space = spaces.Discrete(2)
    # NOTE(review): the declared Box does not describe the dict observations
    # actually returned by reset()/step(); kept as-is for compatibility.
    self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(1, self.MAX_LENGTH))

  def _encode(self, index):
    """Tokenize instances[index] into the observation dict handed to the model."""
    encoded = self.tokenizer(
        [self.instances[index]],
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=self.MAX_LENGTH,
        return_token_type_ids=False,
    )
    return { 'input_ids': encoded['input_ids'], 'attention_mask': encoded['attention_mask'] }

  def step(self, action):
    """Score `action` against the current label and advance to the next instance.

    Args:
      action: the label chosen for the current instance.

    Returns:
      A 3-tuple (next_state, reward, done). `reward` is 1 when `action` equals
      the current label, otherwise -1. `done` is True once the last instance
      has been consumed, in which case `next_state` is None.
    """
    reward = 1 if action == self.labels[self.current_instance] else -1
    self.current_instance += 1
    done = self.current_instance == len(self.instances)
    next_state = None if done else self._encode(self.current_instance)
    return next_state, reward, done


  def reset(self):
    """Rewind to the first instance and return its encoded observation."""
    self.current_instance = 0
    return self._encode(self.current_instance)

In [None]:
# Mount Google Drive at /content/drive; the dataset, model and tokenizer paths
# used by the following cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

  self._read_thread.setDaemon(True)


Mounted at /content/drive


In [None]:
# Pretrained DistilBERT with a sequence-classification head (2 labels: Slang, No Slang)
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
# Freeze the base model so that only parameters outside of it stay trainable
for base_param in model.base_model.parameters():
    base_param.requires_grad = False
tokenizer = BertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Optimizer over the model's parameters
optimizer = Adam(model.parameters(), lr=1e-5)

# Read the training CSV from Drive and keep one row per distinct sentence
df = pd.read_csv(
    '/content/drive/MyDrive/BERT Models/Dataset/unbiasedDataTrain.csv'
).drop_duplicates(subset=['sentence'])

# Parallel Python lists: the text of each row and its label
instances, labels = df['sentence'].tolist(), df['label'].tolist()

# Custom labeling environment built on the full training set
env = LabelingEnv(instances, labels)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias', 'classifier.we

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 5-fold cross-validation of a REINFORCE-style trainer: for every fold a fresh
# DistilBERT classifier (frozen base model) is trained by sampling a label for
# each training sentence and scaling the log-probability of that sample by the
# +1/-1 reward from LabelingEnv; the fold is then scored on its held-out split.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# all_rewards[fold][epoch] = summed reward of that epoch
all_rewards = []

n = 100 #number of epochs (identical for every fold, so set once)

for fold, (train_index, test_index) in enumerate(kf.split(instances)):
    print(f'Starting Fold {fold+1}...')
    train_instances = [instances[i] for i in train_index]
    train_labels = [labels[i] for i in train_index]
    test_instances = [instances[i] for i in test_index]
    test_labels = [labels[i] for i in test_index]

    # Initialize model and optimizer for each fold
    model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased') # 2 labels: Slang, No Slang
    for param in model.base_model.parameters():
        param.requires_grad = False
    model.to(device)
    optimizer = Adam(model.parameters(), lr=1e-5)

    # Create environment with training data
    env = LabelingEnv(train_instances, train_labels)

    model.train()
    fold_rewards = []
    for epoch in tqdm(range(n), desc = 'Epochs'):
        state = env.reset()
        done = False
        epoch_rewards = []
        pbar = tqdm(total=len(env.instances), desc=f'Epoch {epoch + 1}', leave=False)
        try:
            # env.step() returns a None state only together with done=True, so
            # `not done` alone guarantees a valid state here.
            while not done:
                state = {k: v.to(device) for k, v in state.items()}
                outputs = model(**state)

                # Build the action distribution straight from the logits: this
                # is the same distribution as softmax(logits), but log_prob
                # stays finite and differentiable for near-zero probabilities.
                dist = Categorical(logits=outputs.logits[0])

                #sampling action from the distribution
                action = dist.sample()

                #train in the environment
                state, reward, done = env.step(action.item())
                epoch_rewards.append(reward)

                # REINFORCE objective: raise the log-probability of rewarded
                # actions, lower it for penalised ones
                loss = -dist.log_prob(action) * reward

                #backpropagation
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                pbar.update(1)
        finally:
            # Close the bar even when the epoch is interrupted or fails
            pbar.close()
        fold_rewards.append(np.sum(epoch_rewards))
        print(f'\nEpoch {epoch + 1}: Total rewards {np.sum(epoch_rewards)}')

    print(f'Validating on Fold {fold+1}...')
    # Validation is plain greedy classification, so no environment is needed.
    # `tokenizer` is the module-level tokenizer created in the setup cell.
    model.eval()
    preds = []
    with torch.no_grad():
        for instance in test_instances:
            encoded = tokenizer([instance], return_tensors='pt', padding='max_length', truncation=True, max_length=128, return_token_type_ids=False)
            encoded = {k: v.to(device) for k, v in encoded.items()}
            outputs = model(**encoded)
            _, predicted = torch.max(outputs.logits, dim=1)
            preds.append(predicted.item())
    all_rewards.append(fold_rewards)
    accuracy = accuracy_score(test_labels, preds)
    precision = precision_score(test_labels, preds)
    recall = recall_score(test_labels, preds)
    f1 = f1_score(test_labels, preds)

    print(f'Validation results for Fold {fold+1}: Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}\n')


In [None]:
import plotly.graph_objects as go
import cufflinks as cf
import pandas as pd

#assuming all_rewards is a list of lists where each sublist is rewards of one fold
all_rewards_df = pd.DataFrame(all_rewards).T #transpose to have each fold as a column

# 10-epoch rolling mean; the first 9 epochs of each fold are NaN because the
# window is not yet full, so the lines start at epoch index 9.
moving_avg_rewards = all_rewards_df.rolling(window=10).mean()

# Shared x-axis (epoch index), computed once for all traces
epochs_axis = list(range(len(moving_avg_rewards)))

fig = go.Figure()
# Iterate over the folds actually present instead of assuming exactly five,
# so a partially completed training run can still be plotted.
for fold in moving_avg_rewards.columns:
    fig.add_trace(go.Scatter(x=epochs_axis,
                             y=moving_avg_rewards[fold],
                             mode='lines',
                             name=f'Fold {fold+1}'))

fig.update_layout(title='Moving Average Rewards per Epoch for each fold',
                   xaxis_title='Epoch',
                   yaxis_title='Moving Average Rewards')

fig.show()

In [None]:
import pickle

# Write the current model and tokenizer to Drive.
# NOTE(review): `model` here is whatever the name is bound to when this cell
# runs -- after the cross-validation cell that is the last fold's model.
model.save_pretrained('/content/drive/MyDrive/BERT Models/BERT RL/model')
tokenizer.save_pretrained('/content/drive/MyDrive/BERT Models/BERT RL/tokenizer')

# Pickle the training sentences and their labels next to the model.
for pickle_path, payload in (
    ("/content/drive/MyDrive/BERT Models/BERT RL/instances", instances),
    ("/content/drive/MyDrive/BERT Models/BERT RL/labels", labels),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(payload, f)


In [None]:
from transformers import BertTokenizerFast, DistilBertForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import pandas as pd
import torch

# Reload the saved model/tokenizer from Drive and score them on the held-out
# test CSV with greedy (argmax) predictions.
model_dir = '/content/drive/MyDrive/BERT Models/BERT RL/model'
tokenizer_dir = '/content/drive/MyDrive/BERT Models/BERT RL/tokenizer'

model = DistilBertForSequenceClassification.from_pretrained(model_dir)
tokenizer = BertTokenizerFast.from_pretrained(tokenizer_dir)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

test_df = pd.read_csv('/content/drive/MyDrive/BERT Models/Dataset/unbiasedDataTest.csv')
test_instances = test_df['sentence'].tolist()
test_labels = test_df['label'].tolist()

# No LabelingEnv is built here: evaluation only needs the tokenizer and model.
model.eval()
preds = []
with torch.no_grad():
    for instance in test_instances:
        # return_token_type_ids=False matches every other tokenizer call in
        # this notebook: BertTokenizerFast emits token_type_ids by default,
        # but DistilBERT's forward() has no such input, so passing them via
        # model(**encoded) fails.
        encoded = tokenizer([instance], return_tensors='pt', padding='max_length', truncation=True, max_length=128, return_token_type_ids=False)
        encoded = {k: v.to(device) for k, v in encoded.items()}
        outputs = model(**encoded)
        # Index of the larger logit is the predicted class
        _, predicted = torch.max(outputs.logits, dim=1)
        preds.append(predicted.item())

accuracy = accuracy_score(test_labels, preds)
precision = precision_score(test_labels, preds)
recall = recall_score(test_labels, preds)
f1 = f1_score(test_labels, preds)

print(f'Test results: Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}\n')