In [2]:
# Install the required libraries
!pip install transformers pandas openpyxl sklearn

# Import the necessary libraries
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, precision_score

# Load the data from the Excel file
data = pd.read_excel('/content/2_labeled.xlsx')

# Load the pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Tokenize and encode the input text
inputs = tokenizer(data['Question'].tolist(), return_tensors='pt', padding=True, truncation=True, max_length=512)

# Move the input data to the appropriate device (Colab should automatically detect and use a GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
inputs = {k: v.to(device) for k, v in inputs.items()}
model = model.to(device)

# Perform inference
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Get the predicted difficulty levels
predicted_levels = torch.argmax(logits, dim=-1)

# Categorize the questions based on the predicted levels
data['Predicted_Difficulty'] = ['easy' if level == 0 else 'medium' if level == 1 else 'hard' for level in predicted_levels]

# Convert the actual and predicted difficulty levels to numeric values (Easy=0, Medium=1, Hard=2)
actual_levels = [0 if level == 'easy' else 1 if level == 'medium' else 2 for level in data['Difficulty']]
predicted_levels = [0 if level == 'easy' else 1 if level == 'medium' else 2 for level in data['Predicted_Difficulty']]

# Calculate accuracy
accuracy = accuracy_score(actual_levels, predicted_levels)
print(f"Accuracy: {accuracy}")

# Calculate precision for each class
precision_easy = precision_score(actual_levels, predicted_levels, pos_label=0, average='macro')
precision_medium = precision_score(actual_levels, predicted_levels, pos_label=1, average='macro')
precision_hard = precision_score(actual_levels, predicted_levels, pos_label=2, average='macro')

print(f"Precision (Easy): {precision_easy}")
print(f"Precision (Medium): {precision_medium}")
print(f"Precision (Hard): {precision_hard}")

Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Accuracy: 0.2753623188405797
Precision (Easy): 0.25735294117647056
Precision (Medium): 0.25735294117647056
Precision (Hard): 0.25735294117647056


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
from google.colab import files

files.download('best_model.bin')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn import metrics, preprocessing
import transformers
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel

# Setting up device
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

# Loading the data
df = pd.read_excel("/content/2_labeled.xlsx")
df = df[['Question', 'Difficulty']]

# Encoding labels
label_encoder = preprocessing.LabelEncoder()
df['Difficulty'] = label_encoder.fit_transform(df['Difficulty'])

# Key variables
MAX_LEN = 200
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 10  # Increased number of epochs
LEARNING_RATE = 1e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.questions = dataframe.Question
        self.labels = dataframe.Difficulty
        self.max_len = max_len

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, index):
        question = str(self.questions[index])
        question = " ".join(question.split())

        inputs = self.tokenizer.encode_plus(
            question,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'labels': torch.tensor(self.labels[index], dtype=torch.long)
        }

# Splitting the dataset
train_size = 0.8
train_dataset = df.sample(frac=train_size, random_state=200)
test_dataset = df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)

train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
               'shuffle': True,
               'num_workers': 0
               }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

# Model definition
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 3)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

model = BERTClass()
model.to(device)

# Loss function and optimizer
def loss_fn(outputs, labels):
    return torch.nn.CrossEntropyLoss()(outputs, labels)

optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=True)

# Training function
def train(epoch):
    model.train()
    for _, data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        labels = data['labels'].to(device, dtype=torch.long)

        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if _ % 500 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')

# Validation function
def validation():
    model.eval()
    fin_targets = []
    fin_outputs = []
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            labels = data['labels'].to(device, dtype=torch.long)

            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(labels.cpu().detach().numpy().tolist())
            fin_outputs.extend(outputs.cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

# Training and evaluation loop
best_accuracy = 0
current_epoch = 0  # Track the current epoch

for epoch in range(EPOCHS):
    print(f"Starting epoch {epoch + 1}/{EPOCHS}")
    train(epoch)
    outputs, targets = validation()
    predicted_classes = np.argmax(outputs, axis=1)
    accuracy = metrics.accuracy_score(targets, predicted_classes)
    f1_score_micro = metrics.f1_score(targets, predicted_classes, average='micro')
    f1_score_macro = metrics.f1_score(targets, predicted_classes, average='macro')
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")

    # Adjust learning rate based on validation loss
    scheduler.step(metrics.log_loss(targets, outputs, labels=[0, 1, 2]))

    # Save the model if it has the best accuracy so far
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), 'best_model.bin')
        print(f"Saved Best Model with Accuracy: {best_accuracy}")

    current_epoch = epoch + 1  # Update current epoch

# Load the best model
model.load_state_dict(torch.load('best_model.bin'))
print(f"Training completed. Best model saved after epoch {current_epoch}")


FULL Dataset: (138, 2)
TRAIN Dataset: (110, 2)
TEST Dataset: (28, 2)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Starting epoch 1/10
Epoch: 0, Loss: 0.9870579242706299
Accuracy Score = 0.6071428571428571
F1 Score (Micro) = 0.6071428571428571
F1 Score (Macro) = 0.499047619047619
Saved Best Model with Accuracy: 0.6071428571428571
Starting epoch 2/10




Epoch: 1, Loss: 1.1072046756744385
Accuracy Score = 0.5
F1 Score (Micro) = 0.5
F1 Score (Macro) = 0.3974358974358975
Starting epoch 3/10




Epoch: 2, Loss: 0.9499197006225586
Accuracy Score = 0.6428571428571429
F1 Score (Micro) = 0.6428571428571429
F1 Score (Macro) = 0.6325281803542673
Saved Best Model with Accuracy: 0.6428571428571429
Starting epoch 4/10




Epoch: 3, Loss: 0.930146336555481
Accuracy Score = 0.6785714285714286
F1 Score (Micro) = 0.6785714285714286
F1 Score (Macro) = 0.6608695652173914
Saved Best Model with Accuracy: 0.6785714285714286
Starting epoch 5/10




Epoch: 4, Loss: 0.9137356281280518
Accuracy Score = 0.6785714285714286
F1 Score (Micro) = 0.6785714285714286
F1 Score (Macro) = 0.654978354978355
Starting epoch 6/10




Epoch: 5, Loss: 0.6520323753356934
Accuracy Score = 0.75
F1 Score (Micro) = 0.75
F1 Score (Macro) = 0.7452012383900929
Saved Best Model with Accuracy: 0.75
Starting epoch 7/10




Epoch: 6, Loss: 0.6022605299949646
Accuracy Score = 0.75
F1 Score (Micro) = 0.75
F1 Score (Macro) = 0.7485380116959064
Starting epoch 8/10




Epoch: 7, Loss: 0.6034979224205017
Accuracy Score = 0.75
F1 Score (Micro) = 0.75
F1 Score (Macro) = 0.7485380116959064
Starting epoch 9/10




Epoch: 8, Loss: 0.44471749663352966
Accuracy Score = 0.6785714285714286
F1 Score (Micro) = 0.6785714285714286
F1 Score (Macro) = 0.6726522187822498
Starting epoch 10/10




Epoch: 9, Loss: 0.35351794958114624
Accuracy Score = 0.7142857142857143
F1 Score (Micro) = 0.7142857142857143
F1 Score (Macro) = 0.7115009746588695
Training completed. Best model saved after epoch 10


In [11]:
import torch
import transformers
import numpy as np
from transformers import BertTokenizer
from sklearn import preprocessing

# Load the model class definition
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 3)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

# Load the model
model = BERTClass()
model.load_state_dict(torch.load('best_model.bin'))
model.to(device)
model.eval()

# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Preprocessing function
def preprocess_question(question, tokenizer, max_len=200):
    inputs = tokenizer.encode_plus(
        question,
        None,
        add_special_tokens=True,
        max_length=max_len,
        pad_to_max_length=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        truncation=True
    )
    ids = inputs['input_ids']
    mask = inputs['attention_mask']
    token_type_ids = inputs['token_type_ids']

    return {
        'ids': torch.tensor(ids, dtype=torch.long).unsqueeze(0),  # Batch size of 1
        'mask': torch.tensor(mask, dtype=torch.long).unsqueeze(0),
        'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long).unsqueeze(0)
    }

# Predict function
def predict_difficulty(question, model, tokenizer, label_encoder):
    model.eval()
    inputs = preprocess_question(question, tokenizer)
    ids = inputs['ids'].to(device, dtype=torch.long)
    mask = inputs['mask'].to(device, dtype=torch.long)
    token_type_ids = inputs['token_type_ids'].to(device, dtype=torch.long)

    with torch.no_grad():
        outputs = model(ids, mask, token_type_ids)
        probabilities = torch.softmax(outputs, dim=1).cpu().detach().numpy()
        predicted_class = np.argmax(probabilities, axis=1)[0]

    return label_encoder.inverse_transform([predicted_class])[0]

# Label encoder
label_encoder = preprocessing.LabelEncoder()
label_encoder.classes_ = np.array(['easy', 'medium', 'hard'])

# Example prediction
new_question = "Consider the following schedules involving two transactions. S1 : r1(X) ; r1(Y) ; r2(X) ; r2(Y) ; w2(Y) ; w1(X) S2 : r1(X) ; r2(X) ; r2(Y) ; w2(Y) ; r1(Y) ; w1(X) Which one of the following statements is correct with respect to above? Options: a. Both S1 and S2 are conflict serializable.; b. Both S1 and S2 are not conflict serializable.; c. S1 is conflict serializable and S2 is not conflict serializable.; d. S1 is not conflict serializable and S2 is conflict serializable."
predicted_difficulty = predict_difficulty(new_question, model, tokenizer, label_encoder)
print(f"The predicted difficulty for the question is: {predicted_difficulty}")


The predicted difficulty for the question is: medium


