The dataset is available at: https://drive.google.com/drive/folders/13USe0gzuzmgJxKuQQqGZck3stj5WkY00?usp=sharing

In [1]:
import os
# Configure CUDA device ordering/visibility through environment variables.
# This cell runs before the cell that imports torch.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

In [2]:
import sys
import os
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from bpe import BayesPE  # BayesPE class
from llm_model import LLM
import evaluation  # Evaluation functions
import constants

In [3]:
# Paraphrased task instructions. Each entry ends with a colon because
# PromptFormatting.format_instruction appends the numbered class list right after it.
instructions = [
    "classify the sentiment of the Amazon review below into one of the following classes:",
    "Categorize the sentiment of the Amazon review provided into one of the following classes:",
    "Categorize the sentiment of the Amazon review provided into one of the given classes:",
    "Determine the sentiment category of the given Amazon review by classifying it into one of the following classes:",
    "Classify the sentiment of the given Amazon review into one of the following categories:",
    "Assign the sentiment of the Amazon review provided to one of the given categories:",
    "Categorize the sentiment of the provided Amazon review into one of the following classes:",
    "Determine the sentiment category that best corresponds to the Amazon review provided amongst the following options:",
    "Classify the sentiment expressed in the Amazon review below into one of the following categories:",
]

In [4]:
import pandas as pd
import numpy as np

# Load modified datasets. The CSVs are read without a header row; this cell uses
# column 0 as the integer label and column 2 as the review text.
df_train = pd.read_csv('train_modified.csv', header=None)
df_test = pd.read_csv('test_modified.csv', header=None)

# Split sizes
n_train = 50000      # rows taken from the top of the training file
n_in_context = 5     # few-shot examples per instruction
n_total_in_context = len(instructions) * n_in_context
n_test = 5000
n_val = 100

# Row boundaries inside df_train: [train | in-context pool | validation]
in_context_end = n_train + n_total_in_context
val_end = in_context_end + n_val

# `.iloc` slices that run past the end are truncated silently, which would yield
# short (or empty) in-context / validation splits. Fail loudly instead.
if len(df_train) < val_end:
    raise ValueError(
        'train_modified.csv has {} rows but {} are required '
        '(n_train + n_total_in_context + n_val)'.format(len(df_train), val_end)
    )
if len(df_test) < n_test:
    raise ValueError(
        'test_modified.csv has {} rows but n_test={} are required'.format(len(df_test), n_test)
    )

# **Split Data**
df_train_actual = df_train.iloc[:n_train]
df_in_context_base = df_train.iloc[n_train:in_context_end]
df_val = df_train.iloc[in_context_end:val_end]
df_test_actual = df_test.iloc[:n_test]

# **Extract Training Data**
gt_labels_train = df_train_actual.iloc[:, 0].values.astype(int)
samples_train = df_train_actual.iloc[:, 2].values

# **Extract Validation Data** (used to optimise the prompt weights)
gt_labels_val = df_val.iloc[:, 0].values.astype(int)
samples_val = df_val.iloc[:, 2].values

# **Extract Test Data (Now from `df_test`)**
gt_labels_test = df_test_actual.iloc[:, 0].values.astype(int)
samples_test = df_test_actual.iloc[:, 2].values

In [5]:
# Prompt Formatting Class
class PromptFormatting:
    """Prompt templates for two-class sentiment classification of Amazon reviews.

    Attributes (all set in ``__init__``):
        INSTRUCTION: default task instruction text.
        CLASSES: the two class names, ``['negative', 'positive']``.
        CLASSES_FOR_MATCHING: three spellings of the classes (full names,
            abbreviations, numerals); presumably consumed by BayesPE when
            matching model output to a class - confirm against ``bpe``.
        CLASSES_TEXT: the class names rendered as a numbered two-line list.
    """

    def __init__(self):
        class_names = ['negative', 'positive']
        self.INSTRUCTION = 'classify the sentiment of the Amazon review below into one of the following classes:'
        self.CLASSES = class_names
        # The first entry is the very same list object as self.CLASSES.
        self.CLASSES_FOR_MATCHING = [class_names, ['neg', 'pos'], ['1', '2']]
        self.CLASSES_TEXT = f'1. {class_names[0]}\n2. {class_names[1]}'

    def format_instruction(self, instruction):
        """Return ``instruction`` followed by the numbered class list, each on its own line."""
        return f'{instruction}\n{self.CLASSES_TEXT}\n'

    def format_content(self, content):
        """Return the review body followed by the open-ended answer stem."""
        return f'review: {content}\nthe review is '


# Single shared formatter instance handed to the classifier below.
prompt_formatting = PromptFormatting()

# **Prepare Unique In-Context Examples Per Instruction**
# Instruction i receives rows [i * n_in_context, (i + 1) * n_in_context) of
# df_in_context_base, so no few-shot example is shared between instructions.
sample_columns = []
label_columns = []
for i in range(len(instructions)):
    start_idx = i * n_in_context
    end_idx = (i + 1) * n_in_context
    df_in_context = df_in_context_base.iloc[start_idx:end_idx]

    samples_in_context_i = df_in_context.iloc[:, 2].values
    gt_labels_in_context_i = df_in_context.iloc[:, 0].values.astype(int)

    sample_columns.append(samples_in_context_i)
    label_columns.append(gt_labels_in_context_i)

# Stack once instead of growing the arrays with repeated concatenation.
# Result shape: (n_in_context, len(instructions)); column i belongs to instruction i.
samples_in_context = np.stack(sample_columns, axis=1)
gt_labels_in_context = np.stack(label_columns, axis=1)

# `.iloc` truncates silently, so an undersized df_in_context_base would otherwise
# produce fewer few-shot examples than requested without any error.
expected_shape = (n_in_context, len(instructions))
if samples_in_context.shape != expected_shape or gt_labels_in_context.shape != expected_shape:
    raise ValueError(
        'in-context arrays have shapes {} / {}, expected {}'.format(
            samples_in_context.shape, gt_labels_in_context.shape, expected_shape
        )
    )


# Initialize BayesPE (Teacher Model): every instruction is paired with its own
# column of few-shot texts/labels prepared above.
teacher_kwargs = dict(
    model_name="mistralai/Mistral-7B-Instruct-v0.3",
    prompt_formatting=prompt_formatting,
    instructions=instructions,
    few_shot_texts_sets=samples_in_context,
    few_shot_labels_sets=gt_labels_in_context,
    use_reduced_precision=True,
)
bayespe_classifier = BayesPE(**teacher_kwargs)

# Show what a fully assembled prompt looks like.
bayespe_classifier.print_prompt_example()

# Fit the per-prompt weights on the validation split. This stays the last
# expression of the cell so that its return value is displayed as cell output.
bayespe_classifier.optimise_weights(samples_val, gt_labels_val)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

EXAMPLE 1:
classify the sentiment of the Amazon review below into one of the following classes:
1. negative
2. positive

review: The build quality on this caliper is quite good (especially at the price). Mine has no discernible play in the mechanism, came with an extra battery and a reasonably beefy plastic case, and zeros out steadily without any display jumpiness. The unit I received is branded "Maxwell".Note that this caliper does *not* have fraction support in the display, and is therefore somewhat annoying to use compared to units that are only slightly more expensive.If you're completely strapped or buying these in bulk for basic uses, you won't be unhappy with your purchase. If you're a hobbyist looking for a single inexpensive but high-functionality unit, do yourself a favor and spend the extra few dollars to get one with fraction support.
the review is positive

EXAMPLE 2:
classify the sentiment of the Amazon review below into one of the following classes:
1. negative
2. posit

100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:10<00:00,  9.46it/s]


inference for promt 2 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:08<00:00, 12.24it/s]


inference for promt 3 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:06<00:00, 14.41it/s]


inference for promt 4 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:09<00:00, 10.95it/s]


inference for promt 5 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:06<00:00, 14.86it/s]


inference for promt 6 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:05<00:00, 16.91it/s]


inference for promt 7 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:07<00:00, 13.00it/s]


inference for promt 8 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:06<00:00, 15.53it/s]


inference for promt 9 out of 9


100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:07<00:00, 13.28it/s]


iteration 0, loss: 18.864399676384192


array([0.1450173 , 0.1302915 , 0.12363394, 0.0995637 , 0.1046686 ,
       0.07027145, 0.13994132, 0.07101653, 0.11559573], dtype=float32)

In [None]:
# Run the teacher over the training samples using every instruction
# (n_forward_passes is tied to len(instructions) instead of a hard-coded 9).
# NOTE(review): here the probabilities are unpacked from the SECOND value returned by
# `forward`, while the test-set cell unpacks them from the FIRST value - confirm the
# return order of BayesPE.forward and make the two cells agree.
_, probs, prompt_weights = bayespe_classifier.forward(samples_train, n_forward_passes=len(instructions))
# TODO: converting the ensembled outputs to Dirichlet parameters is not implemented in this cell.

inference for promt 1 out of 9


 13%|██████████▎                                                                   | 6640/50000 [06:34<42:51, 16.86it/s]

In [None]:
# Get teacher probabilities on the test set (for KD) and report F1 / ECE.
# n_forward_passes is tied to len(instructions) instead of a hard-coded 9.
# NOTE(review): here the probabilities are unpacked from the FIRST value returned by
# `forward`, while the training-set cell unpacks them from the SECOND value - confirm
# the return order of BayesPE.forward and make the two cells agree.
teacher_probs, _, _ = bayespe_classifier.forward(samples_test, n_forward_passes=len(instructions))

# Peek at the first ten rows only.
print(teacher_probs[:10, :])
f1_score = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='f1')
ece = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='ece')
print('Teacher f1-score: {}, Teacher ECE: {}'.format(f1_score, ece))