In [1]:
import sys
import os
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from bpe import BayesPE
from llm_model import LLM
import evaluation
import constants

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define task instructions
instructions = [
    'classify the sentiment of the Amazon review below into one of the following classes:',
    'Categorize the sentiment of the Amazon review provided into one of the following classes:',
    'Categorize the sentiment of the Amazon review provided into one of the given classes:',
    'Determine the sentiment category of the given Amazon review by classifying it into one of the following classes:',
    'Classify the sentiment of the given Amazon review into one of the following categories:',
    'Assign the sentiment of the Amazon review provided to one of the given categories:',
    'Categorize the sentiment of the provided Amazon review into one of the following classes:',
    'Determine the sentiment category that best corresponds to the Amazon review provided amongst the following options:',
    'Classify the sentiment expressed in the Amazon review below into one of the following categories:'
]

In [3]:
# Load amazon reviews polarity train and test data
df_train = pd.read_csv('train_modified.csv', header=None)
df_test = pd.read_csv('test_modified.csv', header=None)
n_train = 50000  
n_in_context = 5  
n_total_in_context = len(instructions) * n_in_context  
n_test = 5000
n_val=100
df_train_actual = df_train.iloc[:n_train] 
df_in_context_base = df_train.iloc[n_train:n_train + n_total_in_context]
df_val = df_train.iloc[n_train + n_total_in_context:n_train+n_total_in_context+n_val]
df_test_actual = df_test.iloc[:n_test]  
gt_labels_train = df_train_actual.iloc[:, 0].values.astype(int) 
samples_train = df_train_actual.iloc[:, 2].values 
gt_labels_val = df_val.iloc[:, 0].values.astype(int) 
samples_val = df_val.iloc[:, 2].values 
gt_labels_test = df_test_actual.iloc[:, 0].values.astype(int)
samples_test = df_test_actual.iloc[:, 2].values 

In [4]:
# Define a prompt formatting class for sentiment classification and initializes an LLM-based classifier
class PromptFormatting(object):
    def __init__(self):
        self.INSTRUCTION = 'classify the sentiment of the Amazon review below into one of the following classes:'
        self.CLASSES = ['negative', 'positive']
        self.CLASSES_FOR_MATCHING = [self.CLASSES, ['neg', 'pos'], ['1', '2']]
        self.CLASSES_TEXT = '''1. {}\n2. {}'''.format(self.CLASSES[0], self.CLASSES[1])

    def format_instruction(self, instruction):
        return '''{}\n{}\n'''.format(instruction, self.CLASSES_TEXT)

    def format_content(self, content):
        return '''review: {}\nthe review is '''.format(content)

prompt_formatting = PromptFormatting()

# **Prepare Unique In-Context Examples Per Instruction**
for i in range(len(instructions)):  
    start_idx = i * n_in_context
    end_idx = (i + 1) * n_in_context
    df_in_context = df_in_context_base.iloc[start_idx:end_idx]

    samples_in_context_i = df_in_context.iloc[:, 2].values
    gt_labels_in_context_i = df_in_context.iloc[:, 0].values.astype(int)

    if i == 0:
        samples_in_context = np.expand_dims(samples_in_context_i, axis=1)
        gt_labels_in_context = np.expand_dims(gt_labels_in_context_i, axis=1)
    else:
        samples_in_context = np.concatenate((samples_in_context, np.expand_dims(samples_in_context_i, axis=1)), axis=1)
        gt_labels_in_context = np.concatenate((gt_labels_in_context, np.expand_dims(gt_labels_in_context_i, axis=1)), axis=1)


# Initialize BayesPE (Teacher Model)
bayespe_classifier = BayesPE(
    model_name="mistralai/Mistral-7B-Instruct-v0.3", 
    prompt_formatting=prompt_formatting,
    instructions=instructions, 
    few_shot_texts_sets=samples_in_context, 
    few_shot_labels_sets=gt_labels_in_context, 
    use_reduced_precision=True
)

# Print example prompt
bayespe_classifier.print_prompt_example()

# Optimize prompt weights
weights = bayespe_classifier.optimise_weights(samples_val, gt_labels_val)

Loading checkpoint shards: 100%|██████████████████| 3/3 [00:02<00:00,  1.11it/s]


EXAMPLE 1:
classify the sentiment of the Amazon review below into one of the following classes:
1. negative
2. positive

review: The build quality on this caliper is quite good (especially at the price). Mine has no discernible play in the mechanism, came with an extra battery and a reasonably beefy plastic case, and zeros out steadily without any display jumpiness. The unit I received is branded "Maxwell".Note that this caliper does *not* have fraction support in the display, and is therefore somewhat annoying to use compared to units that are only slightly more expensive.If you're completely strapped or buying these in bulk for basic uses, you won't be unhappy with your purchase. If you're a hobbyist looking for a single inexpensive but high-functionality unit, do yourself a favor and spend the extra few dollars to get one with fraction support.
the review is positive

EXAMPLE 2:
classify the sentiment of the Amazon review below into one of the following classes:
1. negative
2. posit

100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.77it/s]


inference for promt 2 out of 9


100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.58it/s]


inference for promt 3 out of 9


100%|█████████████████████████████████████████| 100/100 [00:12<00:00,  7.73it/s]


inference for promt 4 out of 9


100%|█████████████████████████████████████████| 100/100 [00:17<00:00,  5.66it/s]


inference for promt 5 out of 9


100%|█████████████████████████████████████████| 100/100 [00:12<00:00,  7.93it/s]


inference for promt 6 out of 9


100%|█████████████████████████████████████████| 100/100 [00:11<00:00,  8.83it/s]


inference for promt 7 out of 9


100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.93it/s]


inference for promt 8 out of 9


100%|█████████████████████████████████████████| 100/100 [00:12<00:00,  8.15it/s]


inference for promt 9 out of 9


100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  7.03it/s]


iteration 0, loss: 18.82447903304606


In [None]:
# Get prompt weights and prompt wise probabilities on train data
_,probs,weights = bayespe_classifier.forward(samples_train, n_forward_passes=9)

In [None]:
torch.save(probs,'amazon_probs.pt')
torch.save(weights,'amazon_prompt_weights.pt')

In [None]:
# Evaluate BayesPE performance on amazon reviews polarity test data
teacher_probs,_,_ = bayespe_classifier.forward(samples_test, n_forward_passes=9)
print(teacher_probs[:10, :])
f1_score = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='f1')
ece = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='ece')
print('Teacher f1-score: {}, Teacher ECE: {}'.format(f1_score, ece))

## Evaluation on out-of-distribution data

In [23]:
# Define task instructions for yahoo answers dataset
instructions = [
    'classify the question and answer below into one of the following topics:',
    'Assign a topic label to the following question and answer from the list provided:',
    'Determine which topic best fits the question and answer shown below:',
    'Categorize the following Q&A under one of these topics:',
    'Select the most appropriate topic for the question and answer pair below:',
    'Choose the correct topic category for the given question and answer:',
    'Identify the topic that the following question and answer belong to:',
    'Match the question and answer below to the relevant topic:',
    'Label the question and answer below with the most fitting topic from the list:'
]

In [24]:
# Load Yahoo Answers dataset
df_train = pd.read_csv('train_yahoo.csv', header=None)
df_test = pd.read_csv('test_yahoo.csv', header=None)
n_train = 50000  
n_in_context = 5  
n_val = 100
n_test = 5000
n_total_in_context = len(instructions) * n_in_context
df_train_actual = df_train.iloc[:n_train]
df_in_context_base = df_train.iloc[n_train:n_train + n_total_in_context]
df_val = df_train.iloc[n_train + n_total_in_context:n_train + n_total_in_context + n_val]
df_test_actual = df_test.iloc[:n_test]

def format_prompt(q1, q2, a):
    return "Question: " + q1.astype(str) + " " + q2.astype(str) + "\nAnswer: " + a.astype(str)

gt_labels_train = df_train_actual.iloc[:, 0].values.astype(int)
samples_train = format_prompt(df_train_actual.iloc[:, 1], df_train_actual.iloc[:, 2], df_train_actual.iloc[:, 3]).values

gt_labels_val = df_val.iloc[:, 0].values.astype(int)
samples_val = format_prompt(df_val.iloc[:, 1], df_val.iloc[:, 2], df_val.iloc[:, 3]).values

gt_labels_test = df_test_actual.iloc[:, 0].values.astype(int)
samples_test = format_prompt(df_test_actual.iloc[:, 1], df_test_actual.iloc[:, 2], df_test_actual.iloc[:, 3]).values

# Few-shot formatting for ensemble prompts
for i in range(len(instructions)):
    start_idx = i * n_in_context
    end_idx = (i + 1) * n_in_context
    df_in_context = df_in_context_base.iloc[start_idx:end_idx]

    samples_in_context_i = format_prompt(df_in_context.iloc[:, 1], df_in_context.iloc[:, 2], df_in_context.iloc[:, 3]).values
    gt_labels_in_context_i = df_in_context.iloc[:, 0].values.astype(int)

    if i == 0:
        samples_in_context = np.expand_dims(samples_in_context_i, axis=1)
        gt_labels_in_context = np.expand_dims(gt_labels_in_context_i, axis=1)
    else:
        samples_in_context = np.concatenate((samples_in_context, np.expand_dims(samples_in_context_i, axis=1)), axis=1)
        gt_labels_in_context = np.concatenate((gt_labels_in_context, np.expand_dims(gt_labels_in_context_i, axis=1)), axis=1)


In [25]:
# Prompt Formatting Class
class PromptFormatting(object):
    def __init__(self):
        self.INSTRUCTION = 'classify the question and answer below into one of the following topics:'
        self.CLASSES = [
    'Society & Culture',
    'Science & Mathematics',
    'Health',
    'Education & Reference',
    'Computers & Internet',
    'Sports',
    'Business & Finance',
    'Entertainment & Music',
    'Family & Relationships',
    'Politics & Government'
]
        self.CLASSES_FOR_MATCHING = [self.CLASSES]
        self.CLASSES_TEXT = '''1. {}\n2. {}\n3. {}\n4. {}\n5. {}\n6. {}\n7. {}\n8. {}\n9. {}\n10. {}'''.format(self.CLASSES[0],self.CLASSES[1], self.CLASSES[2], self.CLASSES[3], self.CLASSES[4], self.CLASSES[5], self.CLASSES[6], self.CLASSES[7], self.CLASSES[8], self.CLASSES[9])
    def format_instruction(self, instruction):
        return '''{}\n{}\n'''.format(instruction, self.CLASSES_TEXT)

    def format_content(self, content):
        return '''{}\nthe topic is '''.format(content)

prompt_formatting = PromptFormatting()



# Initialize BayesPE (Teacher Model)
bayespe_classifier = BayesPE(
    model_name="mistralai/Mistral-7B-Instruct-v0.3", 
    prompt_formatting=prompt_formatting,
    instructions=instructions, 
    few_shot_texts_sets=samples_in_context, 
    few_shot_labels_sets=gt_labels_in_context, 
    use_reduced_precision=True
)

# Print example prompt
bayespe_classifier.print_prompt_example()
bayespe_classifier.weights = weights

Loading checkpoint shards: 100%|██████████████████| 3/3 [00:02<00:00,  1.11it/s]


EXAMPLE 1:
classify the question and answer below into one of the following topics:
1. Society & Culture
2. Science & Mathematics
3. Health
4. Education & Reference
5. Computers & Internet
6. Sports
7. Business & Finance
8. Entertainment & Music
9. Family & Relationships
10. Politics & Government

Question: when you talk about the volume of a gas are you refering to the volume of the molecules themselves? explain?
Answer: No, the volume refers to the total space in which those molecules are found moving around (should be the same as the volume of the container). In any case, atoms and molecules are pretty much all empty space themselves - most of the mass is concentrated in the nucleus, but the electron cloud takes up a lot more space.
the topic is Science & Mathematics

EXAMPLE 2:
classify the question and answer below into one of the following topics:
1. Society & Culture
2. Science & Mathematics
3. Health
4. Education & Reference
5. Computers & Internet
6. Sports
7. Business & Finan

In [26]:
# Evaluate BayesPE performance on yahoo answers test data
teacher_probs,yahoo_probs,weights = bayespe_classifier.forward(samples_test, n_forward_passes=9)
print(teacher_probs[:10, :])
f1_score = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='f1')
ece = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='ece')
print('Teacher f1-score: {}, Teacher ECE: {}'.format(f1_score, ece))

inference for promt 1 out of 9


100%|███████████████████████████████████████| 5000/5000 [17:01<00:00,  4.90it/s]


inference for promt 2 out of 9


100%|███████████████████████████████████████| 5000/5000 [20:10<00:00,  4.13it/s]


inference for promt 3 out of 9


100%|███████████████████████████████████████| 5000/5000 [19:55<00:00,  4.18it/s]


inference for promt 4 out of 9


100%|███████████████████████████████████████| 5000/5000 [15:46<00:00,  5.28it/s]


inference for promt 5 out of 9


100%|███████████████████████████████████████| 5000/5000 [22:50<00:00,  3.65it/s]


inference for promt 6 out of 9


100%|███████████████████████████████████████| 5000/5000 [17:37<00:00,  4.73it/s]


inference for promt 7 out of 9


100%|███████████████████████████████████████| 5000/5000 [18:44<00:00,  4.45it/s]


inference for promt 8 out of 9


100%|███████████████████████████████████████| 5000/5000 [17:50<00:00,  4.67it/s]


inference for promt 9 out of 9


100%|███████████████████████████████████████| 5000/5000 [18:13<00:00,  4.57it/s]


[[8.45756762e-02 1.88207196e-05 3.39405291e-05 3.97109855e-03
  1.35915936e-05 2.62469348e-05 3.27115393e-05 3.59431180e-03
  9.07697825e-01 3.57699322e-05]
 [2.51664863e-01 7.29580817e-01 2.66205665e-03 1.48715499e-02
  4.11888132e-05 1.19254624e-04 2.00581671e-05 5.67739270e-04
  4.44848685e-04 2.76163162e-05]
 [2.57079452e-02 2.34858281e-02 1.42317237e-04 5.23723292e-02
  3.90818353e-04 3.79506955e-03 7.04204319e-05 8.80061135e-01
  1.38152784e-02 1.58851605e-04]
 [1.31991957e-05 1.94069460e-05 1.04723110e-05 9.99879883e-01
  1.21276148e-05 1.01098520e-05 1.37813362e-05 1.49860088e-05
  1.55330421e-05 1.04930465e-05]
 [9.15121512e-04 6.95151061e-03 9.83281760e-01 6.07006963e-03
  1.04610538e-05 1.20766586e-05 1.08142086e-05 1.49965487e-05
  2.70872762e-03 2.44550983e-05]
 [3.93673785e-01 7.35555392e-05 3.22188263e-02 8.14434073e-03
  5.47718338e-05 4.75290514e-05 1.98641850e-02 4.46969059e-04
  5.35088716e-01 1.03873144e-02]
 [1.29769277e-04 4.70437593e-05 1.22023357e-05 2.66148918e

In [28]:
# Compute predictive entropy
def entropy_numpy(probs: np.ndarray) -> np.ndarray:
    return entropy(probs, axis=1) 

In [29]:
import numpy as np
from scipy.stats import entropy

probs = np.transpose(yahoo_probs, (0, 2, 1))  
# Compute prompt wise predictive entropy
ent_per_prompt = entropy(probs, axis=2) 
# Weighted sum of prompt wise predictive entropies
yahoo_weighted_entropy = np.sum(ent_per_prompt * weights[None, :], axis=1)  
print(yahoo_weighted_entropy)

[0.21908174 0.43992501 0.43506405 ... 0.49847439 0.53801925 0.03105501]


In [30]:
# Mean predictive entropy
print(yahoo_weighted_entropy.mean())

0.41573226761274634


In [14]:
# Define task instructions for sst2 dataset
instructions = [
    "Classify the sentiment of the following movie review into one of the given categories.",
    "Determine the emotional tone expressed in the movie review excerpt below.",
    "Assign a sentiment label to the text based on its overall attitude.",
    "Analyze the review and select the appropriate sentiment category it falls under.",
    "What is the sentiment conveyed by this portion of the movie review? Choose from the specified classes.",
    "Label the following movie review extract with its correct sentiment: positive, negative, or neutral.",
    "Identify and classify the sentiment expressed in the review passage below.",
    "Based on the language and tone of the review, determine the correct sentiment label.",
    "Select the sentiment category that best matches the opinion expressed in the review snippet."
]

In [15]:
# Load sst2 dataset
df_train = pd.read_csv('train_sst2.csv')
df_test = pd.read_csv('test_sst2.csv')
n_train = 50000  
n_in_context = 5  
n_total_in_context = len(instructions) * n_in_context  
n_val=100
df_train_actual = df_train.iloc[:n_train] 
df_in_context_base = df_train.iloc[n_train:n_train + n_total_in_context]
df_val = df_train.iloc[n_train + n_total_in_context:n_train+n_total_in_context+n_val]
df_test_actual = df_test.iloc[:]  
gt_labels_train = df_train_actual.iloc[:, 2].values.astype(int) 
samples_train = df_train_actual.iloc[:, 1].values 
gt_labels_val = df_val.iloc[:, 2].values.astype(int) 
samples_val = df_val.iloc[:, 1].values 
gt_labels_test = df_test_actual.iloc[:, 2].values.astype(int)
samples_test = df_test_actual.iloc[:, 1].values 
# **Prepare Unique In-Context Examples Per Instruction**
for i in range(len(instructions)):  
    start_idx = i * n_in_context
    end_idx = (i + 1) * n_in_context
    df_in_context = df_in_context_base.iloc[start_idx:end_idx]

    samples_in_context_i = df_in_context.iloc[:, 1].values
    gt_labels_in_context_i = df_in_context.iloc[:, 2].values.astype(int)

    if i == 0:
        samples_in_context = np.expand_dims(samples_in_context_i, axis=1)
        gt_labels_in_context = np.expand_dims(gt_labels_in_context_i, axis=1)
    else:
        samples_in_context = np.concatenate((samples_in_context, np.expand_dims(samples_in_context_i, axis=1)), axis=1)
        gt_labels_in_context = np.concatenate((gt_labels_in_context, np.expand_dims(gt_labels_in_context_i, axis=1)), axis=1)


In [16]:
# Prompt Formatting Class
class PromptFormatting(object):
    def __init__(self):
        self.INSTRUCTION = 'Classify the sentiment of the following movie review into one of the given categories.'
        self.CLASSES = ['negative', 'positive']
        self.CLASSES_FOR_MATCHING = [self.CLASSES, ['neg', 'pos'], ['1', '2']]
        self.CLASSES_TEXT = '''1. {}\n2. {}'''.format(self.CLASSES[0], self.CLASSES[1])

    def format_instruction(self, instruction):
        return '''{}\n{}\n'''.format(instruction, self.CLASSES_TEXT)

    def format_content(self, content):
        return '''review: {}\nthe review is '''.format(content)

prompt_formatting = PromptFormatting()



# Initialize BayesPE (Teacher Model)
bayespe_classifier = BayesPE(
    model_name="mistralai/Mistral-7B-Instruct-v0.3", 
    prompt_formatting=prompt_formatting,
    instructions=instructions, 
    few_shot_texts_sets=samples_in_context, 
    few_shot_labels_sets=gt_labels_in_context, 
    use_reduced_precision=True
)

# Print example prompt
bayespe_classifier.print_prompt_example()
bayespe_classifier.weights = weights

Loading checkpoint shards: 100%|██████████████████| 3/3 [00:02<00:00,  1.11it/s]


EXAMPLE 1:
Classify the sentiment of the following movie review into one of the given categories.
1. negative
2. positive

review: glow 
the review is positive

EXAMPLE 2:
Classify the sentiment of the following movie review into one of the given categories.
1. negative
2. positive

review: a classical dramatic animated feature 
the review is positive

EXAMPLE 3:
Classify the sentiment of the following movie review into one of the given categories.
1. negative
2. positive

review: best espionage picture 
the review is positive

EXAMPLE 4:
Classify the sentiment of the following movie review into one of the given categories.
1. negative
2. positive

review: drag on for nearly three hours 
the review is negative

EXAMPLE 5:
Classify the sentiment of the following movie review into one of the given categories.
1. negative
2. positive

review: the entire point of a shaggy dog story , of course , is that it goes nowhere , and 
the review is negative

EXAMPLE 6:
Classify the sentiment of the

In [17]:
# Evaluate BayesPE performance on sst2 test data
teacher_probs,sst2_probs,weights = bayespe_classifier.forward(samples_test, n_forward_passes=9)
print(teacher_probs[:10, :])
f1_score = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='f1')
ece = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='ece')
print('Teacher f1-score: {}, Teacher ECE: {}'.format(f1_score, ece))

inference for promt 1 out of 9


100%|█████████████████████████████████████████| 872/872 [01:02<00:00, 13.93it/s]


inference for promt 2 out of 9


100%|█████████████████████████████████████████| 872/872 [01:02<00:00, 14.04it/s]


inference for promt 3 out of 9


100%|█████████████████████████████████████████| 872/872 [01:03<00:00, 13.81it/s]


inference for promt 4 out of 9


100%|█████████████████████████████████████████| 872/872 [01:04<00:00, 13.51it/s]


inference for promt 5 out of 9


100%|█████████████████████████████████████████| 872/872 [01:05<00:00, 13.27it/s]


inference for promt 6 out of 9


100%|█████████████████████████████████████████| 872/872 [01:04<00:00, 13.44it/s]


inference for promt 7 out of 9


100%|█████████████████████████████████████████| 872/872 [01:03<00:00, 13.83it/s]


inference for promt 8 out of 9


100%|█████████████████████████████████████████| 872/872 [01:05<00:00, 13.31it/s]


inference for promt 9 out of 9


100%|█████████████████████████████████████████| 872/872 [01:05<00:00, 13.31it/s]


[[2.23062986e-05 9.99977686e-01]
 [9.97405473e-01 2.59451908e-03]
 [9.45295187e-05 9.99905463e-01]
 [5.67206895e-05 9.99943272e-01]
 [9.99415446e-01 5.84546984e-04]
 [2.96851486e-04 9.99703141e-01]
 [9.99112874e-01 8.87118647e-04]
 [9.87194502e-01 1.28054907e-02]
 [5.20410169e-05 9.99947952e-01]
 [9.99769333e-01 2.30659387e-04]]
Teacher f1-score: 0.9541281990583655, Teacher ECE: 0.027956411242485046


In [18]:
import numpy as np
from scipy.stats import entropy

probs = np.transpose(sst2_probs, (0, 2, 1))  
# Compute prompt wise predictive entropy
ent_per_prompt = entropy(probs, axis=2) 
# Weighted sum of prompt wise predictive entropies
sst2_weighted_entropy = np.sum(ent_per_prompt * weights[None, :], axis=1)  
print(sst2_weighted_entropy)

[2.60829570e-04 1.64556254e-02 9.37743121e-04 6.09520042e-04
 4.75074880e-03 2.63271611e-03 6.83631451e-03 5.13098683e-02
 5.60818755e-04 2.07374875e-03 5.60142845e-04 1.86953013e-03
 1.22071397e-02 5.08598029e-01 2.44461213e-03 3.36788701e-04
 6.22273778e-02 4.62060310e-04 4.52553593e-03 1.10559002e-03
 1.55338566e-01 3.05916513e-03 1.05964496e-02 4.97998038e-04
 3.96572236e-04 2.65204890e-03 3.27095251e-03 1.57305349e-03
 1.86118892e-03 1.50104502e-03 1.63934612e-03 1.64008663e-03
 3.26955232e-04 6.20146960e-02 1.17708395e-03 1.23880920e-02
 1.80757666e-03 4.57214342e-02 5.28104270e-03 5.40318854e-04
 8.40955629e-04 5.33746495e-04 2.01730174e-02 4.07244147e-04
 1.47119781e-03 2.93760168e-01 4.55263360e-03 1.72930241e-03
 3.88445090e-04 1.70793425e-03 1.39753924e-03 4.90201166e-04
 5.89423852e-02 7.77626528e-03 1.64695951e-03 3.21488172e-04
 2.64899620e-03 1.32829908e-02 2.94708086e-03 3.00498433e-03
 5.56333083e-04 3.91842499e-03 7.76562098e-03 4.19299112e-04
 5.13634788e-02 4.637076

In [19]:
# Mean predictive entropy
print(sst2_weighted_entropy.mean())

0.034164633291940354


In [20]:
# Define task instructions for yahoo answers dataset
instructions = [
    "Is the following Youtube comment spam?",
    "Determine whether the given Youtube comment is spam.",
    "Classify the following Youtube comment as spam or not.",
    "Check if the Youtube comment below is considered spam.",
    "Decide if the given Youtube comment is spam.",
    "Is this Youtube comment a form of spam?",
    "Evaluate whether this Youtube comment qualifies as spam.",
    "Identify if the following comment on Youtube is spam.",
    "Judge whether the Youtube comment should be flagged as spam."
]

In [21]:
# Load youtube comments dataset
df_train = pd.read_csv('youtube.csv')
n_train = 1100  
n_in_context = 5 
n_total_in_context = len(instructions) * n_in_context  
n_val=100
df_train_actual = df_train.iloc[:n_train] 
df_in_context_base = df_train.iloc[n_train:n_train + n_total_in_context]
df_val = df_train.iloc[n_train + n_total_in_context:n_train+n_total_in_context+n_val]
df_test_actual = df_train.iloc[n_train+n_total_in_context+n_val:]  
gt_labels_train = df_train_actual.iloc[:, 4].values.astype(int) 
samples_train = df_train_actual.iloc[:, 3].values 
gt_labels_val = df_val.iloc[:, 4].values.astype(int) 
samples_val = df_val.iloc[:, 3].values 
gt_labels_test = df_test_actual.iloc[:, 4].values.astype(int)
samples_test = df_test_actual.iloc[:, 3].values 

# **Prepare Unique In-Context Examples Per Instruction**
for i in range(len(instructions)):  
    start_idx = i * n_in_context
    end_idx = (i + 1) * n_in_context
    df_in_context = df_in_context_base.iloc[start_idx:end_idx]

    samples_in_context_i = df_in_context.iloc[:, 3].values
    gt_labels_in_context_i = df_in_context.iloc[:, 4].values.astype(int)

    if i == 0:
        samples_in_context = np.expand_dims(samples_in_context_i, axis=1)
        gt_labels_in_context = np.expand_dims(gt_labels_in_context_i, axis=1)
    else:
        samples_in_context = np.concatenate((samples_in_context, np.expand_dims(samples_in_context_i, axis=1)), axis=1)
        gt_labels_in_context = np.concatenate((gt_labels_in_context, np.expand_dims(gt_labels_in_context_i, axis=1)), axis=1)


In [22]:
# Prompt Formatting Class
class PromptFormatting(object):
    def __init__(self):
        self.INSTRUCTION = 'Is the following Youtube comment spam?'
        self.CLASSES = ['not spam', 'spam']
        self.CLASSES_FOR_MATCHING = [self.CLASSES, ['ham', 'spam'], ['0', '1']]
        self.CLASSES_TEXT = '''1. {}\n2. {}'''.format(self.CLASSES[0], self.CLASSES[1])

    def format_instruction(self, instruction):
        return '''{}\n{}\n'''.format(instruction, self.CLASSES_TEXT)

    def format_content(self, content):
        return '''comment: {}\nthe comment is '''.format(content)

prompt_formatting = PromptFormatting()



# Initialize BayesPE (Teacher Model)
bayespe_classifier = BayesPE(
    model_name="mistralai/Mistral-7B-Instruct-v0.3", 
    prompt_formatting=prompt_formatting,
    instructions=instructions, 
    few_shot_texts_sets=samples_in_context, 
    few_shot_labels_sets=gt_labels_in_context, 
    use_reduced_precision=True
)

# Print example prompt
bayespe_classifier.print_prompt_example()
bayespe_classifier.weights = weights

Loading checkpoint shards: 100%|██████████████████| 3/3 [00:02<00:00,  1.10it/s]


EXAMPLE 1:
Is the following Youtube comment spam?
1. not spam
2. spam

comment: Check out my music niggas﻿
the comment is spam

EXAMPLE 2:
Is the following Youtube comment spam?
1. not spam
2. spam

comment: Check out this video on YouTube:﻿
the comment is spam

EXAMPLE 3:
Is the following Youtube comment spam?
1. not spam
2. spam

comment: Check out this funny video &quot;Cereal Box Knocks out Baby&quot; on my channel.﻿
the comment is spam

EXAMPLE 4:
Is the following Youtube comment spam?
1. not spam
2. spam

comment: Subscribe to me if u think &quot;swag&quot; is fucking stupid﻿
the comment is spam

EXAMPLE 5:
Is the following Youtube comment spam?
1. not spam
2. spam

comment: Tuto to subscribe to my channel because you should sign up for 17 l please thank you I&#39;d do anything for you to sign up a lot of good video I usually do!﻿
the comment is spam

EXAMPLE 6:
Is the following Youtube comment spam?
1. not spam
2. spam

comment: <SAMPLE_IN>
the comment is <LABEL_OUT>


In [23]:
# Evaluate BayesPE performance on youtube comments test data
teacher_probs,youtube_probs,weights = bayespe_classifier.forward(samples_test, n_forward_passes=9)
print(teacher_probs[:10, :])
f1_score = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='f1')
ece = evaluation.compute_metric(gt_labels_test, teacher_probs, metric='ece')
print('Teacher f1-score: {}, Teacher ECE: {}'.format(f1_score, ece))

inference for promt 1 out of 9


100%|█████████████████████████████████████████| 711/711 [00:55<00:00, 12.78it/s]


inference for promt 2 out of 9


100%|█████████████████████████████████████████| 711/711 [01:04<00:00, 11.07it/s]


inference for promt 3 out of 9


100%|█████████████████████████████████████████| 711/711 [00:55<00:00, 12.85it/s]


inference for promt 4 out of 9


100%|█████████████████████████████████████████| 711/711 [01:03<00:00, 11.12it/s]


inference for promt 5 out of 9


100%|█████████████████████████████████████████| 711/711 [00:59<00:00, 11.97it/s]


inference for promt 6 out of 9


100%|█████████████████████████████████████████| 711/711 [00:56<00:00, 12.70it/s]


inference for promt 7 out of 9


100%|█████████████████████████████████████████| 711/711 [00:57<00:00, 12.39it/s]


inference for promt 8 out of 9


100%|█████████████████████████████████████████| 711/711 [00:59<00:00, 11.91it/s]


inference for promt 9 out of 9


100%|█████████████████████████████████████████| 711/711 [01:02<00:00, 11.32it/s]

[[5.89566912e-02 9.41043301e-01]
 [6.21771073e-04 9.99378221e-01]
 [6.94467871e-02 9.30553205e-01]
 [7.40624053e-04 9.99259368e-01]
 [7.40624053e-04 9.99259368e-01]
 [9.94847618e-01 5.15237435e-03]
 [9.92957754e-01 7.04223902e-03]
 [1.36637034e-04 9.99863356e-01]
 [1.05120554e-01 8.94879439e-01]
 [1.05120554e-01 8.94879439e-01]]
Teacher f1-score: 0.8787151493993874, Teacher ECE: 0.03464491665363312





In [24]:
import numpy as np
from scipy.stats import entropy

probs = np.transpose(youtube_probs, (0, 2, 1))  
# Compute prompt wise predictive entropy
ent_per_prompt = entropy(probs, axis=2) 
# Weighted sum of prompt wise predictive entropies
youtube_weighted_entropy = np.sum(ent_per_prompt * weights[None, :], axis=1)  
print(youtube_weighted_entropy)

[0.17063244 0.00496051 0.10939222 0.00578485 0.00578485 0.02790974
 0.03794173 0.00131676 0.19368446 0.19368446 0.40600933 0.11768295
 0.00288808 0.06727981 0.15339182 0.46094292 0.23556086 0.19368446
 0.05676766 0.22681406 0.47486896 0.08808524 0.0527278  0.03188253
 0.05257225 0.40524636 0.02928423 0.41103096 0.03644919 0.02442789
 0.12086012 0.16785526 0.50101336 0.02607865 0.02900213 0.2764365
 0.05623508 0.37179636 0.15153049 0.24571285 0.27515651 0.36238353
 0.41266723 0.06552536 0.0101001  0.20329124 0.01613747 0.36238353
 0.08537064 0.00562075 0.0300261  0.10909997 0.04952306 0.28148751
 0.26081753 0.30855909 0.10693868 0.10104218 0.06010398 0.34865794
 0.02135728 0.09563375 0.18358281 0.07654769 0.44275371 0.07444779
 0.39614169 0.01578395 0.10193236 0.25555387 0.4316324  0.03085098
 0.01659718 0.0941828  0.06494452 0.11752309 0.02388175 0.00192674
 0.46965758 0.55860553 0.11829509 0.04635694 0.12029956 0.17905823
 0.27586304 0.17544742 0.02277353 0.12116817 0.02297473 0.03239

In [25]:
# Mean predictive entropy
print(youtube_weighted_entropy.mean())

0.17132808865700833
