In [None]:
## Parameters
import math
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
# --- Credentials -----------------------------------------------------------
# Paste your key here, or leave the placeholder untouched and export
# OPENAI_API_KEY in your shell before starting the kernel.
api_key = '{Your API Key}'
# Only export the key when the placeholder has actually been replaced; otherwise
# a valid OPENAI_API_KEY already present in the environment would be overwritten
# by the literal placeholder string.
if api_key and api_key != '{Your API Key}':
    os.environ["OPENAI_API_KEY"] = api_key

# --- Experiment settings ---------------------------------------------------
dataset = 'banking77'
# dataset: banking77, clinc_domain(clinc), few_nerd_nat, few_rel_nat, few_event, go_emotion
# dataset: massive_intent(massive_scenario), mtop_intent(mtop_domain), arxiv_fine, reddit, stackexchange
llm = 'gpt-4o-mini' # 'gpt-4o-mini', 'gpt-4o',
seed = 42
toy_mode = True

# --- Output directory ------------------------------------------------------
# Layout: <output_root>/<experiment_name>/<dataset>
output_root = './output'
experiment_name = 'nuc'
output_dir = os.path.join(output_root, experiment_name, dataset)
# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(output_dir, exist_ok=True)
print('Output directory:', output_dir)

Output directory: ./output/nuc/banking77


In [2]:
## Load Data
# The dataset name chosen in the parameters cell selects the JSON Lines file to read.
data_path = f'../data/datasets/{dataset}/large.jsonl'
print('Data path:', data_path)
data = pd.read_json(data_path, lines=True)

# Report label / sample counts for the file exactly as loaded
labels = data['label'].unique()
print('Number of unique labels:', len(labels))
print('Number of samples:', data.shape[0])

# Normalise the schema ('input' -> 'text') and keep at most the first 25K rows
# so the number of LLM queries (and therefore the cost) stays bounded
data = data.rename(columns={'input': 'text'}).iloc[:25000]

# Recompute the label set on the (possibly truncated) frame; this is the set
# of options later offered to the LLM
labels = data['label'].unique()
print('Number of unique labels:', len(labels))
print('Number of samples:', data.shape[0])

# Show the resulting frame as the cell output
data

Data path: ../data/datasets/banking77/large.jsonl
Number of unique labels: 77
Number of samples: 10003
Number of unique labels: 77
Number of samples: 10003


Unnamed: 0,task,text,label
0,banking77,My physical card is not working,card not working
1,banking77,Can you unblock my account? I entered the PIN...,pin blocked
2,banking77,I need to know what is going on. I'm attemptin...,failed transfer
3,banking77,I am unable to prove my identity,unable to verify identity
4,banking77,I am still waiting for my card.,card arrival
...,...,...,...
9998,banking77,What is the present state of the exchange rate?,exchange rate
9999,banking77,maximum how many days get the courier?,card arrival
10000,banking77,How would I top up with a cheque?,top up by cash or cheque
10001,banking77,Where can I change my PIN?,change pin


In [3]:
# Pseudo-Labeling Prompt
def construct_prompt_psl(data_selected, labels, data_labeled=None):
    """Assemble the text prompt used to pseudo-label a batch of samples.

    The prompt consists of an instruction, the stringified label options, a
    request for a confidence value, an optional block of labeled
    demonstrations, and finally the numbered samples to be labeled.

    Args:
        data_selected: DataFrame with a 'text' column; each row becomes one
            numbered testing sample.
        labels: Collection of label options; embedded via ``str(labels)``.
        data_labeled: Optional DataFrame with 'text' and 'label' columns whose
            rows are rendered as demonstrations. ``None`` omits the section.

    Returns:
        The complete prompt as a single string.
    """
    pieces = [
        "Instruction: Please select a label from the provided options to the following testing samples. \n\n",
        "Label Options: " + str(labels) + "\n\n",
        'Also show your confidence in the label assignment by providing a probability between 0 and 1.\n\n',
    ]

    if data_labeled is not None:
        pieces.append("\n== Demonstrations == \n\n")
        for position, (_, row) in enumerate(data_labeled.iterrows(), start=1):
            pieces.append(f"Text {position}: {row['text']}\nLabel: {row['label']}\n")

    pieces.append("\n== Testing Samples == \n\n")
    # Numbering restarts at 1 for the testing samples, independent of the demonstrations.
    pieces.extend(
        f"Text {position}: {row['text']}\n"
        for position, (_, row) in enumerate(data_selected.iterrows(), start=1)
    )

    return "".join(pieces)


# Query OpenAI API
from openai import OpenAI
from pydantic import BaseModel
from typing import List
# Module-level client shared by the query helper below. It is constructed with
# no arguments; presumably it picks up the OPENAI_API_KEY environment variable
# exported in the parameters cell — confirm against the SDK version in use.
client = OpenAI()

## Pseudo-Labeling Query
class PseudoLabelOne(BaseModel):
    # Structured result for a single sample in the LLM's reply.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic may copy a class docstring into the generated schema that is sent
    # as `response_format` — verify before converting these to a docstring.
    #
    # text:       the sample text as reported by the model (not read by the
    #             visible downstream code; presumably an echo of the input).
    # label:      the label the model assigned; collected by batch_querying.
    # confidence: the model's self-reported confidence; the prompt asks for a
    #             probability between 0 and 1, but no range is enforced here.
    text: str
    label: str
    confidence: float

class PseudoLabel(BaseModel):
    # Top-level response schema passed as `response_format` to the chat API:
    # a list with one PseudoLabelOne entry per labeled sample. The caller
    # compares the list length with the batch size to detect malformed replies.
    examples: List[PseudoLabelOne]

def query_openai_psl(prompt, model=None, temperature=0.7):
    """Send one pseudo-labeling prompt to the chat API and parse the structured reply.

    Args:
        prompt: Complete user prompt (see ``construct_prompt_psl``).
        model: Name of the chat model to query. When ``None`` the notebook-level
            ``llm`` setting is used if it is defined, otherwise 'gpt-4o-mini'.
            Previously the model was hard-coded, so changing ``llm`` altered the
            output filename but not the model that was actually queried.
        temperature: Sampling temperature; defaults to 0.7 as suggested by
            previous papers.

    Returns:
        A ``(parsed, completion)`` tuple: the reply parsed into a ``PseudoLabel``
        instance, and the raw completion object returned by the SDK.

    Raises:
        Whatever the OpenAI client raises (network, authentication, parsing);
        callers are expected to handle failures per batch.
    """
    if model is None:
        # Fall back to the notebook configuration, keeping the historical
        # default when the function is used outside this notebook.
        model = globals().get('llm', 'gpt-4o-mini')

    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        response_format=PseudoLabel,
    )

    return completion.choices[0].message.parsed, completion

In [None]:
## Pseudo-Labeling
def batch_querying(data_selected, batch_size=None, label_options=None):
    """Pseudo-label a DataFrame by querying the LLM in fixed-size batches.

    Each batch is turned into a prompt with ``construct_prompt_psl`` and sent
    through ``query_openai_psl``. A batch whose query raises, or whose reply
    contains a different number of predictions than samples, is recorded as a
    failure: its predictions and confidences are filled with ``None`` so the
    returned lists stay aligned with the input rows.

    Args:
        data_selected: DataFrame with 'text' and 'label' columns.
        batch_size: Number of samples per query. Defaults to the notebook-level
            ``num_queries_each_round`` when ``None``.
        label_options: Label options offered to the model. Defaults to the
            notebook-level ``labels`` when ``None``.

    Returns:
        A 7-tuple ``(texts, ground_truths, predictions, confidences, prompts,
        responses, completions)``. The first four lists have one entry per
        input row; the last three have one entry per query round (``None`` for
        the response/completion of a round whose query raised).

    Raises:
        ValueError: If the effective batch size is smaller than 1.
    """
    # Resolve the optional arguments against the notebook configuration so that
    # existing single-argument calls keep working.
    if batch_size is None:
        batch_size = num_queries_each_round
    if label_options is None:
        label_options = labels
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Compute the number of query rounds needed
    num_rounds = math.ceil(len(data_selected) / batch_size)
    print('Number of query rounds:', num_rounds)

    texts, ground_truths = [], []
    predictions, confidences = [], []
    prompts, responses, completions = [], [], []

    # Query the model for each round
    for i in tqdm(range(num_rounds), desc="Querying rounds"):
        # Slice the current batch once (positional) and reuse it everywhere below.
        batch = data_selected.iloc[i * batch_size:(i + 1) * batch_size]
        num_expected = len(batch)
        prompt = construct_prompt_psl(batch, label_options)

        try:
            response, completion = query_openai_psl(prompt)
            cur_predictions = [example.label for example in response.examples]
            cur_confidences = [example.confidence for example in response.examples]

            # A reply with the wrong number of predictions cannot be aligned with
            # the batch rows; count the round as a failure and fill in None.
            if len(cur_predictions) != num_expected:
                print(f"LLM query failed with exception for the {i}-th round: Number of predictions not equal to number of queries")
                print(f"Number of predictions: {len(cur_predictions)}, Number of queries: {num_expected}")
                cur_predictions = [None] * num_expected
                cur_confidences = [None] * num_expected

        except Exception as e:
            # Deliberate best-effort: one failed round must not abort the whole run.
            print(f"LLM query failed with exception for the {i}-th round: {e}")
            response, completion = None, None
            cur_predictions = [None] * num_expected
            cur_confidences = [None] * num_expected

        cur_texts = batch['text']
        cur_ground_truth = batch['label']
        texts.extend(cur_texts)
        ground_truths.extend(cur_ground_truth)
        predictions.extend(cur_predictions)
        confidences.extend(cur_confidences)
        prompts.append(prompt)
        responses.append(response)
        completions.append(completion)

        # Show the first round in full as a sanity check of prompt and reply.
        if i < 1:
            print('Prompt:', prompt)
            print(f'Pseudo-Labeling Querying Round {i+1}:')
            print('Ground Truth:', list(cur_ground_truth))
            print('Predictions:', cur_predictions)
            print('Confidences:', cur_confidences)

    return texts, ground_truths, predictions, confidences, prompts, responses, completions


# Toy mode labels only the first 5 rows (cheap demonstration run);
# otherwise the whole loaded frame is sent for pseudo-labeling.
data_selected = data.iloc[:5] if toy_mode else data
# Number of samples packed into a single prompt / API call
num_queries_each_round = 10
(
    texts,
    ground_truths,
    predictions,
    confidences,
    prompts,
    responses,
    completions,
) = batch_querying(data_selected)


Number of query rounds: 1


Querying rounds: 100%|██████████| 1/1 [00:03<00:00,  3.70s/it]

Prompt: Instruction: Please select a label from the provided options to the following testing samples. 

Label Options: ['card not working' 'pin blocked' 'failed transfer'
 'unable to verify identity' 'card arrival' 'pending card payment'
 'card delivery estimate' 'card payment wrong exchange rate'
 'getting spare card' 'age limit' 'edit personal details'
 'topping up by card' 'exchange via app' 'change pin' 'activate my card'
 'wrong exchange rate for cash withdrawal' 'card about to expire'
 'top up by bank transfer charge' 'automatic top up'
 'declined card payment' 'declined cash withdrawal' 'lost or stolen phone'
 'request refund' 'extra charge on statement' 'verify source of funds'
 'card payment fee charged' 'card payment not recognised'
 'Refund not showing up' 'direct debit payment not recognised'
 'fiat currency support'
 'balance not updated after cheque or cash deposit' 'top up reverted'
 'transaction charged twice' 'supported cards and currencies'
 'card linking' 'pending t




In [5]:
# Assemble one row per queried sample with 4 columns:
# text, ground truth, prediction, confidence
data_psl = pd.DataFrame(
    {
        'text': texts,
        'ground_truth': ground_truths,
        'prediction': predictions,
        'confidence': confidences,
    }
)
# Discard every row that has a missing value in any column
# (failed query rounds leave None in prediction / confidence)
data_psl_cleaned = data_psl.dropna()

# Summarise what survived the cleaning step
print('Number of unique labels:', data_psl_cleaned['prediction'].nunique())
print('Number of samples:', data_psl_cleaned.shape[0])

data_psl_cleaned

Number of unique labels: 5
Number of samples: 5


Unnamed: 0,text,ground_truth,prediction,confidence
0,My physical card is not working,card not working,card not working,0.95
1,Can you unblock my account? I entered the PIN...,pin blocked,pin blocked,0.9
2,I need to know what is going on. I'm attemptin...,failed transfer,failed transfer,0.85
3,I am unable to prove my identity,unable to verify identity,unable to verify identity,0.9
4,I am still waiting for my card.,card arrival,card delivery estimate,0.88


In [6]:
# Persist the cleaned pseudo-labels as JSON Lines (one record per line)
# inside the experiment's output directory; the file name encodes the
# dataset and the LLM setting.
output_filename = f'{dataset}_pseudo_labeled_{llm}_demo.jsonl'
output_path = os.path.join(output_dir, output_filename)
print('Output path:', output_path)

data_psl_cleaned.to_json(
    path_or_buf=output_path,
    orient='records',
    lines=True,
)

Output path: ./output/nuc/banking77/banking77_pseudo_labeled_gpt-4o-mini_demo.jsonl
