<a href="https://colab.research.google.com/github/Abhi23run/Uncertainty_Quantification_LLMs/blob/main/Code/Uncertainty_Quantification_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls

drive  sample_data


In [2]:
import os
dir_path = '/content/drive/MyDrive/Uncertainty_Quantification_LLMs'
os.chdir(dir_path)

Install the required libraries

In [3]:
%%capture
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install evaluate
!pip install -qqq trl==0.7.1
!pip install torch

In [4]:
import torch
import gc
import time
import evaluate
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from datasets import Dataset, load_dataset
import random
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

Create the features for stress index

In [5]:
def simulate_data(num_samples, seed=0):
  """Generate a synthetic feature table for the stress-index experiments.

  Every feature is drawn from a Beta(2, 2) distribution. The six signed
  indicators are shifted/scaled with loc=-1, scale=2 (support [-1, 1]);
  `tranche_size_indicator` uses loc=0, scale=1 (support [0, 1]).

  Args:
    num_samples: Number of rows to generate.
    seed: Seed for the private random generator. The default of 0 matches the
      seed that was previously applied through `np.random.seed(0)`.

  Returns:
    pandas.DataFrame with one column per feature and `num_samples` rows.
  """
  # A private RandomState keeps the draws reproducible without reseeding
  # NumPy's global generator as a side effect of calling this function.
  rng = np.random.RandomState(seed)

  # (loc, scale) per feature. Insertion order fixes the order of the random
  # draws, and therefore the generated values, so do not reorder.
  feature_ranges = {
      'news_sentiment': (-1, 2),
      'broker_count_imbalance': (-1, 2),
      'volume_indicator': (-1, 2),
      'benchmark_price_difference': (-1, 2),
      'trade_count_imbalance': (-1, 2),
      'one_sided_trade_indicator': (-1, 2),
      'tranche_size_indicator': (0, 1),
  }

  # Generate synthetic data
  data = {
      name: stats.beta.rvs(a=2, b=2, loc=loc, scale=scale,
                           size=num_samples, random_state=rng)
      for name, (loc, scale) in feature_ranges.items()
  }

  # Create DataFrame
  return pd.DataFrame(data)

In [None]:
df = simulate_data(5000)

In [None]:
# Write under ./Data/ so that the next cell, which reads
# ./Data/data_simulated.csv, loads the file produced here.
df.to_csv('./Data/data_simulated.csv', index=False)

In [6]:
df = pd.read_csv('./Data/data_simulated.csv', index_col=False)

The stress index was generated with ChatGPT and added as a column to the dataframe above.

In [7]:
df_stress_index = pd.read_csv('./Data/data_with_stress_index.csv',usecols=lambda col:col not in ['Unnamed: 0'])

Creating the stress index buckets to model as a classification task

In [8]:
n_bins = 10
bin_width = 1/n_bins

In [None]:
# Bucket the stress index into fixed-width bins over [0, 1].
# Explicit edges are needed: an integer `bins` makes pandas derive the edges
# from the observed min/max of the column, so they would not line up with the
# generated "0.0-0.1", ..., "0.9-1.0" labels unless the data spans exactly [0, 1].
bucket_edges = np.linspace(0, 1, n_bins + 1)
bucket_labels = [str(np.round(x, 1)) + '-' + str(np.round(x + bin_width, 1))
                 for x in np.arange(0, 1, bin_width)]
df_stress_index.loc[:, 'stress_index_bucket'] = pd.cut(
    df_stress_index['stress_index'],
    bins=bucket_edges,
    labels=bucket_labels,
    include_lowest=True,  # keep a stress index of exactly 0 in the first bucket
)

In [None]:
df_stress_index.head()

Unnamed: 0,news_sentiment,broker_count_imbalance,volume_indicator,benchmark_price_difference,trade_count_imbalance,one_sided_trade_indicator,tranche_size_indicator,stress_index,stress_index_bucket
0,0.393144,0.124362,0.677889,0.497776,-0.720698,-0.680076,0.724869,0.551515,0.5-0.6
1,0.772496,0.369376,-0.877238,0.139748,0.851864,-0.257519,0.586063,0.623805,0.6-0.7
2,-0.189353,0.190013,-0.485352,-0.364914,-0.183833,-0.235899,0.356616,0.305677,0.3-0.4
3,0.21909,-0.57793,0.87617,0.495462,0.182495,0.09434,0.106455,0.599768,0.5-0.6
4,0.038766,0.474529,0.345526,-0.15337,-0.020692,0.238055,0.432594,0.594587,0.5-0.6


Creating data for MCQ Format in Uncertainty quantification method

In [9]:
def shuffle_list(original_list):
    """Return a randomly reordered copy of `original_list`.

    The input is left untouched; the ordering comes from the `random`
    module's global generator.
    """
    permuted = original_list.copy()
    random.shuffle(permuted)
    return permuted

In [10]:
# Bucket labels such as "0.0-0.1", built from the lower edge of each bin.
bucket_lower_edges = np.arange(0, 1, bin_width)
list_stress_index_bucket = [
    str(np.round(lower, 1)) + '-' + str(np.round(lower + bin_width, 1))
    for lower in bucket_lower_edges
]
# One option letter per bucket, starting at "A".
list_option_choices = [chr(ord('A') + offset) for offset in range(len(list_stress_index_bucket))]

**Trading Bot Prompting**

In [11]:
import getpass

# Prompt for the Hugging Face token
# getpass keeps the token out of the notebook source and out of the echoed output.
hf_token = getpass.getpass("Enter your Hugging Face token: ")

import os
# Also expose the token to the process environment.
# NOTE(review): later cells pass `token=hf_token` explicitly; confirm whether anything
# actually reads HUGGINGFACE_TOKEN (huggingface_hub documents HF_TOKEN as its variable).
os.environ['HUGGINGFACE_TOKEN'] = hf_token

Enter your Hugging Face token: ··········


In [12]:
# Candidate checkpoints; exactly one `model_id` assignment should be active.
# model_id =  "NousResearch/Llama-2-7b-hf"
# model_id = "meta-llama/Llama-2-7b-chat-hf"
# model_id = "mistralai/Mistral-7B-v0.1"
model_id = "microsoft/phi-2"
# bitsandbytes settings: load weights in 4-bit NF4 with double quantization
# and use bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
# quantization_config = BitsAndBytesConfig(load_in_4bit=True)
# device_map="auto" lets the library decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto",token=hf_token)

config.json:   0%|          | 0.00/863 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [13]:
# Load the tokenizer that belongs to the selected checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id,token=hf_token)
# Reuse the end-of-sequence token as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


**Prompt with 1 shot example (first row of the dataframe)**

In [None]:
# One-shot example prompt: task instruction, feature descriptions and a question
# with a hard-coded feature dict (the values of the first dataframe row).
# The answer options and the answer itself are appended in the following cells.
instruction="""
###Instruction:
You are fixed income corporate bond trader. Use the feature values that I will provide and select the correct option for classifying the stress index (range of 0 to 1) into one of the defined buckets.
Feature descriptions:
- news_sentiment: Scale from -1 (negative) to +1 (positive) reflecting the sentiment in news about the bond issuer over the past 7 days.
- broker_count_imbalance: Difference in broker count buying or selling the same security in the last 2 days, ranging from -1 (more selling) to +1 (more buying).
- volume_indicator: Indicates if a bond is heavily traded in the last 2 days, with -1 for high selling volume and +1 for high buying volume.
- benchmark_price_difference: Compares a bond's quoted price to the benchmark, ranging from -1 (below benchmark) to +1 (above benchmark), indicating price stress.
- trade_count_imbalance: Difference in buy and sell trades of a security over 2 days, from -1 (more sells) to +1 (more buys).
- one_sided_trade_indicator: Imbalance in buy or sell trades over 7 days, with -1 for predominantly sell trades and +1 for buy trades.
- tranche_size_indicator: Assesses bond stress by the direction of trades and tranche size, ranging from 0 (large tranche) to 1 (small tranche).
###Question:
Given the feature value dict as:
{'news_sentiment': 0.3931444486731639, 'broker_count_imbalance': 0.1243619871996783, 'volume_indicator': 0.6778892697073087, 'benchmark_price_difference': 0.4977762417709135, 'trade_count_imbalance': -0.7206982745760815, 'one_sided_trade_indicator': -0.6800763028141159, 'tranche_size_indicator': 0.7248691201811168}
Return the correct option for classifying stress index from the following options based on above features. Do not generate anything else apart from the option.
"""

In [None]:
# Render the answer options, one "(letter) bucket" line each, with the
# buckets assigned to the letters in random order.
choice_prompt = ''.join(
    f'({option_label}) {bucket}\n'
    for option_label, bucket in zip(list_option_choices, shuffle_list(list_stress_index_bucket))
)

In [None]:
# Build the one-shot example: instruction + shuffled options + the correct option letter.
# The options in `choice_prompt` are shuffled at random, so the letter is looked up
# there instead of being hard-coded; a fixed letter is only right for one shuffle.
example_bucket = str(df_stress_index['stress_index_bucket'].iloc[0])  # bucket of the first row (the 1-shot example)
example_answer = None
for option_line in choice_prompt.splitlines():
    # Option lines look like "(A) 0.5-0.6"; index 1 is the option letter.
    if option_line.endswith(f') {example_bucket}'):
        example_answer = option_line[1]
        break
if example_answer is None:
    raise ValueError(f"bucket {example_bucket!r} of the example row is not among the listed options")

example_prompt = instruction + choice_prompt + f'''
###Answer:
{example_answer}
'''

Final prompt for the model to return the stress index bucket

In [None]:
# Question block for the row being classified (hard-coded feature values).
query_block = '''
###Question:
Given the feature value dict as:
{'news_sentiment': 0.7724956104208642, 'broker_count_imbalance': 0.3693763241525458, 'volume_indicator': -0.8772379830243947, 'benchmark_price_difference': 0.1397481154484952, 'trade_count_imbalance': 0.8518638934191982, 'one_sided_trade_indicator': -0.2575187071551993, 'tranche_size_indicator': 0.5860627411447076}
Return the correct option for classifying stress index from the following options based on above features. Do not generate anything else apart from the option.
'''

# Fresh random assignment of buckets to option letters for this question.
choice_prompt = ''.join(
    f'({option_label}) {bucket}\n'
    for option_label, bucket in zip(list_option_choices, shuffle_list(list_stress_index_bucket))
)

# The prompt ends right after the answer header so the next token is the model's choice.
answer_header = '''
###Answer:
'''

final_prompt = example_prompt + query_block + choice_prompt + answer_header

Model Generation to see what the model predicted as next token:

In [None]:
def model_generation(model, prompt):
    """Generate a single new token for `prompt` and return the decoded text.

    Args:
        model: Causal language model exposing `.generate()` and `.device`.
        prompt: Prompt text, tokenized with the notebook-level `tokenizer`.

    Returns:
        The decoded prompt followed by the one generated token; special
        tokens are kept in the text.
    """
    # Put the encoded prompt on the same device as the model.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    generated_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        # `temperature` only applies when sampling is enabled; the previous call
        # passed temperature=0.2 without enabling sampling, so it is dropped and
        # non-sampling decoding is requested explicitly.
        do_sample=False,
        max_new_tokens=1,
        pad_token_id=tokenizer.pad_token_id,
    )
    return tokenizer.decode(generated_ids[0], skip_special_tokens=False)

In [None]:
output = model_generation(model, final_prompt)

In [None]:
print(output)

In [None]:
def delete_model(model):
    """Drop this function's reference to `model`, then collect garbage and empty the CUDA cache.

    `del` only removes the local name, so the object stays alive while the
    caller (or the notebook namespace) still holds a reference to it.
    """
    del model
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
def delete_generation_output(output):
    """Drop this function's reference to `output`, then collect garbage and empty the CUDA cache.

    `del` only removes the local name, so the object stays alive while the
    caller still holds a reference to it.
    """
    del output
    gc.collect()
    torch.cuda.empty_cache()

Model Forward to get the logits of the next token:

In [None]:
def model_forward(model, prompt):
    """Run one forward pass and return the logits for the next token.

    Args:
        model: Callable as `model(input_ids)`; must expose `.device` and
            return an object with a `.logits` tensor.
        prompt: Prompt text, tokenized with the notebook-level `tokenizer`.

    Returns:
        CPU tensor with the logits at the last prompt position
        (`logits[:, -1]`), or None when the forward pass raises a
        RuntimeError (for example a CUDA out-of-memory error).
    """
    import warnings

    inputs = tokenizer(prompt, return_tensors='pt')
    # Put the token ids on the same device as the model.
    input_ids = inputs["input_ids"].to(model.device)
    try:
        # Inference only: skip gradient bookkeeping to save memory.
        with torch.no_grad():
            outputs = model(input_ids)
    except RuntimeError as err:
        # Best effort for runtime failures only; anything else (including
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        warnings.warn(f"model_forward failed: {err}")
        return None
    # Slice the last position before copying so only one row leaves the device.
    next_token_logits = outputs.logits[:, -1].detach().cpu()
    return next_token_logits

In [None]:
def softmax(x):
    """Convert an array of logits into probabilities that sum to 1.

    The maximum is subtracted before exponentiating so large logits do not
    overflow; normalisation is over the whole array.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / weights.sum()

In [None]:
tokens_of_interest = list_option_choices  # the option choices corresponding to stress index buckets

# Logits for the token that follows `final_prompt`. Computed here so the cell
# runs on a fresh kernel instead of relying on a leftover variable.
next_token_logits = model_forward(model, final_prompt)
if next_token_logits is None:
    raise RuntimeError("model_forward returned no logits for final_prompt")

token_indices = tokenizer.convert_tokens_to_ids(tokens_of_interest)  # vocabulary index of each option token

# Maps each option letter to its raw next-token logit (despite the variable name).
indices_in_logits = {token: next_token_logits[0, token_idx].item()
                     for token, token_idx in zip(tokens_of_interest, token_indices)}

# Softmax restricted to the option letters: option letter -> probability.
label_to_softmax_dict = dict(zip(tokens_of_interest, softmax(np.array(list(indices_in_logits.values())))))

In [None]:
print(label_to_softmax_dict) ##this would show the softmax for each of the labels (options)

***Run the code below to delete the variables and free unused memory. Use it before re-running the model forward pass above to avoid CUDA out-of-memory errors. Note: you will need to create the `inputs` variable again afterwards.***

In [None]:
# Clear memory
def delete_inputs_outputs(inputs, outputs=None):
  """Drop the given references, then collect garbage and empty the CUDA cache.

  Args:
    inputs: Tokenized inputs to release.
    outputs: Optional model outputs to release. It is a parameter because
      `del` on a name that was never bound inside the function raises
      UnboundLocalError.

  Note:
    `del` only removes this function's local names; the objects stay alive
    while the caller still holds references to them.
  """
  del inputs
  del outputs
  gc.collect()
  torch.cuda.empty_cache()

In [None]:
def get_values_before_key(sorted_dict, key):
    """Return the values that precede `key` in the dict's iteration order.

    The value stored under `key` itself is excluded. When `key` is absent,
    every value is returned.
    """
    ordered_keys = list(sorted_dict)
    cutoff = ordered_keys.index(key) if key in ordered_keys else len(ordered_keys)
    return list(sorted_dict.values())[:cutoff]

#### LAC CONFORMAL SCORE

In [15]:
def lac(true_label, label_softmax_dict):
    """LAC conformal score: one minus the probability assigned to the true label.

    Raises KeyError when `true_label` is not a key of `label_softmax_dict`.
    """
    true_label_probability = label_softmax_dict[true_label]
    return 1.0 - true_label_probability

In [None]:
##Calculating the LAC conformal score for the example
print(lac('H',label_to_softmax_dict))

#### APS CONFORMAL SCORE

In [14]:
def aps(true_label, label_softmax_dict):
    """APS conformal score as used in this notebook.

    Sums the probabilities of all labels ranked above `true_label` when the
    labels are sorted by descending probability; the true label's own
    probability is not included.

    Args:
        true_label: Key of the correct option.
        label_softmax_dict: Mapping of option label -> probability.

    Returns:
        The summed probability of the labels ranked above the true label.

    Raises:
        KeyError: If `true_label` is not a key of `label_softmax_dict`.
    """
    if true_label not in label_softmax_dict:
        raise KeyError(true_label)
    # Sort the dict that was passed in, not the notebook-level
    # `label_to_softmax_dict`, so the score reflects this call's probabilities.
    sorted_softmax_dict = dict(sorted(label_softmax_dict.items(), key=lambda item: item[1], reverse=True))
    high_labels = get_values_before_key(sorted_softmax_dict, true_label)
    return sum(high_labels)

In [None]:
print(aps('H', label_to_softmax_dict))

Code Segment for Batch Testing

Creating prompt column for the dataset:

In [None]:
# Columns fed to the model and the column holding the target bucket.
feature_cols = [
    'news_sentiment',
    'broker_count_imbalance',
    'volume_indicator',
    'benchmark_price_difference',
    'trade_count_imbalance',
    'one_sided_trade_indicator',
    'tranche_size_indicator',
]
bucket_col = 'stress_index_bucket'

# The first dataframe row serves as the one-shot example.
first_row = df_stress_index.head(1)
first_row_features = first_row[feature_cols].to_dict(orient='records')[0]
first_row_bucket = first_row[bucket_col].values[0]

base_instruction = """###Instruction:
You are fixed income corporate bond trader. Use the feature values that I will provide and select the correct option for classifying the stress index (range of 0 to 1) into one of the defined buckets.
Feature descriptions:
- news_sentiment: Scale from -1 (negative) to +1 (positive) reflecting the sentiment in news about the bond issuer over the past 7 days.
- broker_count_imbalance: Difference in broker count buying or selling the same security in the last 2 days, ranging from -1 (more selling) to +1 (more buying).
- volume_indicator: Indicates if a bond is heavily traded in the last 2 days, with -1 for high selling volume and +1 for high buying volume.
- benchmark_price_difference: Compares a bond's quoted price to the benchmark, ranging from -1 (below benchmark) to +1 (above benchmark), indicating price stress.
- trade_count_imbalance: Difference in buy and sell trades of a security over 2 days, from -1 (more sells) to +1 (more buys).
- one_sided_trade_indicator: Imbalance in buy or sell trades over 7 days, with -1 for predominantly sell trades and +1 for buy trades.
- tranche_size_indicator: Assesses bond stress by the direction of trades and tranche size, ranging from 0 (large tranche) to 1 (small tranche).
"""

base_instruction += f'''
###Question:
Given the feature value dict as:
{str(first_row_features)}
Return the correct option for classifying stress index from the following options based on above features. Do not generate anything else apart from the option.
'''

# Assign the buckets to option letters in random order and remember which
# letter received the example row's bucket.
shuffled_buckets = shuffle_list(list_stress_index_bucket)
choice_prompt = ''
for option_label, bucket in zip(list_option_choices, shuffled_buckets):
    choice_prompt += f'({option_label}) {bucket}\n'
    if bucket == first_row_bucket:
        base_answer = option_label

base_instruction += choice_prompt + f'''
###Answer:
{base_answer}
'''


In [None]:
print(base_instruction)

###Instruction:
You are fixed income corporate bond trader. Use the feature values that I will provide and select the correct option for classifying the stress index (range of 0 to 1) into one of the defined buckets.
Feature descriptions:
- news_sentiment: Scale from -1 (negative) to +1 (positive) reflecting the sentiment in news about the bond issuer over the past 7 days.
- broker_count_imbalance: Difference in broker count buying or selling the same security in the last 2 days, ranging from -1 (more selling) to +1 (more buying).
- volume_indicator: Indicates if a bond is heavily traded in the last 2 days, with -1 for high selling volume and +1 for high buying volume.
- benchmark_price_difference: Compares a bond's quoted price to the benchmark, ranging from -1 (below benchmark) to +1 (above benchmark), indicating price stress.
- trade_count_imbalance: Difference in buy and sell trades of a security over 2 days, from -1 (more sells) to +1 (more buys).
- one_sided_trade_indicator: Imba

In [None]:
def transform(row):
  """Build the one-shot prompt and the correct option letter for one dataframe row.

  Reads the notebook-level `base_instruction`, `base_answer`, `feature_cols`,
  `bucket_col`, `list_option_choices` and `list_stress_index_bucket`.

  Args:
    row: A dataframe row (pandas Series) containing the feature columns and
      the bucket column; `row.name` is its index label.

  Returns:
    Tuple `(prompt, answer)`. For the row with index label 0 (the one-shot
    example itself) this is `(base_instruction, base_answer)`. For every
    other row the prompt is `base_instruction` followed by the row's question,
    freshly shuffled options and an empty answer slot, and `answer` is the
    option letter assigned to the row's bucket.

  Raises:
    ValueError: If the row's bucket matches none of the listed options
      (for example when the bucket value is missing).
  """
  if row.name == 0:
    return base_instruction, base_answer

  instruction_prompt = base_instruction + f'''###Question:
Given the feature value dict as:
{str(row[feature_cols].to_dict())}
Return the correct option for classifying stress index from the following options based on above features. Do not generate anything else apart from the option.
'''
  answer = None
  choice_prompt = ''
  for option_label, bucket in zip(list_option_choices, shuffle_list(list_stress_index_bucket)):
    choice_prompt += f'({option_label}) {bucket}\n'
    if bucket == row[bucket_col]:
      answer = option_label
  if answer is None:
    # Fail with a clear message instead of an UnboundLocalError on `answer`.
    raise ValueError(
        f"row {row.name!r}: bucket {row[bucket_col]!r} is not one of the listed options")
  instruction_prompt += choice_prompt + '''
###Answer:
'''
  return instruction_prompt, answer


In [None]:
# Build the prompt and the correct option letter for every row; the returned
# tuple is expanded into the two new columns.
df_stress_index[['row_prompt', 'answer_label']] = df_stress_index.apply(
    transform,
    axis=1,
    result_type='expand',
)

In [None]:
df_stress_index.to_csv("df_stress_index_hf_prompt.csv", index = False)

In [16]:
df_stress_index_hf_prompt=pd.read_csv('df_stress_index_hf_prompt.csv',index_col=False)

In [17]:
# Split name -> dataframe holding that split.
data_splits = {'test': df_stress_index_hf_prompt}
# Convert each split from its own frame (`data`) so that adding further
# splits to `data_splits` yields distinct datasets.
df_stress_index_hf_dataset = {split: Dataset.from_pandas(data) for split, data in data_splits.items()}

{'test':       news_sentiment  broker_count_imbalance  volume_indicator  \
 0           0.393144                0.124362          0.677889   
 1           0.772496                0.369376         -0.877238   
 2          -0.189353                0.190013         -0.485352   
 3           0.219090               -0.577930          0.876170   
 4           0.038766                0.474529          0.345526   
 ...              ...                     ...               ...   
 4995       -0.502263                0.409759         -0.087573   
 4996       -0.444850               -0.969700          0.050369   
 4997        0.520867                0.304688         -0.802427   
 4998       -0.251193                0.288159         -0.617303   
 4999       -0.167196                0.201170         -0.545743   
 
       benchmark_price_difference  trade_count_imbalance  \
 0                       0.497776              -0.720698   
 1                       0.139748               0.851864   
 2    