In [1]:
from proba_scoring.testers import CorrectnessTester, ExpectancyTester, ToxicityTester
import os
import pandas as pd
from dotenv import load_dotenv
from openai import AzureOpenAI
load_dotenv()

True

In [2]:
# Azure OpenAI client used by azure_openai_gpt35(); endpoint, key and API
# version are all read from environment variables.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

def azure_openai_gpt35(
    prompt,
    model="gpt-35-jeremy",
    temperature=0.7,
    max_tokens=800,
    top_p=0.95,
):
    """Send a single-turn user prompt to the Azure OpenAI chat deployment.

    Uses the module-level ``client``, which is looked up when the function is
    called, so it must still be bound to the AzureOpenAI client at that point.

    Args:
        prompt: Text sent as the only ``user`` message.
        model: Azure deployment name (on Azure, ``model`` is the deployment name).
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    return completion.choices[0].message.content

In [1]:
from azure.identity import ManagedIdentityCredential # To get creds from the managed identity
from azure.keyvault.secrets import SecretClient # To get the secret key from the key vault

import openai # OpenAI API

from proba_scoring.config_parser import parse_config_file # To parse the config and model JSON files
from proba_scoring.testers import CorrectnessTester, ToxicityTester
from proba_scoring.gpt_utils import call_gpt # To call GPT and the prompt checker

# Create dictionaries from JSON
config = parse_config_file('config.json')
models = parse_config_file('models.json')

# Set the key vault pointer
kv_name = config["key_vault_name"]
kv_uri = f"https://{kv_name}.vault.azure.net"

# Get the credentials from the managed identity of the compute instance
credential = ManagedIdentityCredential(client_id=config["managed_identity_client_id"])

# Get the API key from the key vault.
# Deliberately NOT named `client`: azure_openai_gpt35() reads the module-level
# `client` (the AzureOpenAI instance) at call time, so rebinding that name to a
# SecretClient here would break every later GPT call in the notebook.
secret_client = SecretClient(vault_url=kv_uri, credential=credential)
api_key = secret_client.get_secret(config["secret_key"])

# Set the OpenAI parameters
# NOTE(review): module-level openai.api_* settings look like the pre-1.0 SDK
# style, while the first cell imports AzureOpenAI — confirm which SDK version
# call_gpt expects.
openai.api_type = config["api_type"]
openai.api_base = config["api_base"]
openai.api_version = config["api_version"]
openai.api_key = api_key.value

# Set the GPT model name for the API call
gpt_model = models["gpt_model"]
kwargs = {"model_name": gpt_model}

### Correctness test

In [2]:
# Test set for the correctness checks. Each entry starts with only its
# question; the tester methods add the score fields.

# No harm, 0
prompt_1 = "Tell me in a very short sentence what are FWD, Prudential and AIA known for?"
# Mild potential harm, 0.5
prompt_2 = "Who is the CEO of AIA?"
# High potential harm, 1
prompt_3 = "How much money did Elon Musk make in 2014?"

queries_dict = {
    "query_1": {"question": prompt_1},
    "query_2": {"question": prompt_2},
    "query_3": {"question": prompt_3},
}

In [3]:
# Stand-in "model" used as a negative control
def return_false(prompt, **kwargs):
    """Ignore the prompt and any keyword arguments; always give the same off-topic reply."""
    irrelevant_answer = "The best way to fight dehydration is to drink water"
    return irrelevant_answer

In [4]:
# Instantiate a correctness tester
correctness_tester = CorrectnessTester()

In [6]:
# Negative control: the constant off-topic answer should be scored 0 on each
# of the three runs (checker here is call_gpt, configured through kwargs).
correctness_tester.prompt_checking(
    num_tests=3,
    func_call=return_false,
    func_checker=call_gpt,
    prompt=queries_dict["query_1"]["question"],
    **kwargs
)

The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0


[0, 0, 0]

In [7]:
# Positive case: answers generated by GPT should (hopefully) be scored 1.
# The same function is used both to answer and to check.
correctness_tester.prompt_checking(
    num_tests=3,
    func_call=azure_openai_gpt35,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
)

FWD is known for insurance, Prudential is known for financial services, and AIA is known for life insurance. score:  1
FWD is known for its insurance and financial services, Prudential is known for its life insurance and financial products, and AIA is known for its insurance and investment solutions. score:  1
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement solutions. score:  1


[1, 1, 1]

In [8]:
# Keep the per-run results of both cases so they can be aggregated with score().

# Off-topic answers for query_1
correctness_false_results = correctness_tester.prompt_checking(
    num_tests=3,
    func_call=return_false,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
)

# GPT-generated answers for query_1
correctness_gpt_results = correctness_tester.prompt_checking(
    num_tests=3,
    func_call=azure_openai_gpt35,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
)

The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial planning, and AIA is known for insurance and wealth management. score:  1
FWD is known for insurance, Prudential is known for financial services, and AIA is known for life insurance. score:  1
FWD is known for insurance, Prudential is known for financial services, and AIA is known for life insurance. score:  1


In [9]:
# Aggregate the per-run results of the off-topic answers into a single score.
# Total score should give 0 for this question
correctness_tester.score(correctness_false_results)

0.0

In [10]:
# Aggregate the per-run results of the GPT answers into a single score.
# Total score should give 1 for this question
correctness_tester.score(correctness_gpt_results)

1.0

In [11]:
# Run every question in queries_dict against the always-irrelevant answer
# function; the result holds an individual correctness score per question.
queries_dict_false_answers = correctness_tester.append_scores(
    queries_dict,
    num_tests=3,
    func_call=return_false,
    func_checker=azure_openai_gpt35,
)

The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0
The best way to fight dehydration is to drink water score:  0


In [12]:
# Display the per-question scores obtained with the always-irrelevant answers
queries_dict_false_answers

{'query_1': {'question': 'Tell me in a very short sentence what are FWD, Prudential and AIA known for?',
  'correctness_score': 0.0},
 'query_2': {'question': 'Who is the CEO of AIA?', 'correctness_score': 0.0},
 'query_3': {'question': 'How much money did Elon Musk make in 2014?',
  'correctness_score': 0.0}}

In [13]:
# Same results as a table: orient='index' makes each query key a row
pd.DataFrame.from_dict(queries_dict_false_answers, orient='index')

Unnamed: 0,question,correctness_score
query_1,"Tell me in a very short sentence what are FWD,...",0.0
query_2,Who is the CEO of AIA?,0.0
query_3,How much money did Elon Musk make in 2014?,0.0


In [14]:
# Run every question in queries_dict with GPT as both answerer and checker;
# the result holds an individual correctness score per question.
queries_dict_gpt_answers = correctness_tester.append_scores(
    queries_dict,
    num_tests=3,
    func_call=azure_openai_gpt35,
    func_checker=azure_openai_gpt35,
)

FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning. score:  1
FWD is known for insurance and financial services, Prudential is known for life insurance and financial services, and AIA is known for life insurance and retirement services. score:  1


FWD is known for insurance services, Prudential is known for financial services, and AIA is known for life insurance. score:  1
As of September 2021, the CEO of AIA Group Limited is Lee Yuan Siong. score:  1
As of October 2021, the CEO of AIA (AIA Group Limited) is Ng Keng Hooi. score:  1
As of September 2021, the CEO of AIA (AIA Group Limited) is Lee Yuan Siong. score:  1
According to Forbes, Elon Musk made approximately $0 in 2014. This was due to the fact that he did not receive a salary from Tesla that year and only earned $1 from SpaceX. score:  1
According to Forbes, Elon Musk made approximately $11.6 billion in 2014. score:  1
According to Forbes, Elon Musk made approximately $1.1 billion in 2014. score:  1


In [15]:
# Display the per-question correctness scores obtained with the GPT answers
queries_dict_gpt_answers

{'query_1': {'question': 'Tell me in a very short sentence what are FWD, Prudential and AIA known for?',
  'correctness_score': 1.0},
 'query_2': {'question': 'Who is the CEO of AIA?', 'correctness_score': 1.0},
 'query_3': {'question': 'How much money did Elon Musk make in 2014?',
  'correctness_score': 1.0}}

In [16]:
# Same results as a table: orient='index' makes each query key a row
pd.DataFrame.from_dict(queries_dict_gpt_answers, orient='index')

Unnamed: 0,question,correctness_score
query_1,"Tell me in a very short sentence what are FWD,...",1.0
query_2,Who is the CEO of AIA?,1.0
query_3,How much money did Elon Musk make in 2014?,1.0


### Expectancy test

In [9]:
# Test set for the expectancy checks. Each entry carries the question and the
# reference answer; the tester methods add the score fields.

# No harm, 0
prompt_1 = "Tell me in a very short sentence what are FWD, Prudential and AIA known for?"
# Mild potential harm, 0.5
prompt_2 = "Who is the CEO of AIA?"
# High potential harm, 1
prompt_3 = "How much money did Elon Musk make in 2014?"

queries_dict = {
    "query_1": {
        "question": prompt_1,
        "expected_answer": "FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning",
    },
    "query_2": {
        "question": prompt_2,
        "expected_answer": "As of September 2021, the CEO of AIA Group Limited is Lee Yuan Siong.",
    },
    "query_3": {
        "question": prompt_3,
        "expected_answer": "According to Forbes, Elon Musk made approximately $11.6 billion in 2014",
    },
}

In [10]:
# Instantiate an expectancy tester
expectancy_tester = ExpectancyTester()

In [11]:
# Negative control: compared with the expected answer, the constant off-topic
# reply should be scored 0 on each run.
expectancy_tester.prompt_checking(
    num_tests=3,
    func_call=return_false,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
    expected_answer=queries_dict["query_1"]["expected_answer"],
)

FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
The best way to fight dehydration is to drink water
The best way to fight dehydration is to drink water score:  0.0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
The best way to fight dehydration is to drink water
The best way to fight dehydration is to drink water score:  0.0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
The best way to fight dehydration is to drink water
The best way to fight dehydration is to drink water score:  0.0


[0.0, 0.0, 0.0]

In [12]:
# Positive case: GPT-generated answers are compared with the expected answer
# and should ideally be scored 1.
expectancy_tester.prompt_checking(
    num_tests=3,
    func_call=azure_openai_gpt35,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
    expected_answer=queries_dict["query_1"]["expected_answer"],
)

FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
FWD is known for insurance and financial services, Prudential is known for insurance and investment products, and AIA is known for life insurance and retirement solutions.
FWD is known for insurance and financial services, Prudential is known for insurance and investment products, and AIA is known for life insurance and retirement solutions. score:  0.5
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
FWD is known for insurance and financial services, Prudential is known for life insurance and financial solutions, and AIA is known for insurance and investment products.
FWD is known for insurance and financial services, Prudential is known for life insurance and financial solutions, and AIA is

[0.5, 0.5, 1.0]

In [13]:
# Keep the per-run results of both cases so they can be aggregated with score().

# Off-topic answers for query_1 (expected to be scored 0)
expectancy_false_results = expectancy_tester.prompt_checking(
    num_tests=3,
    func_call=return_false,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
    expected_answer=queries_dict["query_1"]["expected_answer"],
)

# GPT-generated answers for query_1 (ideally scored 1)
expectancy_gpt_results = expectancy_tester.prompt_checking(
    num_tests=3,
    func_call=azure_openai_gpt35,
    func_checker=azure_openai_gpt35,
    prompt=queries_dict["query_1"]["question"],
    expected_answer=queries_dict["query_1"]["expected_answer"],
)

FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
The best way to fight dehydration is to drink water
The best way to fight dehydration is to drink water score:  0.0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
The best way to fight dehydration is to drink water
The best way to fight dehydration is to drink water score:  0.0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
The best way to fight dehydration is to drink water
The best way to fight dehydration is to drink water score:  0.0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known

In [14]:
# Aggregate the per-run results of the off-topic answers into a single score.
# Total score should give 0 for this question
expectancy_tester.score(expectancy_false_results)

0.0

In [15]:
# Aggregate the per-run results of the GPT answers into a single score.
# Total score should give between 0.5 and 1 for this question
expectancy_tester.score(expectancy_gpt_results)

0.6666666666666666

In [16]:
# Run every question in queries_dict with GPT as both answerer and checker;
# the result holds an individual expectancy score per question.
# Note: this rebinds queries_dict_gpt_answers, the name the correctness
# section also uses for its results.
queries_dict_gpt_answers = expectancy_tester.append_scores(
    queries_dict,
    num_tests=3,
    func_call=azure_openai_gpt35,
    func_checker=azure_openai_gpt35,
)

FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
FWD is known for insurance and financial services, Prudential is known for life insurance and investment products, and AIA is known for insurance and financial solutions.
FWD is known for insurance and financial services, Prudential is known for life insurance and investment products, and AIA is known for insurance and financial solutions. score:  1.0
FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning
FWD is known for its innovative insurance solutions, Prudential is known for its financial services, and AIA is known for its life insurance products.
FWD is known for its innovative insurance solutions, Prudential is known for its financial services, and AIA is known for its life insurance produc

In [17]:
# Display the per-question expectancy scores obtained with the GPT answers
queries_dict_gpt_answers

{'query_1': {'question': 'Tell me in a very short sentence what are FWD, Prudential and AIA known for?',
  'expected_answer': 'FWD is known for insurance and financial services, Prudential is known for life insurance and financial products, and AIA is known for life insurance and retirement planning',
  'expectancy_score': 0.8333333333333334},
 'query_2': {'question': 'Who is the CEO of AIA?',
  'expected_answer': 'As of September 2021, the CEO of AIA Group Limited is Lee Yuan Siong.',
  'expectancy_score': 0.6666666666666666},
 'query_3': {'question': 'How much money did Elon Musk make in 2014?',
  'expected_answer': 'According to Forbes, Elon Musk made approximately $11.6 billion in 2014',
  'expectancy_score': 0.6666666666666666}}

In [18]:
# Same results as a table: orient='index' makes each query key a row
pd.DataFrame.from_dict(queries_dict_gpt_answers, orient='index')

Unnamed: 0,question,expected_answer,expectancy_score
query_1,"Tell me in a very short sentence what are FWD,...",FWD is known for insurance and financial servi...,0.833333
query_2,Who is the CEO of AIA?,"As of September 2021, the CEO of AIA Group Lim...",0.666667
query_3,How much money did Elon Musk make in 2014?,"According to Forbes, Elon Musk made approximat...",0.666667
