In [1]:
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Sentence-embedding model used by the similarity computations in this notebook.
MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(MODEL_NAME)

In [3]:
def remove_redundant(input_text, output_text):
    """Drop an echoed prompt from the front of a model completion.

    Args:
        input_text: The prompt that was sent to the model.
        output_text: The text the model returned.

    Returns:
        If ``output_text`` begins with ``input_text``, the remainder with
        surrounding whitespace stripped; otherwise ``output_text`` unchanged
        (no stripping is applied in that case).
    """
    # Guard clause: nothing to remove when the completion does not echo the prompt.
    if not output_text.startswith(input_text):
        return output_text
    remainder = output_text[len(input_text):]
    return remainder.strip()

In [4]:
# Base-model pairs: rename the generic 'output' column, then strip the echoed
# prompt from the start of each completion.
data = (
    pd.read_json('base_input_output_pairs.json')
    .rename(columns={'output': 'output_base'})
    .assign(
        output_base=lambda frame: frame.apply(
            lambda row: remove_redundant(row['input'], row['output_base']), axis=1
        )
    )
)

In [5]:
# Fine-tuned-model pairs: same treatment as the base pairs, stored under
# 'output_fine_tuned'.
data2 = (
    pd.read_json('input_output_pairs.json')
    .rename(columns={'output': 'output_fine_tuned'})
    .assign(
        output_fine_tuned=lambda frame: frame.apply(
            lambda row: remove_redundant(row['input'], row['output_fine_tuned']), axis=1
        )
    )
)

In [6]:
# Prompt-type label for each of the 15 prompts: five prompt types, three
# consecutive prompts per type, in this order.
s_class = pd.Series(
    [
        prompt_kind
        for prompt_kind in ("Zero-shot", "One-shot", "Few-shot", "Negative", "COT")
        for _ in range(3)
    ],
    name="prompt_class",
)

In [7]:
# Pairs for the fine-tuned model with context: same treatment as the other two
# files, stored under 'output_fine_tuned_context'.
data3 = (
    pd.read_json('input_output_pairs.json2')
    .rename(columns={'output': 'output_fine_tuned_context'})
    .assign(
        output_fine_tuned_context=lambda frame: frame.apply(
            lambda row: remove_redundant(row['input'], row['output_fine_tuned_context']), axis=1
        )
    )
)

In [8]:
# Join the three result sets on the columns they share (pandas' default merge
# keys), giving one row per prompt with every model's output side by side.
data_final = data.merge(data2).merge(data3)

In [9]:
# Attach the prompt-type labels; the Series is aligned to the frame by index.
data_final = data_final.assign(prompt_type=s_class)

In [10]:
# Preview the first rows of the merged, labelled frame.
data_final.head()

Unnamed: 0,input,output_base,output_fine_tuned,output_fine_tuned_context,prompt_type
0,List the most pressing topics regarding regula...,The most pressing topics regarding regulation ...,- Defining AI accurately\n- The role of AI in ...,Describe the current regulatory landscape for ...,Zero-shot
1,Who owns material generated by a company’s lar...,The question of who owns the intellectual prop...,### Question: What ethical principles guide th...,Question: Who owns the intellectual property ...,Zero-shot
2,Describe how China and the United States are a...,What are the main differences between the Chin...,### Answer: China is moving quickly to draft n...,### Question: Why is regulating Generative AI ...,Zero-shot
3,Task: You are a chat bot called AI PolicyChat....,The government can ensure that people negative...,The government can help people negatively affe...,AI can exacerbate societal harms and threaten ...,One-shot
4,Task: You are a chat bot called AI PolicyChat....,AI regulators attempt to enforce their regulat...,AI regulators enforce through compliance and i...,AI regulators attempt to enforce their regulat...,One-shot


### Calculate Similarity

In [11]:
def similarity_score(row):
    """Compute the three pairwise cosine similarities between a row's model outputs.

    Args:
        row: A record exposing 'output_base', 'output_fine_tuned' and
            'output_fine_tuned_context' text fields.

    Returns:
        A three-element list: [base vs fine-tuned, base vs fine-tuned-with-context,
        fine-tuned vs fine-tuned-with-context].
    """
    # Each text is encoded in its own call, in this order.
    base_vec, tuned_vec, context_vec = (
        model.encode([row[column]])[0]
        for column in ('output_base', 'output_fine_tuned', 'output_fine_tuned_context')
    )
    compared_pairs = (
        (base_vec, tuned_vec),
        (base_vec, context_vec),
        (tuned_vec, context_vec),
    )
    # cosine_similarity returns a 1x1 matrix for a single pair; take its only entry.
    return [cosine_similarity([left], [right])[0][0] for left, right in compared_pairs]

In [12]:
# Score every row, keep the raw three-element lists in 'similarity', then fan
# them out into one named column per model pair.
pairwise_scores = data_final.apply(similarity_score, axis=1)
data_final['similarity'] = pairwise_scores
score_columns = [
    'similarity_base_fine_tuned',
    'similarity_base_fine_tuned_context',
    'similarity_fine_tuned_fine_tuned_context',
]
data_final[score_columns] = pd.DataFrame(pairwise_scores.tolist(), index=data_final.index)
# Expose the row number as an 'index' column.
data_final = data_final.reset_index()

In [13]:
# The list-valued helper column is redundant once each score has its own column.
data_final = data_final.drop('similarity', axis=1)
data_final.head()

Unnamed: 0,index,input,output_base,output_fine_tuned,output_fine_tuned_context,prompt_type,similarity_base_fine_tuned,similarity_base_fine_tuned_context,similarity_fine_tuned_fine_tuned_context
0,0,List the most pressing topics regarding regula...,The most pressing topics regarding regulation ...,- Defining AI accurately\n- The role of AI in ...,Describe the current regulatory landscape for ...,Zero-shot,0.679636,0.803659,0.709741
1,1,Who owns material generated by a company’s lar...,The question of who owns the intellectual prop...,### Question: What ethical principles guide th...,Question: Who owns the intellectual property ...,Zero-shot,0.35539,0.819518,0.378594
2,2,Describe how China and the United States are a...,What are the main differences between the Chin...,### Answer: China is moving quickly to draft n...,### Question: Why is regulating Generative AI ...,Zero-shot,0.669402,0.61445,0.634551
3,3,Task: You are a chat bot called AI PolicyChat....,The government can ensure that people negative...,The government can help people negatively affe...,AI can exacerbate societal harms and threaten ...,One-shot,0.77291,0.595537,0.668075
4,4,Task: You are a chat bot called AI PolicyChat....,AI regulators attempt to enforce their regulat...,AI regulators enforce through compliance and i...,AI regulators attempt to enforce their regulat...,One-shot,0.849945,0.762934,0.799199


In [14]:
# Summary statistics for the numeric columns (the row index and the three similarity scores).
data_final.describe()

Unnamed: 0,index,similarity_base_fine_tuned,similarity_base_fine_tuned_context,similarity_fine_tuned_fine_tuned_context
count,15.0,15.0,15.0,15.0
mean,7.0,0.585984,0.585062,0.633448
std,4.472136,0.209943,0.248872,0.124114
min,0.0,0.043731,0.00442,0.378594
25%,3.5,0.462042,0.554798,0.611432
50%,7.0,0.669402,0.61445,0.668075
75%,10.5,0.696268,0.764545,0.713964
max,14.0,0.849945,0.819518,0.799199


In [15]:
# Horizontal bar chart: one bar per prompt (y), bar length is the cosine
# similarity between the base and fine-tuned outputs (x), coloured by prompt type.
# Each channel is built with its matching class (alt.Y for y, alt.X for x).
alt.Chart(data_final).mark_bar().encode(
    y=alt.Y("index:N").title("Prompt Number"),
    x=alt.X("similarity_base_fine_tuned:Q").title("Cosine Similarity"),
    color="prompt_type"
).properties(
    title="Model Output Cosine Similarity (Base & Fine-Tuned Models) by Input Prompt"
).interactive()

In [16]:
# Horizontal bar chart: one bar per prompt (y), bar length is the cosine
# similarity between the base and fine-tuned-with-context outputs (x), coloured
# by prompt type. Each channel is built with its matching class (alt.Y for y,
# alt.X for x).
alt.Chart(data_final).mark_bar().encode(
    y=alt.Y("index:N").title("Prompt Number"),
    x=alt.X("similarity_base_fine_tuned_context:Q").title("Cosine Similarity"),
    color="prompt_type"
).properties(
    title="Model Output Cosine Similarity (Base & Fine-Tuned w/ Context Models) by Input Prompt"
).interactive()

In [17]:
# Horizontal bar chart: one bar per prompt (y), bar length is the cosine
# similarity between the fine-tuned and fine-tuned-with-context outputs (x),
# coloured by prompt type. Each channel is built with its matching class
# (alt.Y for y, alt.X for x).
alt.Chart(data_final).mark_bar().encode(
    y=alt.Y("index:N").title("Prompt Number"),
    x=alt.X("similarity_fine_tuned_fine_tuned_context:Q").title("Cosine Similarity"),
    color="prompt_type"
).properties(
    title="Model Output Cosine Similarity (Fine-Tuned & Fine-Tuned w/ Context Models) by Input Prompt"
).interactive()

In [18]:
# Persist the scored comparison table. The DataFrame index is written as well
# (pandas default), so the CSV gets an unnamed leading column.
data_final.to_csv("prompt_engineering_new_prompts.csv")

In [19]:
# Load the prompt-style experiment table: per the preview below, it holds the
# input, the reference answer ('true_output'), the context, and one output
# column per model / prompt-style combination.
df = pd.read_csv("prompt_outputs2.csv")

In [20]:
# Discard the 'Unnamed: 0' column that came in with the CSV.
df = df.drop(columns='Unnamed: 0')

In [21]:
# Preview the raw experiment outputs before any prompt text is stripped.
df.head()

Unnamed: 0,input,true_output,context,output_fine_tuned_zero,output_fine_tuned_one,output_fine_tuned_few,output_fine_tuned_neg,output_fine_tuned_cot,output_base_zero,output_base_one,output_base_few,output_base_neg,output_base_cot
0,What is the trajectory of Chinese AI governanc...,Chinese AI governance is heading towards draft...,"In this series of three papers, I will attempt...",China is the only country that has implemented...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,"sierp 23, 2022\nChina’s AI governance is a com...",Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....
1,How does the Europe Artificial Intelligence Ac...,General-purpose AI systems must be transparent...,AN OVERVIEW OF NATIONAL AI STRATEGIES AND POLI...,The Europe Artificial Intelligence Act (AAI) r...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,"sierp 23, 2022\nThe European Commission has pu...",Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....
2,How do providers of high-risk AI systems manag...,"Providers follow a step-by-step process, inclu...",step-by-step process for declaration of confor...,The European Commission published a Staff Work...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,sierpniu 2021\nThe European Commission has pub...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....
3,What are the cross-sectoral principles of the ...,"The principles cover safety, transparency, fai...",Five cross-sectoral principles: When implement...,The UK’s fourth evaluation report on AI regula...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,sierp 2018\nThe UK regulatory framework for fi...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....
4,What are some of the jobs that may be affected...,Generative AI has the possibility to affect ma...,"for the Future of Work.\n33. Briggs, J. et al....",How does the UK government define Generative A...,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,"sierp 23, 2022\nGenerative AI is a type of art...",Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....,Task: You are a chat bot called AI PolicyChat....


In [22]:
# Prompt-template prefixes for the one-shot, few-shot, negative and COT prompts.
# They are removed from the generated text by exact (non-regex) substring
# replacement later in the notebook, so the whitespace and line breaks inside
# these literals must stay exactly as written.
one_shot_remove = '''Task: You are a chat bot called AI PolicyChat. Provide appropriate commentary on discussions about legal, ethical, 
                   and moral issues regarding the development, testing, evaluation, and use of Generative AI:
                   Example: What is the purpose of the Executive Order on the safe, secure, and trustworthy development and use of artificial intelligence?
                   =>
                   The purpose of the Executive Order is to guide the development and use of artificial intelligence (AI) in a manner that is safe, secure, 
                   and trustworthy. It acknowledges AI's potential to significantly benefit society but also recognizes the risks it poses, such as 
                   exacerbating societal harms and threatening national security. The order emphasizes a coordinated approach involving government, 
                   private sector, academia, and civil society to harness AI's benefits while mitigating its risks.
                   Prompt: '''
few_shot_remove = '''Task: You are a chat bot called AI PolicyChat. Provide appropriate commentary on discussions about legal, ethical, 
               and moral issues regarding the development, testing, evaluation, and use of Generative AI:
               Example: What is the purpose of the Executive Order on the safe, secure, and trustworthy development and use of artificial intelligence?
               =>
               The purpose of the Executive Order is to guide the development and use of artificial intelligence (AI) in a manner that is safe, secure, 
               and trustworthy. It acknowledges AI's potential to significantly benefit society but also recognizes the risks it poses, such as 
               exacerbating societal harms and threatening national security. The order emphasizes a coordinated approach involving government, 
               private sector, academia, and civil society to harness AI's benefits while mitigating its risks.
               Example: Which international forums focus on AI governance? =>  AI governance has been a focus of discussions in the G7, 
               the U.S.-EU Trade and Technology Council, and the Global Partnership on AI (GPAI).
               Prompt: '''
neg_remove = '''Task: You are a chat bot called AI PolicyChat. Do not provide any commentary on discussions about legal, ethical, 
               and moral issues regarding the development, testing, evaluation, and use of Generative AI. Ensure that your answers are
               the opposite of the correct answer. Prompt: '''
cot_remove = '''Task: You are a chat bot called AI PolicyChat. Provide appropriate commentary on discussions about legal, ethical, 
               and moral issues regarding the development, testing, evaluation, and use of Generative AI:
               Example:
               Question: Why should international discourse take China's AI regulations seriously? 
                Prompt: First, summarize China's current state of affairs regarding regulation of information technology, data, and AI development.
                Second, discuss China's most recent legislative changes regarding AI.
                Then, state why other nation's should pay close attention to China's new AI regulations. =>
                International discourse on Chinese AI governance often fails to take these regulations seriously, 
                to engage with either their content or the policymaking process. International commentary often falls into one of two 
                traps: dismissing China's regulations as irrelevant or using them as a political prop. Analysts and policymakers in 
                other countries often treat them as meaningless pieces of paper.
                China's AI regulations share three structural similarities: they focus on algorithms as a point of entry, build 
                regulatory tools and bureaucratic know-how, and adopt a vertical and iterative approach towards regulation.
                China's AI regulations reshape global AI deployment and offer insights into effective governance tools, 
                influencing various sectors worldwide.
                Prompt: '''

In [23]:
# List the column names; the output columns named here are the ones cleaned and scored below.
df.columns

Index(['input', 'true_output', 'context', 'output_fine_tuned_zero',
       'output_fine_tuned_one', 'output_fine_tuned_few',
       'output_fine_tuned_neg', 'output_fine_tuned_cot', 'output_base_zero',
       'output_base_one', 'output_base_few', 'output_base_neg',
       'output_base_cot'],
      dtype='object')

In [24]:
# Template prefix to strip for each prompt style (zero-shot outputs have no template).
prefix_by_style = {
    'one': one_shot_remove,
    'few': few_shot_remove,
    'neg': neg_remove,
    'cot': cot_remove,
}

# Output column -> prefix to remove, for the fine-tuned columns first and then
# the base columns.
columns_and_removals = {
    f'output_{family}_{style}': prefix
    for family in ('fine_tuned', 'base')
    for style, prefix in prefix_by_style.items()
}

for column, removal_text in columns_and_removals.items():
    # Literal (non-regex) removal of the prompt template from the generated text.
    without_template = df[column].str.replace(removal_text, "", regex=False)
    df[column] = without_template
    # Then drop the echoed question if the text still starts with it.
    df[column] = df.apply(lambda row: remove_redundant(row['input'], row[column]), axis=1)
df.head()

Unnamed: 0,input,true_output,context,output_fine_tuned_zero,output_fine_tuned_one,output_fine_tuned_few,output_fine_tuned_neg,output_fine_tuned_cot,output_base_zero,output_base_one,output_base_few,output_base_neg,output_base_cot
0,What is the trajectory of Chinese AI governanc...,Chinese AI governance is heading towards draft...,"In this series of three papers, I will attempt...",China is the only country that has implemented...,=> \n The trajectory of Chin...,=> The U.S. must not underestimate \n ...,=> \n\nSeveral high-profile disasters involvin...,=> \n The Trajectory of Chinese...,"sierp 23, 2022\nChina’s AI governance is a com...",=> \n The trajectory of Chin...,=> \n The trajectory of Chinese ...,=> \n The trajectory of Chinese ...,=> \n China's AI governance is ...
1,How does the Europe Artificial Intelligence Ac...,General-purpose AI systems must be transparent...,AN OVERVIEW OF NATIONAL AI STRATEGIES AND POLI...,The Europe Artificial Intelligence Act (AAI) r...,=> \n General-purpose AI sys...,=> The Act categorizes AI into four levels of...,=> \n\nGeneral-purpose AI systems and high-imp...,=> \n The EU’s Artificial Intel...,"sierp 23, 2022\nThe European Commission has pu...",=> \n The European Artificia...,=> \n The European Artificial In...,=> \n The Europe Artificial Inte...,=> \n The European Artificial I...
2,How do providers of high-risk AI systems manag...,"Providers follow a step-by-step process, inclu...",step-by-step process for declaration of confor...,The European Commission published a Staff Work...,=> \n The Commission's AI Ac...,=> The Commission’s proposed AI Act will requ...,=> \n\nThe Commission’s proposed AI Act requir...,=> \n The European Commission's...,sierpniu 2021\nThe European Commission has pub...,=> \n The European Commissio...,=> \n The European Commission ha...,=> \n AI PolicyChat: I am not su...,=> \n The European Commission (...
3,What are the cross-sectoral principles of the ...,"The principles cover safety, transparency, fai...",Five cross-sectoral principles: When implement...,The UK’s fourth evaluation report on AI regula...,=> \n The cross-sectoral pri...,=> The UK’s cross-sectoral principles are lis...,"=> \n\nPrinciples: Trust, Safety, Accountabili...",=> \n Principle: Independent re...,sierp 2018\nThe UK regulatory framework for fi...,=> \n The cross-sectoral pri...,=> \n The UK regulatory framewor...,=> \n The cross-sectoral princip...,=> \n The UK's regulatory frame...
4,What are some of the jobs that may be affected...,Generative AI has the possibility to affect ma...,"for the Future of Work.\n33. Briggs, J. et al....",How does the UK government define Generative A...,=> \n Jobs such as accountan...,=> Generative AI may affect jobs in the \n ...,"=> \n Retail workers, call centr...",=> \n Answer: Generative AI has...,"sierp 23, 2022\nGenerative AI is a type of art...",=> \n Generative AI may affe...,=>\n Generative AI may affect a ...,=> \n The jobs that may be affec...,=> \n Generative AI has the pot...


In [25]:
def calculate_similarity(true_emb, model_emb):
    """Return the cosine similarity between two embedding vectors as a scalar.

    Args:
        true_emb: Embedding of the reference text.
        model_emb: Embedding of the model-generated text.

    Returns:
        The single entry of the 1x1 similarity matrix for the two vectors.

    NOTE(review): an earlier comment tied this helper to GPT-4; nothing in the
    function is GPT-4 specific. Presumably the reference answers came from
    GPT-4 — TODO confirm.
    """
    pairwise = cosine_similarity([true_emb], [model_emb])
    return pairwise[0][0]

In [26]:
# Output columns to score against the reference answers in 'true_output'.
model_output_columns = [
    'output_fine_tuned_zero',
    'output_fine_tuned_one', 'output_fine_tuned_few',
    'output_fine_tuned_neg', 'output_fine_tuned_cot', 'output_base_zero',
    'output_base_one', 'output_base_few', 'output_base_neg',
    'output_base_cot',
]

# The reference embeddings do not depend on the column being scored, so encode
# them once instead of once per column.
embeddings_true = model.encode(df['true_output'].tolist())

for model_col in model_output_columns:
    embeddings_model = model.encode(df[model_col].tolist())

    # Row-wise cosine similarity between each reference answer and the matching
    # model output. The loop variables avoid reusing the name of the global `model`.
    similarities = [
        calculate_similarity(true_emb, model_emb)
        for true_emb, model_emb in zip(embeddings_true, embeddings_model)
    ]

    # Store the similarities in a new column
    df[f'similarity_{model_col}'] = similarities

# Display the updated DataFrame with similarity scores
df.head()

Unnamed: 0,input,true_output,context,output_fine_tuned_zero,output_fine_tuned_one,output_fine_tuned_few,output_fine_tuned_neg,output_fine_tuned_cot,output_base_zero,output_base_one,...,similarity_output_fine_tuned_zero,similarity_output_fine_tuned_one,similarity_output_fine_tuned_few,similarity_output_fine_tuned_neg,similarity_output_fine_tuned_cot,similarity_output_base_zero,similarity_output_base_one,similarity_output_base_few,similarity_output_base_neg,similarity_output_base_cot
0,What is the trajectory of Chinese AI governanc...,Chinese AI governance is heading towards draft...,"In this series of three papers, I will attempt...",China is the only country that has implemented...,=> \n The trajectory of Chin...,=> The U.S. must not underestimate \n ...,=> \n\nSeveral high-profile disasters involvin...,=> \n The Trajectory of Chinese...,"sierp 23, 2022\nChina’s AI governance is a com...",=> \n The trajectory of Chin...,...,0.765012,0.777506,0.648855,0.719354,0.811595,0.781133,0.756371,0.647036,0.735829,0.735671
1,How does the Europe Artificial Intelligence Ac...,General-purpose AI systems must be transparent...,AN OVERVIEW OF NATIONAL AI STRATEGIES AND POLI...,The Europe Artificial Intelligence Act (AAI) r...,=> \n General-purpose AI sys...,=> The Act categorizes AI into four levels of...,=> \n\nGeneral-purpose AI systems and high-imp...,=> \n The EU’s Artificial Intel...,"sierp 23, 2022\nThe European Commission has pu...",=> \n The European Artificia...,...,0.637318,0.66573,0.699796,0.722785,0.525669,0.528545,0.63463,0.687377,0.532823,0.606343
2,How do providers of high-risk AI systems manag...,"Providers follow a step-by-step process, inclu...",step-by-step process for declaration of confor...,The European Commission published a Staff Work...,=> \n The Commission's AI Ac...,=> The Commission’s proposed AI Act will requ...,=> \n\nThe Commission’s proposed AI Act requir...,=> \n The European Commission's...,sierpniu 2021\nThe European Commission has pub...,=> \n The European Commissio...,...,0.354763,0.528824,0.428794,0.353862,0.244322,0.314296,0.319456,0.405824,0.33341,0.32173
3,What are the cross-sectoral principles of the ...,"The principles cover safety, transparency, fai...",Five cross-sectoral principles: When implement...,The UK’s fourth evaluation report on AI regula...,=> \n The cross-sectoral pri...,=> The UK’s cross-sectoral principles are lis...,"=> \n\nPrinciples: Trust, Safety, Accountabili...",=> \n Principle: Independent re...,sierp 2018\nThe UK regulatory framework for fi...,=> \n The cross-sectoral pri...,...,0.610248,0.592511,0.489006,0.532158,0.521144,0.498794,0.621754,0.572679,0.572953,0.502479
4,What are some of the jobs that may be affected...,Generative AI has the possibility to affect ma...,"for the Future of Work.\n33. Briggs, J. et al....",How does the UK government define Generative A...,=> \n Jobs such as accountan...,=> Generative AI may affect jobs in the \n ...,"=> \n Retail workers, call centr...",=> \n Answer: Generative AI has...,"sierp 23, 2022\nGenerative AI is a type of art...",=> \n Generative AI may affe...,...,0.709629,0.680577,0.732184,0.652925,0.765701,0.499447,0.734304,0.741899,0.776351,0.809365


In [None]:
# Turn the row index into a regular column.
# NOTE(review): not idempotent — a first run adds an 'index' column and a second
# run adds a further 'level_0' column. The saved column listing further down
# (both names present) suggests this cell was executed twice — confirm.
df = df.reset_index()

In [34]:
# Later cells need a 'Prompt Number' column holding the row number.
# reset_index() names that column 'index' on a first run and 'level_0' when it
# is run again on a frame that already has 'index', so accept either. The guard
# makes the cell safe to re-run and keeps it working after Restart & Run All,
# where only 'index' exists.
if 'Prompt Number' not in df.columns:
    row_number_col = 'level_0' if 'level_0' in df.columns else 'index'
    df = df.rename(columns={row_number_col: 'Prompt Number'})
df.columns

Index(['Prompt Number', 'index', 'input', 'true_output', 'context',
       'output_fine_tuned_zero', 'output_fine_tuned_one',
       'output_fine_tuned_few', 'output_fine_tuned_neg',
       'output_fine_tuned_cot', 'output_base_zero', 'output_base_one',
       'output_base_few', 'output_base_neg', 'output_base_cot',
       'similarity_output_fine_tuned_zero', 'similarity_output_fine_tuned_one',
       'similarity_output_fine_tuned_few', 'similarity_output_fine_tuned_neg',
       'similarity_output_fine_tuned_cot', 'similarity_output_base_zero',
       'similarity_output_base_one', 'similarity_output_base_few',
       'similarity_output_base_neg', 'similarity_output_base_cot'],
      dtype='object')

In [76]:
# Similarity columns to plot: fine-tuned scores first, then base scores, each
# in zero / one / few / neg / cot order.
similarity_columns = [
    f'similarity_output_{family}_{style}'
    for family in ('fine_tuned', 'base')
    for style in ('zero', 'one', 'few', 'neg', 'cot')
]

# Reshape to long form: one row per (prompt, comparison) pair.
df_melt = pd.melt(
    df,
    id_vars=['Prompt Number'],
    value_vars=similarity_columns,
    var_name="Comparison with Ground Truth",
    value_name="Cosine Similarity",
)

# NOTE(review): 172 is an unexplained cutoff — confirm what subset it is meant to select.
df_cut = df_melt.iloc[:172]
df_cut.head()

Unnamed: 0,Prompt Number,Comparison with Ground Truth,Cosine Similarity
0,0,similarity_output_fine_tuned_zero,0.765012
1,1,similarity_output_fine_tuned_zero,0.637318
2,2,similarity_output_fine_tuned_zero,0.354763
3,3,similarity_output_fine_tuned_zero,0.610248
4,4,similarity_output_fine_tuned_zero,0.709629


In [77]:
# Save the wide table of outputs and similarity scores. The DataFrame index is
# written as well (pandas default), so the CSV gets an unnamed leading column.
df.to_csv("final_prompt_similarity_scores.csv")

In [81]:
# Overlaid (unstacked) semi-transparent bars: for each prompt, one bar per
# model / prompt-style comparison against the reference answer.
tooltip_fields = ["Prompt Number:N", "Comparison with Ground Truth:N", "Cosine Similarity:Q"]
overlay_chart = alt.Chart(df_melt).mark_bar(opacity=0.5).encode(
    x=alt.X("Prompt Number:N").title("Prompt Number"),
    y=alt.Y("Cosine Similarity:Q").title("Cosine Similarity").stack(None),
    color="Comparison with Ground Truth:N",
    tooltip=tooltip_fields,
)
overlay_chart.properties(
    title="Model Output Cosine Similarity against Ground Truth"
).interactive()