In [2]:
import pandas as pd
import os
from constants import *
from tqdm import tqdm
import json

from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [7]:
# Load the DHS AI use-case inventory spreadsheet (path is relative to the
# notebook's working directory).
df = pd.read_excel(
    io="./Data/2024_03_19_ocio_dhs-inventory-of-ai-use-cases.xlsx",
    engine="openpyxl",
)

In [3]:
# Chat model client used for all text generation below. The model is the
# third entry of `open_source_models`; failed calls are retried up to 10 times.
llm = ChatGroq(
    model_name=open_source_models[2],
    groq_api_key=groq_api_key,
    max_retries=10,
)

In [4]:
# Build one record per (risk class, use case) pair. Each record carries the
# prompts that will be sent to the LLM plus placeholders (None) for the
# generated text and the scores, which later cells fill in.
use_case_names = df["Use Case Name"].tolist()

# The analysis cells further down display and merge on "Use Case ID", so carry
# that identifier along whenever the inventory sheet provides the column.
use_case_ids = df["Use Case ID"].tolist() if "Use Case ID" in df.columns else None

data_risks = {risk_class: [] for risk_class in risk_classes}

for i, risk_class in enumerate(risk_classes):
    for j, use_case_name in enumerate(use_case_names):
        datas = {
            "positive_prompt": positive_concept_prompt[i] + " " + use_case_name + trailing_answer,
            "negative_prompt": negative_concept_prompt[i] + " " + use_case_name + trailing_answer,
            "positive_scenario": None,
            "negative_scenario": None,
            "Use Case Name": use_case_name,
            # The definition prompt depends only on the risk class, not on the use case.
            "positive_definition_prompt": positive_definition_prompt[i],
            "positive_definition": None,
            "neg_score": None,
            "pos_score": None,
        }
        if use_case_ids is not None:
            datas["Use Case ID"] = use_case_ids[j]
        data_risks[risk_class].append(datas)

In [5]:
# Snapshot the prompt records to disk; the scenario, definition and score
# fields are still None at this point.
with open("queries.json", "w") as queries_file:
    queries_file.write(json.dumps(data_risks))

In [6]:
#### Query Generation part ####
# Pipe the chat model into the string parser so every call returns plain text.
# `invoke` is used because `llm.predict` is deprecated.
output_parser = StrOutputParser()
generation_chain = llm | output_parser

for risk_class in risk_classes:
    records = data_risks[risk_class]
    if not records:
        # No use cases for this class, so there is nothing to generate.
        continue

    # Every record of a risk class shares the same definition prompt, so the
    # model is asked once per class and the answer is reused. Reading the
    # prompt from the first record keeps this cell independent of loop
    # variables left over from other cells.
    pos_def = generation_chain.invoke(records[0]["positive_definition_prompt"])

    for record in tqdm(records):
        record["positive_definition"] = pos_def
        record["positive_scenario"] = generation_chain.invoke(record["positive_prompt"])
        record["negative_scenario"] = generation_chain.invoke(record["negative_prompt"])

  warn_deprecated(
100%|██████████| 58/58 [06:44<00:00,  6.97s/it]
100%|██████████| 58/58 [07:47<00:00,  8.05s/it]


In [8]:
# Persist the records in their current state (prompts plus any generated
# definition/scenario text) so the generation step need not be repeated.
with open("queries_answer.json", "w") as answers_file:
    answers_file.write(json.dumps(data_risks))

In [20]:
#### Scoring part ####
# For each record, send the class definition as the source sentence together
# with the positive and negative scenarios to `query`, then store the first
# returned value as `pos_score` and the second as `neg_score`.
# NOTE(review): `query` is not defined in any visible cell; presumably it comes
# from `constants` or a removed cell. Confirm before a fresh run.
for risk_class in risk_classes:
    for row in tqdm(range(df.shape[0])):
        record = data_risks[risk_class][row]
        source_sentence = record['positive_definition']
        candidate_sentences = [
            record['positive_scenario'],
            record['negative_scenario'],
        ]

        payload = {
            "inputs": {
                "source_sentence": source_sentence,
                "sentences": candidate_sentences,
            }
        }
        output = query(payload)

        record['pos_score'] = output[0]
        record['neg_score'] = output[1]

100%|██████████| 58/58 [00:09<00:00,  6.37it/s]
100%|██████████| 58/58 [00:21<00:00,  2.74it/s]


In [29]:
# Spot-check one record: the entry at index 10 of the 'trustworthy' risk class.
data_risks['trustworthy'][10]

{'positive_prompt': 'Based on the context of a use case of an AI inventory provide a scenario when that use case would be most trustworthy in the view of public and Department of Homeland Security.\n\nUse Case: AI Curated Synthetic Data\nConcise answer within 50 words:',
 'negative_prompt': 'Based on the context of a use case of an AI inventory provide a scenario when that use case would be least trustworthy in the view of public and Department of Homeland Security.\n\nUse Case: AI Curated Synthetic Data\nConcise answer within 50 words:',
 'positive_scenario': 'In a disaster relief scenario, an AI-powered inventory system that uses synthetic data to accurately predict and manage the supply chain of essential goods would be highly trustworthy. This system could help the Department of Homeland Security ensure critical resources are efficiently distributed to the public during emergencies, thus saving lives and resources.',
 'negative_scenario': 'In a scenario where the AI-generated synth

In [17]:
# Sanity-check `query` on a toy input: one source sentence compared against
# three candidate sentences.
output = query(
    dict(
        inputs=dict(
            source_sentence="That is a happy person",
            sentences=[
                "That is a happy dog",
                "That is a very happy person",
                "Today is a sunny day",
            ],
        )
    )
)

In [18]:
# Display the result of the toy `query` call: one score per candidate
# sentence (presumably in the same order as the input list; confirm).
output

[0.6507517099380493, 0.9667371511459351, 0.4509974718093872]

In [11]:
# Write the records to disk as pretty-printed JSON (4-space indent).
with open("outputv1.json", "w") as results_file:
    results_file.write(json.dumps(data_risks, indent=4))

In [3]:
# Reload the saved records so the analysis cells can run without repeating
# the generation and scoring steps.
with open("outputv1.json", "r") as results_file:
    data_risks = json.loads(results_file.read())

In [4]:
# One row per use case for the 'explainable' risk class; each record's keys
# become the columns.
df1 = pd.DataFrame(data_risks['explainable'])

In [5]:
# Net score per use case: positive-scenario score minus negative-scenario score.
df1["explainable_score"] = df1["pos_score"].sub(df1["neg_score"])

In [17]:
# Inspect every field of the row labelled 14.
df1.loc[14]

positive_prompt               Based on the context of a use case of an AI in...
negative_prompt               Based on the context of a use case of an AI in...
positive_scenario             In a disaster-stricken area, Autonomous Survei...
negative_scenario             In a scenario where Autonomous Surveillance To...
Use Case Name                          Autonomous Surveillance Towers (Anduril)
positive_definition_prompt    Provide a concise definition of highly explain...
positive_definition           Highly explainable AI inventory, as viewed by ...
neg_score                                                              0.267931
pos_score                                                              0.528689
Use Case ID                                                              DHS-35
explainable_score                                                      0.260758
Name: 14, dtype: object

In [13]:
# Show the full positive scenario text for the row labelled 14.
df1.loc[14, 'positive_scenario']

'In a disaster-stricken area, Autonomous Surveillance Towers by Anduril can effectively monitor and maintain inventory of critical supplies in real-time, ensuring efficient distribution and preventing thefts. This use case demonstrates transparency, accountability, and the life-saving potential of AI technology, gaining public trust and DHS approval.'

In [18]:
# List the columns currently present in the frame.
df1.columns

Index(['positive_prompt', 'negative_prompt', 'positive_scenario',
       'negative_scenario', 'Use Case Name', 'positive_definition_prompt',
       'positive_definition', 'neg_score', 'pos_score', 'Use Case ID',
       'explainable_score'],
      dtype='object')

In [None]:
# Join the scored frame with `df2` on the use-case identifier and name.
# NOTE(review): `df2` is not defined in any visible cell; confirm where it
# is created. Re-running this cell merges the already merged frame again.
df1 = pd.merge(df1, df2, on=["Use Case ID", "Use Case Name"])