In [2]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from getpass import getpass
from json import load, dump

import google.generativeai as palm

# Read the API key without echoing it: a plain input() prompt leaves the
# typed secret in the notebook's saved cell output.
api_key = getpass('PaLM api key:')
palm.configure(api_key=api_key)

def estimate_token_count(text: str, max_chunk_tokens: int = 4000) -> int:
    """Estimate token count for the given text.

    Each chunk is measured with ``palm.count_message_tokens``. A chunk whose
    reported count reaches ``max_chunk_tokens`` is split in half by character
    position and both halves are measured separately; the counts of all chunks
    below the threshold are summed. Splitting by characters can cut through a
    token, so the result is an estimate.

    Args:
        text (str): Prompt text
        max_chunk_tokens (int): Reported count at or above which a chunk is
            split and re-measured. Defaults to 4000.
            NOTE(review): presumably tied to a limit of the counting API --
            confirm before changing.

    Returns:
        int: Estimated token count
    """
    pending_chunks = [text]
    token_count = 0

    while pending_chunks:
        # Addition is order-independent, so take chunks from the end (O(1))
        # rather than from the front of the list.
        chunk = pending_chunks.pop()
        chunk_token_count = palm.count_message_tokens(messages=chunk)['token_count']

        midpoint = len(chunk) // 2
        # A chunk of length 0 or 1 cannot be halved; re-queueing it would loop
        # forever, so its reported count is accepted as-is.
        if chunk_token_count >= max_chunk_tokens and midpoint > 0:
            pending_chunks.append(chunk[:midpoint])
            pending_chunks.append(chunk[midpoint:])
        else:
            token_count += chunk_token_count

    return token_count

## Use Case Prompt

In [6]:
# Read the whole use-case prompt into memory.
with open('./use-case.txt') as prompt_file:
    prompt = prompt_file.read()

# Uncomment to inspect the raw prompt text:
# print(prompt)

# Report how many tokens the prompt is estimated to use.
print(estimate_token_count(prompt))

665


In [3]:
# Save my prompt to typescript: append the template tail (with the
# {{functionality}} placeholder) and write the prompt program as JSON.
prompt_export = prompt + "functionality:{{functionality}}\n"
prompt_export +="use cases:"

prompt_programs = {
    'task': 'generate use cases',
    'prompt': prompt_export,
    'variables': ['functionality'],
    'temperature': 0.1
}

# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes, instead of leaving the handle open.
with open('../src/models/prompt-use-case.json', 'w') as fp:
    dump(prompt_programs, fp)

## Stakeholder Prompt

In [4]:
# Read the whole stakeholder prompt into memory.
with open('./stakeholder.txt') as prompt_file:
    prompt = prompt_file.read()

# Uncomment to inspect the raw prompt text:
# print(prompt)

# Report how many tokens the prompt is estimated to use.
print(estimate_token_count(prompt))

2037


In [5]:
# Save my prompt to typescript: append the template tail (with the
# {{functionality}} and {{usecase}} placeholders) and write the prompt
# program as JSON.
prompt_export = prompt + "\ndescription: <functionality>{{functionality}}</functionality>\n"
prompt_export += '<usecase>{{usecase}}</usecase>\n'
prompt_export += "stakeholders: "

prompt_programs = {
    'task': 'generate stakeholders',
    'prompt': prompt_export,
    'variables': ['functionality', 'usecase'],
    'temperature': 0.1,
    'stopSequences': ['</stakeholders>']
}

# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes, instead of leaving the handle open.
with open('../src/models/prompt-stakeholder.json', 'w') as fp:
    dump(prompt_programs, fp)

In [4]:
# import re
# import subprocess

# def transform_old_stakeholder(old_text):
#     lines = old_text.split('\n')
#     new_string = ''
#     for line in lines:
#         name = ''
#         category = ''
#         relevance = 'relevant'
#         if '<direct>' in line:
#             category = 'direct'
#             name = re.sub(r'<direct>(.*)</direct>', r'\1', line)
#         elif '<indirect>' in line:
#             category = 'indirect'
#             name = re.sub(r'<indirect>(.*)</indirect>', r'\1', line)
#         else:
#             continue
        
#         new_string += f'''<stakeholder type="{category}" relevance="{relevance}">{name}</stakeholder>\n'''

#     subprocess.run(['echo', '-n', new_string], capture_output=True, text=True)
#     subprocess.run(['pbcopy'], input=new_string, capture_output=True, text=True)

In [40]:
# org_text = '''<stakeholders>
# <direct>Doctor</direct>
# <direct>Patient</direct>
# <direct>Pharmacy</direct>
# <direct>Company of the AI product</direct>
# <direct>Hospital</direct>
# <indirect>Doctors who do not use this AI product</indirect>
# <indirect>Insurance companies</indirect>
# <indirect>Medical research institutions</indirect>
# <indirect>Family and friends of the patient</indirect>
# <indirect>Healthcare policymaker</indirect>
# </stakeholders>
# '''

# transform_old_stakeholder(org_text)

## Harm Prompt

In [7]:
# Read the whole harm prompt into memory.
with open('./harm.txt') as prompt_file:
    prompt = prompt_file.read()

# Uncomment to inspect the raw prompt text:
# print(prompt)

# Report how many tokens the prompt is estimated to use.
print(estimate_token_count(prompt))

3156


In [8]:
# Save my prompt to typescript: append the template tail (with the
# {{functionality}}, {{usecase}} and {{stakeholder}} placeholders) and write
# the prompt program as JSON.
prompt_export = prompt + "\nscenario: <functionality>{{functionality}}</functionality>\n"
prompt_export += '<usecase>{{usecase}}</usecase>\n'
prompt_export += '<stakeholder>{{stakeholder}}</stakeholder>\n'
prompt_export += "harms: "

prompt_programs = {
    # This cell exports the harm prompt; the task label previously read
    # 'generate stakeholders', copied over from the stakeholder export cell.
    'task': 'generate harms',
    'prompt': prompt_export,
    'variables': ['functionality', 'usecase', 'stakeholder'],
    'temperature': 0.1
}

# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes, instead of leaving the handle open.
with open('../src/models/prompt-harm.json', 'w') as fp:
    dump(prompt_programs, fp)

## Export Prompt Pairs as CSV

### Stakeholders

In [1]:
import numpy as np
import pandas as pd

# Split the stakeholder prompt into two parallel lists: the sections that
# start at a 'description:' line and the sections that start at a
# 'stakeholders:' line.
with open('./stakeholder.txt') as prompt_file:
    prompt = prompt_file.readlines()

descriptions = []
stakeholders = []

description_buffer = ''
stakeholder_buffer = ''

for line in prompt:
    if 'description:' in line:
        # A new description starts: close the stakeholder section, if any.
        if stakeholder_buffer:
            stakeholders.append(stakeholder_buffer)
            stakeholder_buffer = ''
        description_buffer += line
        continue

    if 'stakeholders:' in line:
        # The stakeholder section starts: close the description section.
        if description_buffer:
            descriptions.append(description_buffer)
            description_buffer = ''
        stakeholder_buffer += line
        continue

    # Continuation line: extend whichever section is currently open.
    # Lines seen before the first marker are dropped.
    if description_buffer:
        description_buffer += line
    if stakeholder_buffer:
        stakeholder_buffer += line

# Flush whatever is still buffered once the input is exhausted.
if stakeholder_buffer:
    stakeholders.append(stakeholder_buffer)
if description_buffer:
    descriptions.append(description_buffer)

In [2]:
# One row per example: the description section next to its stakeholder section.
df = pd.DataFrame(
    {
        'description': descriptions,
        'stakeholder': stakeholders,
    }
)
df.to_csv('stakeholders.csv', index=False)

### Harms

In [28]:
# Read the harm prompt as a list of lines (line terminators are kept).
with open('./harm.txt') as prompt_file:
    prompt = prompt_file.readlines()

In [29]:
# Split the harm prompt lines into two parallel lists: the sections that start
# at a 'scenario:' line (stored in `descriptions`) and the sections that start
# at a 'harms:' line (stored in `stakeholders`, the name the export cell reads).
descriptions = []
stakeholders = []

scenario_buffer = ''
harm_buffer = ''

for line in prompt:
    if 'scenario:' in line:
        # A new scenario starts: close the harm section, if any.
        if harm_buffer:
            stakeholders.append(harm_buffer)
            harm_buffer = ''
        scenario_buffer += line
        continue

    if 'harms:' in line:
        # The harm section starts: close the scenario section.
        if scenario_buffer:
            descriptions.append(scenario_buffer)
            scenario_buffer = ''
        harm_buffer += line
        continue

    # Continuation line: extend whichever section is currently open.
    # Lines seen before the first marker are dropped.
    if scenario_buffer:
        scenario_buffer += line
    if harm_buffer:
        harm_buffer += line

# Flush whatever is still buffered once the input is exhausted.
if harm_buffer:
    stakeholders.append(harm_buffer)
if scenario_buffer:
    descriptions.append(scenario_buffer)

In [17]:
# One row per example: the scenario section next to its harm section.
df = pd.DataFrame(
    {
        'description': descriptions,
        'harm': stakeholders,
    }
)
df.to_csv('harms.csv', index=False)

### Use Cases

In [23]:
# Read the use-case prompt as a list of lines (line terminators are kept).
with open('./use-case.txt') as prompt_file:
    prompt = prompt_file.readlines()

# Show the third line as a quick sanity check of the file layout.
print(prompt[2])

use cases: <intended>Software developers use it to quickly search library documentation.</intended>



In [25]:
# Split the use-case prompt lines into two parallel lists: the sections that
# start at a 'functionality:' line (stored in `descriptions`) and the sections
# that start at a 'use cases:' line (stored in `stakeholders`, the name the
# export cell reads).
descriptions = []
stakeholders = []

functionality_buffer = ''
use_case_buffer = ''

for line in prompt:
    if 'functionality:' in line:
        # A new functionality starts: close the use-case section, if any.
        if use_case_buffer:
            stakeholders.append(use_case_buffer)
            use_case_buffer = ''
        functionality_buffer += line
        continue

    if 'use cases:' in line:
        # The use-case section starts: close the functionality section.
        if functionality_buffer:
            descriptions.append(functionality_buffer)
            functionality_buffer = ''
        use_case_buffer += line
        continue

    # Continuation line: extend whichever section is currently open.
    # Lines seen before the first marker are dropped.
    if functionality_buffer:
        functionality_buffer += line
    if use_case_buffer:
        use_case_buffer += line

# Flush whatever is still buffered once the input is exhausted.
if use_case_buffer:
    stakeholders.append(use_case_buffer)
if functionality_buffer:
    descriptions.append(functionality_buffer)

In [27]:
# One row per example: the functionality section next to its use-case section.
df = pd.DataFrame(
    {
        'functionality': descriptions,
        'use cases': stakeholders,
    }
)
df.to_csv('use-case.csv', index=False)

### Functionality

In [33]:
from json import load, dump

# Load the summary prompt program; the context manager closes the file as soon
# as it has been parsed instead of leaving the handle open.
with open('../src/models/prompt-summary.json', 'r') as fp:
    summary_prompt = load(fp)

# NOTE(review): split('\n') drops the line terminators, whereas the other
# export cells use readlines(), which keeps them. Sections assembled from
# these lines are therefore joined without newlines -- confirm this is
# intended.
prompt = summary_prompt['prompt'].split('\n')

In [41]:
# Split the summary prompt lines into two parallel lists: the sections that
# start at a line beginning with '<instruction>' (stored in `descriptions`)
# and the sections that start at a line containing '<summary>' (stored in
# `stakeholders`, the name the export cell reads).
descriptions = []
stakeholders = []

instruction_buffer = ''
summary_buffer = ''

for line in prompt:
    if line.startswith('<instruction>'):
        # A new instruction starts: close the summary section, if any.
        if summary_buffer:
            stakeholders.append(summary_buffer)
            summary_buffer = ''
        instruction_buffer += line
        continue

    if '<summary>' in line:
        # The summary section starts: close the instruction section.
        if instruction_buffer:
            descriptions.append(instruction_buffer)
            instruction_buffer = ''
        summary_buffer += line
        continue

    # Continuation line: extend whichever section is currently open.
    # Lines seen before the first marker are dropped.
    if instruction_buffer:
        instruction_buffer += line
    if summary_buffer:
        summary_buffer += line

# Flush whatever is still buffered once the input is exhausted.
if summary_buffer:
    stakeholders.append(summary_buffer)
if instruction_buffer:
    descriptions.append(instruction_buffer)

In [43]:
# One row per example: the instruction section next to its summary section.
df = pd.DataFrame(
    {
        'prompt': descriptions,
        'summary': stakeholders,
    }
)
df.to_csv('prompt-summary.csv', index=False)

### Random Prompts

In [46]:
# Load the random prompts; the context manager closes the file as soon as it
# has been parsed instead of leaving the handle open.
with open('../public/data/random-prompts.json', 'r') as fp:
    prompts = load(fp)

# Write the prompts out as a single-column CSV.
df = pd.DataFrame({'prompt': prompts})
df.to_csv('prompts.csv', index=False)