In [19]:
import warnings
# Install a blanket "ignore" filter: every warning raised after this cell runs
# (deprecation notices from the libraries imported below included) is hidden.
# NOTE(review): consider narrowing this to specific categories so real problems stay visible.
warnings.filterwarnings("ignore")

In [17]:
import os
import pprint
import time

import openai
import wandb
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from prettytable import PrettyTable
from pypdf import PdfReader
from tenacity import retry, stop_after_attempt, wait_exponential

# Read the OpenAI API key from the environment instead of storing it in the notebook:
# a key committed with the notebook is exposed to everyone who can read the file.
# NOTE(review): the key that used to be hardcoded here must be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    # Fail fast with a clear message rather than with an authentication error on the first API call.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before running this notebook."
    )

# Client used by the cells below for chat-completion requests.
openai_client = openai.OpenAI(api_key=openai.api_key)

# Set to True to log the config and results to Weights & Biases.
use_wandb = True

# Set to True to log all prompt chains and their responses through Portkey (https://portkey.ai/).
use_portkey = False

In [20]:
import os
import sys

# Put the parent directory at the front of the import path (only once) so that
# modules living one level above this notebook can be imported.
rpath = os.path.abspath('..')
if sys.path.count(rpath) == 0:
    sys.path.insert(0, rpath)

from chunking import ChunkingApproaches

# Creating Prompt Database

In [18]:
from collections import namedtuple

# Lightweight record for one PDF page: an id, the extracted text, and a metadata dict.
Page = namedtuple("Page", "id page_content metadata")

def pdf_reader(file_path):
    """Extract the text of every non-empty page of a PDF.

    Args:
        file_path: Path of the PDF file, passed straight to ``PdfReader``.

    Returns:
        A list of ``Page`` records in document order. Each record carries the
        zero-based page index as ``id``, the stripped page text as
        ``page_content`` and ``{"page_number": <index>}`` as ``metadata``.
        Pages whose stripped text is empty are left out, so ids may have gaps.
    """
    # Lazily pair each page index with its stripped text (one extraction per page).
    indexed_text = (
        (index, pdf_page.extract_text().strip())
        for index, pdf_page in enumerate(PdfReader(file_path).pages)
    )
    return [
        Page(id=index, page_content=text, metadata={"page_number": index})
        for index, text in indexed_text
        if text
    ]

# Load the contract PDF that sits next to this notebook and split it into Page records.
file_path = './Robinson.pdf'
pdf_pages = pdf_reader(file_path)
# Bare expression: the notebook renders the full list of pages as the cell output.
# NOTE(review): the stored output contains the complete contract text, including personal
# details (passport number, e-mail address) — consider showing only a count or a short
# slice and clearing the output before sharing.
pdf_pages

[Page(id=0, page_content='ADVISORY SERVICES AGREEMENT \n \nThis Advisory Services Agreement is entered into as of June 15th, 2023 (the “Effective Date ”), by and \nbetween Cloud Investments Ltd., ID 51-426526-3, an Israeli company (the " Company "), and Mr. Jack \nRobinson, Passport Number 780055578, residing at 1 Rabin st, Tel Aviv, Israel, Email: \njackrobinson@gmail.com ("Advisor "). \n \nWhereas, Advisor has expertise and/or knowledge and/or relationships, which are relevant to the \nCompany ’s business and the Company has asked Advisor to provide it with certain Advisory \nservices, as described in this Agreement; and \nWhereas,  Advisor has agreed to provide the Company with such services, subject to the terms set forth \nin this Agreement. \n \nNOW THEREFORE THE PARTIES AGREE AS FOLLOWS: \n \n1. Services:   \n1.1 Advisor shall provide to the Company, as an independent contractor, software development \nservices, and / or any other services as agreed by the parties from time to t

In [None]:

# Embedding model wrapper; uses the same API key that was configured on the openai module.
embed = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=openai.api_key)
# Build a FAISS index over the extracted PDF pages.
# NOTE(review): pdf_pages holds Page namedtuples rather than LangChain Document objects and
# carries integer ids; this cell has no execution count, so confirm from_documents accepts them.
vectorstore = FAISS.from_documents(pdf_pages, embed)

In [None]:
# Query text for the vector store lookup.
# TODO: the question is an empty placeholder — fill in a real query before running
# (this cell has no execution count, so it has presumably never been run as-is).
question = ''
# Bare expression: displays the search results returned by the index for the question.
vectorstore.similarity_search(question)

# Prompt Generation

In [11]:
# --- Candidate prompt generation ---
# Model asked to write the candidate system prompts.
CANDIDATE_MODEL = 'gpt-4'
# Sampling temperature passed with the candidate-generation request.
CANDIDATE_MODEL_TEMPERATURE = 0.9
# Number of candidate prompts requested in a single generation call.
NUMBER_OF_PROMPTS = 1
# Maximum number of attempts made by the retry decorator on generate_candidate_prompts.
N_RETRIES = 3

# --- Weights & Biases logging (not referenced by the cells shown here) ---
WANDB_PROJECT_NAME = "mine-gpt-prompt-eng"
WANDB_RUN_NAME = None

# --- Evaluation settings (not referenced by the cells shown here) ---
EVAL_MODEL = 'gpt-3.5-turbo'
EVAL_MODEL_TEMPERATURE = 0
# NOTE(review): a limit of 1 token looks like a leftover from a true/false classifier setup;
# confirm it still fits the free-text answers used in test_cases.
EVAL_MODEL_MAX_TOKENS = 1

In [9]:
# System prompt sent to the candidate model: it instructs the model to write a system
# prompt for the described use-case and to output nothing but that prompt.
candidate_gen_system_prompt = """Your job is to generate system prompts for GPT-4, given a description of the use-case and some test cases.


In your generated prompt, you should describe how the AI should behave in plain English. Include what it will see, and what it's allowed to output. Be creative in with prompts to get the best possible results. The AI knows it's an AI -- you don't need to tell it this.

You will be graded based on the performance of your prompt... but don't cheat! You cannot include specifics about the test cases in your prompt. Any prompts with examples will be disqualified.

Most importantly, output NOTHING but the prompt. Do not include anything else in your message."""

# Disabled prompt sentence, kept for reference (it is NOT part of the string above):
# "The prompts you will be generating will be for classifiers, with 'true' and 'false' being the only possible outputs."


In [5]:
# Generate candidate prompts - retry up to N_RETRIES times, waiting exponentially between retries.
@retry(stop=stop_after_attempt(N_RETRIES), wait=wait_exponential(multiplier=1, min=4, max=70))
def generate_candidate_prompts(description, test_cases, number_of_prompts):
  """Ask the candidate model to write system prompts for a use-case.

  Args:
    description: Text describing the use-case; surrounding whitespace is
      stripped before it is placed in the request.
    test_cases: Test cases to show the model; they are interpolated into the
      user message with their default string formatting.
    number_of_prompts: How many completions to request (sent as ``n``).

  Returns:
    A list with the message content of every returned choice.
  """
  # Same user message as before, assembled from adjacent string pieces.
  user_message = (
      f"Here are some test cases:`{test_cases}`\n\n"
      f"Here is the description of the use-case: `{description.strip()}`\n\n"
      "Respond with your prompt, and nothing else. Be creative."
  )

  response = openai_client.chat.completions.create(
      model=CANDIDATE_MODEL,
      messages=[
          {"role": "system", "content": candidate_gen_system_prompt},
          {"role": "user", "content": user_message},
      ],
      temperature=CANDIDATE_MODEL_TEMPERATURE,
      n=number_of_prompts,
      # headers=HEADERS  (left disabled, as in the original request)
  )

  return [choice.message.content for choice in response.choices]

In [8]:
# Test cases shown to the candidate model when it writes a prompt.
# Each entry pairs a question about a contract ('prompt') with its reference answer ('answer').
# (The commented-out true/false classifier examples that used to follow the last entry
# were removed as dead code.)
test_cases = [
    {
        'prompt': 'Is the escrow amount greater than the retention amount.',
        'answer': 'No it is not'
    },
    {
        'prompt': 'Are there any conditions to the closing?',
        'answer': 'No, as the signing and closing are simultaneous.'
    },
    {
        'prompt': 'What is the purpose of escrow?',
        'answer': 'To serve as a recourse of the Buyer in case of post-closing adjustments of the purchase price'
    },
    {
        'prompt': 'Does any of the Sellers provide a representation with respect to any Tax matters related to the Company?',
        'answer': 'No. Only the Company provides such a representation.'
    },
    {
        'prompt': 'Are Change of Control Payments considered a Seller Transaction Expense?',
        'answer': 'yes'
    },
    ]

In [12]:
# Use-case description placed in the generation request alongside the test cases.
description = "You are giving answer for a legal contract"
# Calls the OpenAI API; returns a list of NUMBER_OF_PROMPTS generated prompt strings.
candidate_prompts = generate_candidate_prompts(description, test_cases, NUMBER_OF_PROMPTS)

In [15]:
# Pretty-print the generated prompts so long strings are wrapped across lines.
pprint.pp(candidate_prompts)

['You are an AI trained on large volumes of data across numerous fields. Your '
 'task is to answer queries about legal contracts accurately. The questions '
 'will be about different aspects of a contract such as details about escrow, '
 'closing conditions, purpose and definitions of specific terms, and tax '
 'matters. You need to analyze and understand the context of the question and '
 'provide precise and concise answers. Your output should not contain any '
 'irrelevant information and should directly address the question asked. The '
 'tone should be professional and formal, consistent with the nature of legal '
 'discourse.']
