In [4]:
from langchain.llms import OpenAI # for new models
from langchain.chat_models import ChatOpenAI # for older models
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import SystemMessage, HumanMessage
import os
import json
import pandas as pd
from tqdm import tqdm
import tiktoken
from dotenv import load_dotenv
import time
import warnings
warnings.filterwarnings('ignore')

# Load OpenaAI API key
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Define Classification Function

In [7]:
def count_tokens(prompt):
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(prompt))

def classify_sentence(input_text, template, classes=["FUN", "STR", "MIX", "OTH"], model="gpt-3.5-turbo"):

    # Format the classes correctly for the prompt
    class_labels = ", ".join(classes)  
    
    # Define a prompt template for classification
    prompt_template = PromptTemplate(
        input_variables=["text", "class_labels"],
        template=template
    )

    # Create an OpenAI LLM instance
    #llm = OpenAI(                    # for newer models
    llm = ChatOpenAI(                 # for older models
        model=model,
        temperature=0,
        max_retries=2,
        max_tokens=10  # Slightly increased for safety
    )

    # Set up the classification chain
    classification_chain = LLMChain(llm=llm, prompt=prompt_template)

    # Perform Classification
    pred_class = classification_chain.run(text=input_text, class_labels=class_labels).strip()

    # Calculate token count
    token_count = count_tokens(prompt_template.format(text=input_text, class_labels=class_labels))

    # Debugging statement (prints the formatted prompt)
    #print(f"Generated Prompt:\n{prompt_template.format(text=input_text, class_labels=class_labels)}")

    return pred_class, token_count


In [8]:
# Sample Sentences
input_text = "Additionally, the stopper 108 is used at the distal end of the wire where the loop is formed to substantially secure the loop closed." # MIX
input_text = "Provisional Patent Application number 62/571,193; filed Oct. 11, 2017; and entitled INSECT VACUUM AND TRAP ATTACHMENT SYSTEMS." #OTH
input_text = "In some embodiments, the horizontal position of the idler support block 1213 may be adjustable to maintain tension on the chain 1212." #FUN
input_text = "If there are no allocated cells to a hub using the previous criterion, the first allocated cell will be the closest cell to that hub." #FUN
input_text = "The rigid foam layer 50 is typically selected from the group of polyurethane foams, polyurea foams, and combinations thereof." # STR

# Define Initial Prompt Template for testing
template = """
        Your task is to classify a given sentence as either: 
        * 'FUN' - if the sentence describes only the functioning or behavior of a device;
        * 'STR' - if the sentence describes only the structure or architecture of a device;
        * 'MIX' - if the sentence describes both the functioning and the structure of a device;
        * 'OTH' - if the sentence cannot be classified according to any of the previous classes.
        
        The output should only contain one of the class labels: {class_labels}.
        
        Sentence: "{text}"
        Class:
"""

# Test the Classification Function
pred_class, token_count = classify_sentence(input_text=input_text, template=template)
print(f"Token Count: {token_count}")
print(f"Predicted Category: {pred_class}")

Token Count: 152
Predicted Category: STR


# Define Prompts

In [3]:
# Define Context (1)
context = "Patent documents describe the functioning and the structure of patented systems."

# Define Definitions (2)
def_FUN = "the functions, processes, actions or tasks that the system or its components are capable of performing to achieve their intended purpose."
def_STR = "the architecture, arrangements, attributes, parameters, properties and features of the system and its components."

def_FUNCTIONING = "The functioning of a system refers to " + def_FUN
def_STRUCTURE = "The structure of a system refers to " + def_STR

def_not_FUNCTIONING = "The functioning of a system do not refer to " + def_STR
def_not_STRUCTURE = "The structure of a system do not refer to " + def_FUN

# Define the Task
task = """Your task is to classify a given sentence as either: 
* 'FUN' - if the sentence describes only the functioning or behavior of a device;
* 'STR' - if the sentence describes only the structure or architecture of a device;
* 'MIX' - if the sentence describes both the functioning and the structure of a device;
* 'OTH' - if the sentence cannot be classified according to any of the previous classes.
The output should only contain one of the class labels: {class_labels}."""

# Define Examples (3)
example_FUN = "Sentence: 'when cutting the branch, a force is applied by the user to the first handle 30 and the second handle 50 to close them inwardly.' \nClass: FUN"
example_STR = "Sentence: 'the plurality of rotor blades 40b are arranged at intervals in the circumferential direction, and include the braking system 23.' \nClass: STR"
example_MIX = "Sentence: 'the device 10 comprises a secondary outlet 16 near the bottom wall that directly releases hot water into the container 15.' \nClass: MIX"
example_OTH = "Sentence: 'note that the following description of embodiments is merely an example in nature, and is not intended to limit the scope, applications, or use of the present invention.' \nClass: OTH"
examples = "\n".join([example_FUN, example_STR, example_MIX, example_OTH])

# Define Output
output_format ="Sentence: '{text}' \nClass:"

In [4]:
# Store templates
templates = {}

'''----------------------------------------------------------------------------------'''

# Define Prompt 1: task
prompt1 = []
prompt1.append(task)
prompt1.append(output_format)
template1 = "\n".join(prompt1)
templates['prompt1'] = template1

'''----------------------------------------------------------------------------------'''

# Define Prompt 2 : context 
prompt2 = []
prompt2.append(context)
prompt2.append(task)
prompt2.append(output_format)
template2 = "\n".join(prompt2)
templates['prompt2'] = template2

# Define Prompt 3 : definitions 
prompt3 = []
prompt3.append(def_FUNCTIONING)
prompt3.append(def_STRUCTURE)
prompt3.append(task)
prompt3.append(output_format)
template3 = "\n".join(prompt3)
templates['prompt3'] = template3

# Define Prompt 4 : examples 
prompt4 = []
prompt4.append(task)
prompt4.append(examples)
prompt4.append(output_format)
template4 = "\n".join(prompt4)
templates['prompt4'] = template4

'''----------------------------------------------------------------------------------'''

# Define Prompt 5 : context + definitions 
prompt5 = []
prompt5.append(context)
prompt5.append(def_FUNCTIONING)
prompt5.append(def_STRUCTURE)
prompt5.append(task)
prompt5.append(output_format)
template5 = "\n".join(prompt5)
templates['prompt5'] = template5

# Define Prompt 6 : context + examples 
prompt6 = []
prompt6.append(context)
prompt6.append(task)
prompt6.append(examples)
prompt6.append(output_format)
template6 = "\n".join(prompt6)
templates['prompt6'] = template6

# Define Prompt 7 : definitions + examples 
prompt7 = []
prompt7.append(def_FUNCTIONING)
prompt7.append(def_STRUCTURE)
prompt7.append(task)
prompt7.append(examples)
prompt7.append(output_format)
template7 = "\n".join(prompt7)
templates['prompt7'] = template7

'''----------------------------------------------------------------------------------'''

# Define Prompt 8 : context + definitions + examples 
prompt8 = []
prompt8.append(context)
prompt8.append(def_FUNCTIONING)
prompt8.append(def_STRUCTURE)
prompt8.append(task)
prompt8.append(examples)
prompt8.append(output_format)
template8 = "\n".join(prompt8)
templates['prompt8'] = template8

In [5]:
# Vizualize template for debugging template's format
for name, template in templates.items():
    print(f"{name}:")
    print(template)
    print()

prompt1:
Your task is to classify a given sentence as either: 
* 'FUN' - if the sentence describes only the functioning or behavior of a device;
* 'STR' - if the sentence describes only the structure or architecture of a device;
* 'MIX' - if the sentence describes both the functioning and the structure of a device;
* 'OTH' - if the sentence cannot be classified according to any of the previous classes.
The output should only contain one of the class labels: {class_labels}.
Sentence: '{text}' 
Class:

prompt2:
Patent documents describe the functioning and the structure of patented systems.
Your task is to classify a given sentence as either: 
* 'FUN' - if the sentence describes only the functioning or behavior of a device;
* 'STR' - if the sentence describes only the structure or architecture of a device;
* 'MIX' - if the sentence describes both the functioning and the structure of a device;
* 'OTH' - if the sentence cannot be classified according to any of the previous classes.
The out

#  Classify Sentences

In [6]:
# Import data
test_df = pd.read_excel("/home/fantoni/patent-sentence-classification/data/test_agreement.xlsx")
test_df

Unnamed: 0,sent_id,sent,sent_tag,sent_class
0,1496967,Such an example with implant 81 and an inserti...,OTH,3
1,1531576,Both Extracts 1-3 and 6-9 samples and standard...,FUN,0
2,91234,The above-described steps can be implemented u...,FUN,0
3,1552397,The recovery of the ingestible device may be p...,FUN,0
4,71110,Embodiments of the disclosure may be implement...,FUN,0
...,...,...,...,...
1195,1221937,In accordance with examples of the present dis...,STR,1
1196,1100995,"By way of non-limiting example, the online pla...",FUN,0
1197,1168562,"At a step 620, the verifying device verifies t...",FUN,0
1198,1273964,Transform block shown in figure 7(b) may mean ...,STR,1


In [7]:
# Set the model to use
model =  "gpt-3.5-turbo"

for templ_name, template in templates.items():
    
    # Initialize results dictionary
    results = {
        'sent_id': [],
        'sent': [],
        'true_class': [],
        'pred_class': [],
        'errors': [],
        'prompt_tokens': [],
        'elapsed_time_sec': []
    }

    # Perform Classification
    for i, row in tqdm(test_df.head(10).iterrows(), total=len(test_df), desc="Processing Sentences"):  
        start_time = time.time()  
        sent_id = row['sent_id']
        sent = row['sent']
        true_class = row['sent_tag']
        
        try:
            results['sent_id'].append(sent_id)
            results['sent'].append(sent)
            results['true_class'].append(true_class)

            # Get classification
            pred_class, token_count = classify_sentence(input_text=sent, model=model, template=template)

            # Validate classification
            if pred_class not in ["FUN", "STR", "MIX", "OTH"]:
                pred_class = "UNK"  # Default to UNK if invalid classification

            results['pred_class'].append(pred_class)
            results['errors'].append(None)  # No error occurred
            results['prompt_tokens'].append(token_count)  # Append token count when successful

        except Exception as e:
            # Handle any errors that occur during processing
            results['pred_class'].append("ERROR")
            results['prompt_tokens'].append("ERROR") 
            results['errors'].append(str(e))

        # Tracking how long it took to process the sentence
        end_time = time.time()
        elapsed_time = end_time-start_time
        results['elapsed_time_sec'].append(f"{elapsed_time}")

    # Convert results to DataFrame and Save
    result_df = pd.DataFrame(results)
    result_df.to_excel(f'/home/fantoni/patent-sentence-classification/results/prompting/{model}_{templ_name}.xlsx', index = False)
    print(f"{templ_name} completed.")

Processing Sentences:   1%|          | 10/1200 [00:09<18:58,  1.05it/s]


prompt1 completed.


Processing Sentences:   1%|          | 10/1200 [00:04<08:23,  2.36it/s]


prompt2 completed.


Processing Sentences:   1%|          | 10/1200 [00:05<10:01,  1.98it/s]


prompt3 completed.


Processing Sentences:   1%|          | 10/1200 [00:06<13:38,  1.45it/s]


prompt4 completed.


Processing Sentences:   1%|          | 10/1200 [00:05<10:19,  1.92it/s]


prompt5 completed.


Processing Sentences:   1%|          | 10/1200 [00:05<11:44,  1.69it/s]


prompt6 completed.


Processing Sentences:   1%|          | 8/1200 [00:25<1:04:15,  3.23s/it]


KeyboardInterrupt: 

# Cost of Prompting

To calculate cost: https://openai.com/api/pricing/

In [25]:
# Worst case Scenario
input_tokens = 200 
output_tokens = 3
n_sentence = 600

# gpt-4o
cost_prompt = input_tokens * 2.50/1000000
cost_output = output_tokens * 10/1000000

# gpt-3.5-turbo
cost_prompt = input_tokens * 3/1000000
cost_output = output_tokens * 6/1000000

# gpt-3.5-turbo-0125
cost_prompt = input_tokens * 0.5/1000000
cost_output = output_tokens * 1.5/1000000

print(f"Cost Prompt: {cost_prompt:.5f}$")
print(f"Cost Output: {cost_output:.5f}$")
print(f"Tot. Cost per Sentence: {cost_prompt + cost_output:.5f}$")
print(f"Tot. Cost for {n_sentence} Sentence: {(cost_prompt + cost_output)*n_sentence:.2f}$")

Cost Prompt: 0.00010$
Cost Output: 0.00000$
Tot. Cost per Sentence: 0.00010$
Tot. Cost for 600 Sentence: 0.06$
