In [22]:

from dotenv import load_dotenv
load_dotenv()


True

In [23]:
from typing import List, Union
from pydantic import BaseModel, Field
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI as OpenAi


class NER1(BaseModel):
    """Key terms extracted from an agreement document: its value, start and end dates, renewal notice period, and the names of both parties."""

    # NOTE: the class docstring above is expected to be forwarded to the LLM as
    # the function description by convert_pydantic_to_openai_function, so keep
    # it short and descriptive of the extraction task.
    #
    # Every field carries an empty default so that a partial extraction (some
    # terms missing from the document) still validates.

    # Monetary value of the agreement; a string is accepted when no clean number is available.
    Agreement_Value: Union[float, str] = Field(default="", description="Numeric value of agreement value")
    # Dates are requested in one explicit pattern so results are comparable across documents.
    Agreement_Start_Date: str = Field(default="", description="Agreement start date in DD-MM-YYYY format (e.g. 01-05-2010)")
    Agreement_End_Date: str = Field(default="", description="Agreement end date in DD-MM-YYYY format (e.g. 30-04-2011)")
    # Notice period in days; a string is accepted when it is absent or not a plain integer.
    Renewal_Notice_Days: Union[int, str] = Field(default="", description="Number of days for the renewal notice in Integer")
    # default_factory=list gives each instance its own empty list.
    Party_One: List[str] = Field(default_factory=list, description="Person names of party one")
    Party_Two: List[str] = Field(default_factory=list, description="Person names of party two")

In [24]:
def openai_response(text: str):
    """Run the NER1 extraction chain over ``text`` and return the parsed result.

    A deterministic (temperature 0) chat model is forced to call the ``NER1``
    function, and the function-call arguments are parsed from JSON.

    Args:
        text: Raw agreement text to extract fields from.

    Returns:
        The output of ``JsonOutputFunctionsParser`` for the forced ``NER1``
        call, i.e. the extracted fields keyed by ``NER1`` field name.
    """
    llm = ChatOpenAI(temperature=0)
    ner_function = convert_pydantic_to_openai_function(NER1)
    # Pin the function call so the model always answers through the NER1 schema.
    bound_llm = llm.bind(
        functions=[ner_function],
        function_call={"name": "NER1"},
    )
    messages = [
        ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info."),
        ("human", "{input}"),
    ]
    chain = ChatPromptTemplate.from_messages(messages) | bound_llm | JsonOutputFunctionsParser()
    return chain.invoke({"input": text})

In [25]:
import pandas as pd

# Forward-slash relative path so the notebook runs on any OS; a backslash
# separator is only understood as a directory separator on Windows.
df = pd.read_csv("data/train_extracted.csv")
df.head()

Unnamed: 0,File Name,Extracted Text
0,18325926-Rental-Agreement-1,RENTAL AGREEMENT\nThis deed of rental agreemen...
1,36199312-Rental-Agreement,RENEWAL OF RENTAL AGREEMENT\n\nThis AGREEMENT ...
2,44737744-Maddireddy-Bhargava-Reddy-Rental-Agre...,RENTfft\tENT\nThis Rental Agreement is made an...
3,46239065-Standard-Rental-Agreement-Rental-With...,STANDARD REAL ESTATE RENTAL AGREEMENT\nThis Ag...
4,47854715-RENTAL-AGREEMENT,RENTAL AGREEMENT\nTHIS DEED OF RENTAL AGREEMEN...


In [26]:
# Extract fields from the agreement text stored in row 0.
openai_response(df.loc[0, 'Extracted Text'])

{'Party_One': ['MR.K.Kuttan'], 'Party_Two': ['P.M. Narayana Namboodri']}

In [14]:
# Extract fields from the agreement text stored in row 1.
openai_response(df.loc[1, 'Extracted Text'])

{'Agreement_Start_Date': '1-05-2010',
 'Party_One': ['Mr. Balaji.R'],
 'Party_Two': ['Mr.Kartheek R']}

In [32]:
# Extract fields from the agreement text stored in row 2.
openai_response(df.loc[2, 'Extracted Text'])

{'Party_One': ['M. V, V« VI JAY A SHANKAR'],
 'Party_Two': ['MADDIREDDY BHARGAVA REDDY']}

In [33]:
# Extract fields from the agreement text stored in row 3.
openai_response(df.loc[3, 'Extracted Text'])

{'Party_One': ['PROSPER 4 LESS,LLC'],
 'Party_Two': ['Jose Luis Salcido'],
 'Agreement_Start_Date': '11-12-2008',
 'Agreement_End_Date': '',
 'Renewal_Notice_Days': ''}

In [34]:
# Extract fields from the agreement text stored in row 4.
openai_response(df.loc[4, 'Extracted Text'])

{'Party_One': ['Mr. P C MATHEW'], 'Party_Two': ['Mr. L GOPINATH']}

In [35]:
# Extract fields from the agreement text stored in row 5.
openai_response(df.loc[5, 'Extracted Text'])

{'Party_One': ['Mr. P. JohnsonRavikumar'],
 'Party_Two': ['Mr. Saravanan BV'],
 'Agreement_Start_Date': '01.04.2010',
 'Agreement_End_Date': '30.03.2011',
 'Renewal_Notice_Days': 90}

In [36]:
# Extract fields from the agreement text stored in row 6.
openai_response(df.loc[6, 'Extracted Text'])

{'Agreement_Start_Date': '1st May 2010',
 'Party_One': ['Mr. Balaji.R'],
 'Party_Two': ['Mr.Kartheek R']}

In [37]:
# Extract fields from the agreement text stored in row 7.
openai_response(df.loc[7, 'Extracted Text'])

{'Agreement_Start_Date': '1-05-2010',
 'Party_One': ['Mr. Balaji.R'],
 'Party_Two': ['Mr.Kartheek R']}

In [38]:
# Extract fields from the agreement text stored in row 8.
openai_response(df.loc[8, 'Extracted Text'])

{'Party_One': ['Antonio Levy S. Ingles. Jr.', 'Mary Rose C. Ingles'],
 'Party_Two': ['GERALDINE O. GALINATO'],
 'Agreement_Start_Date': '20th day of May, 2007',
 'Agreement_End_Date': '20th day of May, 2008',
 'Agreement_Value': 6500}

In [42]:
# Extract fields from the agreement text stored in row 9.
openai_response(df.loc[9, 'Extracted Text'])

{'Party_One': ['Antonio Levy S. Ingles. Jr.', 'Mary Rose C. Ingles'],
 'Party_Two': ['GERALDINE O. GALINATO'],
 'Agreement_Start_Date': '20th day of May, 2007',
 'Agreement_End_Date': '20th day of May, 2008',
 'Renewal_Notice_Days': 15}