In [19]:
import getpass
import json
import os

# Never hardcode the API key in the notebook: it would be saved with the file
# and shared with every copy. A key already exported in the environment wins;
# otherwise ask for it with a hidden prompt.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    try:
        OPENAI_API_KEY = getpass.getpass("OpenAI API key: ")
    except EOFError:
        # No interactive input is available (e.g. a headless run); leave the
        # key empty instead of crashing this cell.
        OPENAI_API_KEY = ""

# Only export a real value, so an empty key never overwrites the environment.
if OPENAI_API_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [2]:
from langchain_openai import ChatOpenAI


In [3]:
# Chat model instance; it is passed as `llm` to create_json_agent further down.
llm = ChatOpenAI(model="gpt-4-1106-preview")

In [64]:
import yaml
from langchain.agents import create_json_agent
from langchain_community.agent_toolkits import JsonToolkit
from langchain_openai import OpenAI
from langchain_community.tools.json.tool import JsonSpec


def _load_json(path):
    """Return the object decoded from the JSON file at ``path``.

    The inputs are JSON documents, so they are parsed with the ``json`` module
    rather than a YAML loader.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def _as_json_text(document):
    """Serialize ``document`` to JSON text so prompts carry real JSON."""
    return json.dumps(document)


# Read the JSON files
data1 = _load_json("json1.json")
data2 = _load_json("json2.json")
data3 = _load_json("json3.json")
data_test = _load_json("supporting.json")

# Create a JSON specification for each file
MAX_VALUE_LENGTH = 4000
json_spec1 = JsonSpec(dict_=data1, max_value_length=MAX_VALUE_LENGTH)
json_spec2 = JsonSpec(dict_=data2, max_value_length=MAX_VALUE_LENGTH)
json_spec3 = JsonSpec(dict_=data3, max_value_length=MAX_VALUE_LENGTH)

json_spec_combined = JsonSpec(dict_=data_test, max_value_length=MAX_VALUE_LENGTH)

# Create a JSON toolkit with the specifications.
# Only the invoice spec is given to the toolkit; the comparison documents
# reach the agent through the prompt text below.
json_toolkit_combined = JsonToolkit(spec=json_spec_combined)


# Initialize the LangChain agent with the OpenAI language model and the JSON toolkit
json_agent_executor = create_json_agent(llm=llm, toolkit=json_toolkit_combined, verbose=True)

# JSON text of each document. Interpolating the dicts or JsonSpec objects
# directly would put their Python repr into the prompt instead of JSON.
invoice_json = _as_json_text(data_test)
doc1_json = _as_json_text(data1)
doc2_json = _as_json_text(data2)
doc3_json = _as_json_text(data3)

prompt1 = f""" You are an experienced auditor and performing accrued expense procedure. Your task is to compare Invoice with the provided json documents. You compare the given invoice with
all the provided documents one by one untill all data set is completed. You will create a final report which include all matching and non matching data set.
Invoice:{invoice_json} and json document sets:{doc2_json}, {doc3_json}, {doc1_json}"""

prompt2 = f"""You are an experienced auditor and performing accrued expense procedure. Your task is to compare Invoice with the provided json document.You compare the given invoice with
all the provided documents one by one untill all data set is completed. You will create a final report which include all matching and non matching data set.
Compare invoice data set: {invoice_json} with the provided data sets: {doc2_json}, {doc3_json}, & {doc1_json}. Compare the invoice data with the provided data sets one by one and very exclusively.
Provide selection_ids of the matching sets. If no matching happens, make selection_ids=None"""

prompt3 = f"""You are an experienced auditor performing an accrued expense procedure. Your task is to compare the invoice data with all the provided JSON documents. 
You will compare the given invoice with each of the provided documents individually and create a final report that includes all matching and non-matching data sets. 
Compare the invoice data set: {invoice_json} with the provided data sets: {doc2_json}, {doc3_json}, and {doc1_json}"""

# prompt1 and prompt2 are alternative wordings; only prompt3 is sent to the agent.
result = json_agent_executor.invoke({"input": prompt3})




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: json_spec_list_keys
Action Input: data[0m
Observation: [36;1m[1;3m['invoice_id', 'selection_id', 'vendor', 'amount', 'currency', 'invoice_date'][0m
Thought:[32;1m[1;3mI now have the list of keys available in the JSON data that corresponds to an invoice. I need to compare the given invoice data with the provided data sets to check for matching and non-matching fields.

Thought: I should start by comparing the 'selection_id' as it seems to be a unique identifier for each entry.

Thought: Before I can compare, I need to get the value of 'selection_id' from the JSON data to compare it with the given 'selection_id' values in the provided data sets.

Action: json_spec_get_value
Action Input: data["selection_id"][0m
Observation: [33;1m[1;3mSI101[0m
Thought:[32;1m[1;3mThe 'selection_id' in the JSON data is 'SI101', which matches the 'selection_id' in one of the provided data sets.

Thought: Next, I should compare 

In [65]:
# Final answer text of the agent run stored in `result`.
result['output']

"The JSON data matches the last provided data set with 'selection_id' 'SI101'. The 'amount' is 1200.0, 'currency' is 'USD', and 'invoice_date' is '2024-04-01', which all correspond to the given invoice data set and the last provided data set. The 'vendor' field in the JSON data ('Staples Inc') does not directly match the 'description' field in the provided data set ('Office supplies'), but this is not necessarily a mismatch in the context of the audit as these fields represent different aspects of the transaction. The other two provided data sets do not match the JSON data as they have different 'selection_id' values ('SI102' and 'SI103') and different 'amounts'."

In [66]:
# Comparison document parsed from json1.json.
data1

{'selection_id': 'SI101',
 'description': 'Office supplies',
 'amount': 1200.0,
 'currency': 'USD',
 'date': '2024-04-01'}

In [67]:
# Comparison document parsed from json2.json.
data2

{'selection_id': 'SI102',
 'description': 'Consulting services',
 'amount': 4500.0,
 'currency': 'USD',
 'date': '2024-03-15'}

In [68]:
# Comparison document parsed from json3.json.
data3

{'selection_id': 'SI103',
 'description': 'Office supplies',
 'amount': 1200.0,
 'currency': 'USD',
 'date': '2024-04-01'}

In [73]:
# Invoice under test, parsed from supporting.json.
data_test

{'invoice_id': 'INV1234',
 'selection_id': 'SI101',
 'vendor': 'Staples Inc',
 'amount': 1200.0,
 'currency': 'USD',
 'invoice_date': '2024-04-01'}

## Simple Prompting

In [118]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langchain_core.output_parsers import JsonOutputParser


In [125]:
# Prompt for a single chat call that compares the invoice with three documents.
# Literal braces in the format example are doubled ({{ }}) so the template does
# not treat them as input variables; the example itself is a valid JSON object.
template = ChatPromptTemplate.from_messages([
    ("system", "You are an experienced auditor performing an accrued expense procedure. Your task is to compare the invoice data with other data in JSON documents. "),
    ("human",
     " Here is the original invoice {data_test}, and other json files  {data1}, {data2} and {data3}. "
     "Get the output in the JSON format like this - "
     '{{"data1": "<verdict>", "data2": "<verdict>", "data3": "<verdict>"}} '
     'where each <verdict> is either "match" or "not match".'),
])
llm = ChatOpenAI(model="gpt-4-1106-preview")

chain = template | llm

# Serialize each document to JSON text; passing the dicts directly would put
# their Python repr (single-quoted keys and values) into the prompt.
result = chain.invoke({
    "data_test": json.dumps(data_test),
    "data1": json.dumps(data1),
    "data2": json.dumps(data2),
    "data3": json.dumps(data3),
})


In [126]:
# Print the text of the chat model reply held in `result`.
print(result.content)

To determine if the original invoice matches with the other JSON documents provided, we need to compare the relevant fields. Specifically, we compare the 'selection_id', 'amount', 'currency', and 'date'/'invoice_date' fields.

Here is the comparison:

Original Invoice:
```json
{
  'invoice_id': 'INV1234',
  'selection_id': 'SI101',
  'vendor': 'Staples Inc',
  'amount': 1200.0,
  'currency': 'USD',
  'invoice_date': '2024-04-01'
}
```

JSON Document 1:
```json
{
  'selection_id': 'SI101',
  'description': 'Office supplies',
  'amount': 1200.0,
  'currency': 'USD',
  'date': '2024-04-01'
}
```

JSON Document 2:
```json
{
  'selection_id': 'SI102',
  'description': 'Consulting services',
  'amount': 4500.0,
  'currency': 'USD',
  'date': '2024-03-15'
}
```

JSON Document 3:
```json
{
  'selection_id': 'SI103',
  'description': 'Office supplies',
  'amount': 1200.0,
  'currency': 'USD',
  'date': '2024-04-01'
}
```

Now, we perform the matching:

- Data1: The 'selection_id' matches, but t

In [110]:
# Re-display of the document parsed from json2.json.
data2

{'selection_id': 'SI102',
 'description': 'Consulting services',
 'amount': 4500.0,
 'currency': 'USD',
 'date': '2024-03-15'}

In [111]:
# Re-display of the invoice parsed from supporting.json.
data_test

{'invoice_id': 'INV1234',
 'selection_id': 'SI101',
 'vendor': 'Staples Inc',
 'amount': 1200.0,
 'currency': 'USD',
 'invoice_date': '2024-04-01'}

In [102]:
# Print the text of the chat model reply held in `result`.
# NOTE(review): this cell's execution count (102) predates the prompt cell above (125),
# so the output shown here presumably came from an earlier prompt version — confirm by re-running.
print(result.content)

To compare the original invoice with the other JSON documents, we will look for the matching `selection_id` and then compare the other fields to ensure they align with the original invoice data. Once we have made all the comparisons, we will output the results in JSON format indicating which documents match and which do not.

Here is the comparison and the resulting JSON output:

```json
{
  "data2": "not match",
  "data3": "match",
  "data1": "match"
}
```

Explanation:
- `data1` refers to the first JSON document, which has a `selection_id` of 'SI102'. It does not match the original invoice's `selection_id` of 'SI101', so it is labeled as "not match".
- `data2` refers to the second JSON document, which has a `selection_id` of 'SI103'. Similar to `data1`, it does not match the original invoice's `selection_id`, so it is also labeled as "not match".
- `data3` refers to the third JSON document, which has a `selection_id` of 'SI101'. This matches the original invoice's `selection_id`, and