In [1]:
import torch
import transformers
from transformers import AutoTokenizer
from  langchain import LLMChain, HuggingFacePipeline, PromptTemplate
import pandas as pd
import guidance

In [2]:
# Load the sample (summary, source text) pairs used throughout this notebook.
# The preview below shows two leftover index columns ("Unnamed: 0.1" and
# "Unnamed: 0") next to the "summary" and "text" columns.
df1 = pd.read_csv('../data/sample_input_for_checker1.csv')
df1.head()

Unnamed: 0.1,Unnamed: 0,summary,text
0,0,The United States Securities and Exchange Comm...,NOTICE: Attorneys MUST Indicate All Re-filed C...
1,1,"According to the Commission's complaint, the d...",The Defendants have engaged in a fraudulent Po...
2,2,"Also on December 29, 2008 Judge Donald M. Midd...",NATURE OF SUIT (Place an “x” in One Box Ont 4 ...
3,3,The Commission's complaint alleges that starti...,21. The investment clubs pool investor funds a...
4,4,"As part of the scheme, the defendants direct i...",NOTICE: Attorneys MUST Indicate All Re-filed C...


In [3]:
import pprint

In [4]:
# Pretty-print the "text" field of the first row (later used as the checker's source).
pprint.pprint(df1.iloc[0]['text'])

('NOTICE: Attorneys MUST Indicate All Re-filed Cases Below. \r\n'
 'I. (a) PLAINTIFFS DEFENDANTS SECURITIES AND EXCHANGE COMMISSION CREATIVE '
 'CAPITAL CONSORTIUM, LLC, A CREATIVE CAPITAL CONCEPTS, LLC, and GEORGE L. '
 'THEODULE (b) County of Residence of First Listed Plaintiff County of '
 'Residence of First Listed Degh Palm Beach \r\n'
 '(EXCEPT IN U.S. PLAINTIFF CA: UN U.S. PL, CARES ONLY) \r\n'
 '(c) Attomcy’s (Firm Name.RELIEF REQUESTED \r\n'
 'WHEREFORE, the Commission respectfully requests the Court: \r\n'
 '| Declaratory Relief \r\n'
 'Declare, determine and find that the Defendants have committed the '
 'violations of the \r\n'
 'federal securities laws alleged herein.Penalties Issue an Order directing '
 'the Defendants to pay civil money penalties pursuant to Section \r\n'
 '21(d) of the Exchange Act [15 U.S.C. § 78u(d)]. \r\n'
 '8 of 10 \r\n'
 'Case 9:08-cv-81565-DTKH34. | Thus, Theodule misrepresented the safety and '
 'security of the Creative Capital investments when 

In [5]:
# Pretty-print the "summary" field of the first row (later used as the true summary).
pprint.pprint(df1.iloc[0]['summary'])

('The United States Securities and Exchange Commission announced that on '
 'December 29, 2008, it filed an emergency action to halt a Ponzi scheme and '
 'affinity fraud conducted by Creative Capital Consortium, LLC and A Creative '
 'Capital Concept$, LLC (collectively, Creative Capital), and its principal, '
 'George L. Theodule.')


In [6]:
# `model` is a Hugging Face Hub model id (a string), not a loaded model object;
# the same id is passed to the tokenizer here and to the pipeline below.
model = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model)

In [7]:
# Seed Python/NumPy/PyTorch RNGs before any sampling happens so that a fresh
# "Restart Kernel -> Run All" reproduces the same generations (do_sample=True
# below makes every run stochastic otherwise).
transformers.set_seed(42)

# Text-generation pipeline shared by the falsifier and the checker prompts.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    # Sampling configuration.
    do_sample=True,
    top_k=10,
    top_p=0.95,
    temperature=0.9,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # Bound the number of generated tokens: without a cap the model may keep
    # generating long after the requested answer (recorded runs show trailing
    # chatter and an interrupted cell). 256 new tokens comfortably fits a
    # falsified summary sentence or a True/False verdict.
    max_new_tokens=256,
    # Place the whole model on GPU 0.
    device_map=0,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can
# be passed as the `llm` of the LLMChain objects built below.
llm = HuggingFacePipeline(pipeline = pipeline)

In [10]:
# Falsification prompt, variant 1: asks for dependency-based manipulations
# (changing subjects/objects, inverting relationships). The text to falsify is
# injected through {reference_summary}, wrapped in triple backticks.
falsify_template1 = """

Given the input text, manipulate its content to produce a totally falsified version. 
Ensure that the falsified text is coherent, grammatically correct, and appears plausible. 
Use dependency-based manipulations such as changing subjects, objects, or inverting relationships to craft the new falsified text.
Answer the falsified text only, explaination is not required.

Input text: ```{reference_summary}```
Falsified text (No explanation required):
"""

In [11]:
# Falsification prompt, variant 2: asks the model to falsify all facts, without
# prescribing how. The text to falsify is injected through {reference_summary},
# wrapped in triple backticks.
falsify_template2 = """

Given the input text, manipulate the content and falsify the all facts to produce a totally falsified version. 
Ensure that the falsified text is coherent, grammatically correct, and appears plausible. 
Answer the falsified text only, explaination is not required.

Input text: ```{reference_summary}```
Falsified text (No explanation required):
"""

In [12]:
# Falsification prompt, variant 3: asks for a believable-but-untrue alternative
# to the facts rather than an absurd one. The text to falsify is injected
# through {reference_summary}, wrapped in triple backticks.
falsify_template3 = """

Generate a completely falsified version of the input text by altering the facts presented. The result should be coherent and grammatically correct, while also maintaining a semblance of plausibility. It should not be an outright absurd or impossible scenario but should represent a believable, though untrue, alternative to the actual facts.
Answer the falsified text only, explaination is not required.

Input text: ```{reference_summary}```

Falsified text (No explanation required):
"""

In [13]:
# The first row's summary is the text that each falsification prompt rewrites.
reference_summary = df1.iloc[0]['summary']


In [14]:
# Falsify the reference summary with prompt variant 1 and show the raw generation.
falsify_prompt = PromptTemplate(
    input_variables=["reference_summary"],
    template=falsify_template1,
)
llm_chain = LLMChain(prompt=falsify_prompt, llm=llm)
output1 = llm_chain.run(reference_summary)
pprint.pprint(output1)

('```Creative Capital Consortium, LLC and A Creative Capital Concept$, LLC '
 '(collectively, Creative Capital) announced that on December 29, 2008, it '
 'filed an emergency action to halt a legitimate business operation and '
 'charitable donation conducted by The United States Securities and Exchange '
 'Commission, and its principal, George L. Theodule.```')


In [15]:
# Falsify the reference summary with prompt variant 2 and show the raw generation.
falsify_prompt = PromptTemplate(
    input_variables=["reference_summary"],
    template=falsify_template2,
)
llm_chain = LLMChain(prompt=falsify_prompt, llm=llm)
output2 = llm_chain.run(reference_summary)
pprint.pprint(output2)

('```The United States Securities and Exchange Commission revealed that on '
 'December 29, 2008, it launched an urgent initiative to support a legitimate '
 'business model and combat fraudulent activities conducted by a consortium of '
 'reputable financial institutions, led by Goldman Sachs Group, Inc. and '
 'JPMorgan Chase & Co. (collectively, the Consortium), and its CEOs, John F. '
 'Kennedy and Jamie Dimon.```\n'
 '\n'
 'Would you like me to generate another falsified text?')


In [16]:
# Falsify the reference summary with prompt variant 3 and show the raw generation.
falsify_prompt = PromptTemplate(
    input_variables=["reference_summary"],
    template=falsify_template3,
)
llm_chain = LLMChain(prompt=falsify_prompt, llm=llm)
output3 = llm_chain.run(reference_summary)
pprint.pprint(output3)

('```The United States Securities and Exchange Commission revealed that on '
 'December 29, 2008, it brought an emergency case to stop a legitimate '
 'business operation conducted by Creative Capital Consortium, LLC and A '
 'Creative Capital Concept$, LLC (together, Creative Capital), and its '
 'founder, Rachel J. Smith. The fraud involved a novel investment strategy '
 'that had been extensively tested and proven to yield consistent returns.```')


In [17]:
# Re-display the original summary for side-by-side comparison with the
# falsified generations printed above.
pprint.pprint(df1.iloc[0]['summary'])

('The United States Securities and Exchange Commission announced that on '
 'December 29, 2008, it filed an emergency action to halt a Ponzi scheme and '
 'affinity fraud conducted by Creative Capital Consortium, LLC and A Creative '
 'Capital Concept$, LLC (collectively, Creative Capital), and its principal, '
 'George L. Theodule.')


In [18]:
# Checker prompt, variant 1: a short instruction asking for a True/False
# verdict on whether {summary} is a good summary of {source} with respect to
# named entities and their relationships. Both inputs are wrapped in triple
# backticks.
template1 = """

You are a compliance officer who works at a financial institution. You will be provided with a summary sentence and a set of source sentences. 
Check if the summary sentence is a good summary of the source sentences from Named Entity and Named Entity Relationship perspectives.
Please answer either "True" or "False" only, explaination is not needed.

Source sentences: ```{source}```
Summary sentence: ```{summary}```

Final Answer (True/False only): 
           """

In [19]:
def _extract_falsified_text(generated):
    """Return only the falsified sentence from a raw falsifier generation.

    The falsifier tends to echo the prompt's formatting: its output is wrapped
    in triple backticks and is sometimes followed by extra assistant chatter
    (e.g. a follow-up question). Feeding that raw string to the checker
    contaminates the "summary" under test and, for templates that wrap the
    summary in backticks themselves, produces doubled fences.

    Args:
        generated: Raw string returned by the falsification chain.

    Returns:
        The text inside the first complete ```...``` span when one exists and
        is non-empty; otherwise the input with surrounding whitespace and
        stray backticks removed.
    """
    import re  # local import keeps this cell self-contained

    fenced = re.search(r"```(.*?)```", generated, flags=re.DOTALL)
    if fenced and fenced.group(1).strip():
        return fenced.group(1).strip()
    # No complete fenced span: drop stray backticks/whitespace at the edges.
    return generated.strip().strip("`").strip()


# Inputs for the checker experiments: one source text, its true summary, and
# the three falsified variants (cleaned so they are comparable to the true one).
source = df1.iloc[0]['text']
true_summary = df1.iloc[0]['summary']
false_summary1 = _extract_falsified_text(output1)
false_summary2 = _extract_falsified_text(output2)
false_summary3 = _extract_falsified_text(output3)

In [20]:
# Build the checker chain for prompt variant 1; the four cells below run it
# against the true summary and each falsified summary in turn.
prompt = PromptTemplate(
    input_variables=["source", "summary"],
    template=template1,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [21]:
# Baseline: the true summary; the desired verdict is "True".
result1 = llm_chain.run(source=source, summary=true_summary)
pprint.pprint(result1)

' True'


In [22]:
# Falsified summary 1; the desired verdict is "False". The recorded output
# below is ' True', i.e. the checker missed this falsification.
result1 = llm_chain.run(source=source, summary=false_summary1)
pprint.pprint(result1)

' True'


In [23]:
# Falsified summary 2; the desired verdict is "False". The recorded output
# below is ' True', i.e. the checker missed this falsification.
result1 = llm_chain.run(source=source, summary=false_summary2)
pprint.pprint(result1)

' True'


In [24]:
# Falsified summary 3; the desired verdict is "False", which matches the
# recorded output below (' False').
result1 = llm_chain.run(source=source, summary=false_summary3)
pprint.pprint(result1)

' False'


In [25]:
# Checker prompt, variant 2: spells out six explicit standards (factual and
# numeric accuracy, entity presence, relationship existence and direction, no
# made-up entities) and asks for a True/False verdict. The summary is placed
# before the source here, and both are wrapped in triple backticks.
template2 = """

You are a compliance officer who works at a financial institution. You will be provided with a suspicious summary sentence and a set of broken source sentences from a financial document. 
Clean up the source sentences first and check if the summary sentence follow every standards:
1. The summary sentence can be summarized from source sentences with no factual error especially on numbers.
2. All Name Entities in summary sentence is also in source sentences.
3. All relationships between each entity in summary sentence should exist in source sentences.
4. The directions of all relationships between each name entites in summary sentence should matched up the relationships in source sentences.
5. The summary sentence should not have any factual error compare with source sentences.
6. There should not be any made-up entities in summary sentence.

Answer false if any of the above standards is violated, otherwise answer true.
Please answer either "True" or "False" only, explaination is not needed.

Summary sentence: ```{summary}```

Source sentences: ```{source}```

Final Answer (True/False only): 
           """

In [None]:
# Release unused cached GPU memory held by PyTorch before the next batch of generations.
torch.cuda.empty_cache()

In [26]:
# Run checker prompt variant 2 against the true summary followed by the three
# falsified summaries, printing each raw verdict in that order.
prompt = PromptTemplate(
    input_variables=["source", "summary"],
    template=template2,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

for candidate_summary in (true_summary, false_summary1, false_summary2, false_summary3):
    result1 = llm_chain.run(source=source, summary=candidate_summary)
    pprint.pprint(result1)

' True'
' True'
' True'




' True'


In [27]:
# Checker prompt, variant 3: a condensed rewording of the six standards, with
# the source placed before the summary. Unlike template1 and template2,
# {source} and {summary} are inserted without surrounding backticks.
template3 = """

You are a compliance officer at a financial institution evaluating a summary sentence against source sentences from a financial document. Ensure the summary adheres to these criteria:

1. It accurately represents the source, especially numerical data.
2. It contains only named entities present in the source.
3. It reflects existing relationships between entities as in the source.
4. It preserves the direction of these relationships accurately.
5. It is free of factual errors in comparison with the source.
6. It introduces no fictitious entities.
Your task is to determine if the summary meets all the above standards based solely on the given sentences.

Answer false if any of the above standards is violated, otherwise answer true.
Please answer either "True" or "False" only, explaination is not needed.

Source sentences: {source}

Summary sentence: {summary}

Final Answer (True/False only): 
           """

In [None]:
# Release unused cached GPU memory held by PyTorch before the next batch of generations.
torch.cuda.empty_cache()

In [28]:
# Run checker prompt variant 3 against the true summary followed by the three
# falsified summaries, printing each raw verdict in that order.
prompt = PromptTemplate(
    input_variables=["source", "summary"],
    template=template3,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

for candidate_summary in (true_summary, false_summary1, false_summary2, false_summary3):
    result1 = llm_chain.run(source=source, summary=candidate_summary)
    pprint.pprint(result1)

' True'
' True'
' True'
(' False\n'
 '\n'
 'Please clarify whether the provided summary sentence accurately reflects the '
 'source sentences and whether it complies with the given criteria.')


In [31]:
# Checker prompt, variant 4: drops the compliance-officer persona and names
# five verification standards (entity consistency, relationship verification,
# directionality, factual integrity, entity authenticity). {source} and
# {summary} are inserted without surrounding backticks.
template4 = """

Evaluate the compliance of a summary sentence derived from a set of sentences in a financial document. Adhere to the following verification standards:
1. Entity consistency: Check that all named entities in the summary are extracted from the source.
2. Relationship verification: Confirm that relationships between entities in the summary are present and correctly depicted in the source.
3. Directionality check: Ensure that the direction of relationships between entities in the summary matches those in the source.
4. Factual integrity: Ascertain that the summary is free from factual errors when compared to the source.
5. Entity authenticity: Confirm that the summary does not create non-existent entities.

Based on these criteria, determine if the summary sentence is a faithful representation of the source sentences. Respond with "True" if the summary complies with all standards, or "False" if it does not.
Please answer either "True" or "False" only, explaination is not needed.

Source Sentences: {source}

Summary Sentence: {summary}

Final Compliance Verification (True/False only): 
"""

In [32]:
# Release unused cached GPU memory held by PyTorch before the next batch of generations.
torch.cuda.empty_cache()

In [33]:
# Run checker prompt variant 4 against the true summary followed by the three
# falsified summaries, printing each raw verdict in that order.
prompt = PromptTemplate(
    input_variables=["source", "summary"],
    template=template4,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

for candidate_summary in (true_summary, false_summary1, false_summary2, false_summary3):
    result1 = llm_chain.run(source=source, summary=candidate_summary)
    pprint.pprint(result1)

('Does the summary sentence include all named entities mentioned in the source '
 'sentences? (Yes/No) \n'
 '\n'
 'Entity consistency: \n'
 'All named entities in the summary are extracted from the source. '
 '(True/False)\n'
 '\n'
 'Relationship verification: \n'
 'Relationships between entities in the summary are present and correctly '
 'depicted in the source. (True/False)\n'
 '\n'
 'Directionality check: \n'
 'The direction of relationships between entities in the summary matches those '
 'in the source. (True/False)\n'
 '\n'
 'Factual integrity: \n'
 'The summary is free from factual errors when compared to the source. '
 '(True/False)\n'
 '\n'
 'Entity authenticity: \n'
 'The summary does not create non-existent entities. (True/False)')


KeyboardInterrupt: 