In [22]:
import warnings
import os
from dotenv import load_dotenv

# LangChain lib
from langchain.chains import LLMCheckerChain
from langchain.chains.summarize import load_summarize_chain

from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate

from langchain.document_loaders import UnstructuredHTMLLoader, PyPDFLoader, DirectoryLoader

from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

from utils import to_markdown

In [21]:
# Load environment configuration through python-dotenv before any key is read
# (presumably from a local .env file -- confirm where the key is stored).
load_dotenv()

# Blanket filter: every warning category is silenced from this point on.
# NOTE(review): this also hides deprecation notices from the imported libraries.
warnings.filterwarnings("ignore")

# The lookup yields None when the variable is unset; nothing is checked here.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Chat model shared by the chains built in the following cells.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=GEMINI_API_KEY,
    temperature=0.7,
    top_p=0.85,
    top_k=3,
)


In [10]:
# Question handed to the self-checking chain.
text = "What type of mammal lays the biggest eggs?"

# verbose=True is what produces the "Entering new ... chain" trace in the output.
checker_chain = LLMCheckerChain.from_llm(llm=llm, verbose=True)

# "query" is the chain's input key (it is echoed back in the returned dict).
checker_chain.invoke({"query": text})



[1m> Entering new LLMCheckerChain chain...[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


{'query': 'What type of mammal lays the biggest eggs?',
 'result': 'The question cannot be answered based on the given assertions and checks. The assertions and checks do not provide any information about the size of eggs laid by monotremes.'}

In [17]:
# Summarization prompt: asks the model for five progressively denser summaries
# of identical length and for the answer as a JSON list of dictionaries.
# {text} is the only placeholder and receives the article to summarize.
template = """
Article: {text}
You will generate increasingly concise, entity-dense summaries of the
above article.
Repeat the following 2 steps 5 times.
Step 1. Identify 1-3 informative entities (";" delimited) from the article
which are missing from the previously generated summary.
Step 2. Write a new, denser summary of identical length which covers every
entity and detail from the previous summary plus the missing entities.

A missing entity is:
- relevant to the main story,
- specific yet concise (5 words or fewer),
- novel (not in the previous summary),
- faithful (present in the article),
- anywhere (can be located anywhere in the article).

Guidelines:
- The first summary should be long (4-5 sentences, ~80 words) yet highly
non-specific, containing little information beyond the entities marked
as missing. Use overly verbose language and fillers (e.g., "this article
discusses") to reach ~80 words.
- Make every word count: rewrite the previous summary to improve flow and
make space for additional entities.
- Make space with fusion, compression, and removal of uninformative
phrases like "the article discusses".
- The summaries should become highly dense and concise yet self-contained,
i.e., easily understood without the article.
- Missing entities can appear anywhere in the new summary.
- Never drop entities from the previous summary. If space cannot be made,
add fewer new entities.
Remember, use the exact same number of words for each summary.
Answer in JSON. The JSON should be a list (length 5) of dictionaries whose
keys are "Missing_Entities" and "Denser_Summary".
"""

# Wrap the raw string as a prompt template.
prompt = PromptTemplate.from_template(template)

# The model reply is kept as a plain string; the JSON is not parsed here.
output_parser = StrOutputParser()


In [18]:
# Pipeline: fill the prompt, call the model, reduce the reply to a string.
chain = prompt | llm | output_parser

# Article to summarize, pasted verbatim (page-header residue such as
# "Chapter 4 101" is part of the input). Note that `text` is rebound here:
# from this cell on it holds the article rather than the earlier question.
text = """
As discussed in previous chapters, hallucination in LLMs refers to the generated text being unfaithful or nonsensical compared to the input. It contrasts with faithfulness, where outputs stay
consistent with the source. Hallucinations can spread misinformation like disinformation, rumors,
and deceptive content. This poses threats to society, including distrust in science, polarization,
and democratic processes.
Journalism and archival studies have researched misinformation extensively. Fact-checking initiatives provide training and resources to journalists and independent checkers, allowing expert
verification at scale. Addressing false claims is crucial to preserving information integrity and
combatting detrimental societal impacts.
One technique to address hallucinations is automatic fact-checking – verifying claims made by
LLMs against evidence from external sources. This allows for catching incorrect or unverified
statements.
Fact-checking involves three main stages:
1. Claim detection: Identify parts needing verification
2. Evidence retrieval: Find sources supporting or refuting the claim
3. Verdict prediction: Assess claim veracity based on evidence
Alternative terms for the last two stages are justification production and verdict prediction.
We can see the general idea of these three stages illustrated in the following diagram (source –
https://github.com/Cartus/Automated-Fact-Checking-Resources by Zhijiang Guo):
Figure 4.1: Automatic fact-checking pipeline in three stages
Chapter 4 101
Pre-trained LLMs contain extensive world knowledge that can be prompted for facts. Additionally,
external tools can search knowledge bases, Wikipedia, textbooks, and corpora for evidence. By
grounding claims in data, fact-checking makes LLMs more reliable.
Pre-trained LLMs contain extensive world knowledge from their training data. Starting with the
24-layer BERT-Large in 2018, language models have been pre-trained on large knowledge bases
such as Wikipedia; therefore, they would be able to answer knowledge questions from Wikipedia
or – since their training set increasingly includes other sources – the internet, textbooks, arXiv,
and GitHub.
We can prompt them with masking and other techniques to retrieve facts for evidence. For example, to answer the question “Where is Microsoft’s headquarters located?”, the question would be
rewritten as “Microsoft’s headquarters is in [MASK]” and fed into a language model for the answer.
Alternatively, we can integrate external tools to search knowledge bases, Wikipedia, textbooks,
and other corpora. The key idea is verifying hallucinated claims by grounding them in factual
data sources.
Automatic fact-checking provides a way to make LLMs more reliable by checking that their responses align with real-world evidence. In the next sections, we’ll demonstrate this approach.
In LangChain, we have a chain available for fact-checking with prompt chaining, where a model actively questions the assumptions that went into a statement. In this self-checking chain,
LLMCheckerChain, the model is prompted sequentially – first, to make the assumptions explicit,
which looks like this:
Here's a statement: {statement}\nMake a bullet point list of the
assumptions you made when producing the above statement.\n
Please note that this is a string template, where the elements in curly brackets will be replaced
by variables. Next, these assumptions are fed back to the model in order to check them one by
one with a prompt like this:
Here is a bullet point list of assertions:
{assertions}
For each assertion, determine whether it is true or false. If it is
false, explain why.\n\n
Finally, the model is tasked to make a final judgment:
In light of the above facts, how would you answer the question
'{question}'
102 Building Capable Assistants
LLMCheckerChain does this all by itself, as this example shows:
from langchain.chains import LLMCheckerChain
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.7)
text = "What type of mammal lays the biggest eggs?"
checker_chain = LLMCheckerChain.from_llm(llm, verbose=True)
checker_chain.run(text)
The model can return different results to this question, some of which are wrong, and some of
which it would correctly identify as false. When I was trying this out, I got results such as the blue
whale, the North American beaver, and the extinct Giant Moa in response to my question "What
type of mammal lays the biggest eggs?". The following is the right answer:
Monotremes, a type of mammal found in Australia and parts of New Guinea,
lay the largest eggs in the mammalian world. The eggs of the American
echidna (spiny anteater) can grow as large as 10 cm in length, and
dunnarts (mouse-sized marsupials found in Australia) can have eggs that
exceed 5 cm in length.
• Monotremes can be found in Australia and New Guinea
• The largest eggs in the mammalian world are laid by monotremes
• The American echidna lays eggs that can grow to 10 cm in length
• Dunnarts lay eggs that can exceed 5 cm in length
• Monotremes can be found in Australia and New Guinea – True
• The largest eggs in the mammalian world are laid by monotremes – True
• The American echidna lays eggs that can grow to 10 cm in length – False,
the American echidna lays eggs that are usually between 1 to 4 cm in
length.
• Dunnarts lay eggs that can exceed 5 cm in length – False, dunnarts lay
eggs that are typically between 2 to 3 cm in length.
The largest eggs in the mammalian world are laid by monotremes, which can
be found in Australia and New Guinea. Monotreme eggs can grow to 10 cm in
length.
> Finished chain.
Chapter 4 103
So, while this technique does not guarantee correct answers, it can put a stop to some incorrect
results. Fact-checking approaches involve decomposing claims into smaller checkable queries,
which can be formulated as question-answering tasks. Tools designed for searching domain
datasets can assist fact-checkers in finding evidence effectively. Off-the-shelf search engines
like Google and Bing can also retrieve both topically and evidentially relevant content to capture
the veracity of a statement accurately. We’ll apply this approach to return results based on web
searches and other applications of this chapter.
In the next section, we’ll discuss automating the process of summarizing texts and longer documents such as research papers.
"""

# {text} is the prompt's only placeholder, so the article goes in under that key.
summary_output = chain.invoke({"text": text})

# Last expression of the cell: render the model reply through the project helper.
to_markdown(summary_output)

> ```json
> [
>   {
>     "Missing_Entities": "hallucination;LLMs",
>     "Denser_Summary": "This article discusses the issue of hallucination in LLMs, which refers to the generation of unfaithful or nonsensical text compared to the input. Hallucinations can spread misinformation, rumors, and deceptive content, posing threats to society such as distrust in science, polarization, and democratic processes."
>   },
>   {
>     "Missing_Entities": "fact-checking;verification",
>     "Denser_Summary": "One technique to address hallucinations is automatic fact-checking, which involves verifying claims made by LLMs against evidence from external sources. Fact-checking allows for catching incorrect or unverified statements and consists of three main stages: claim detection, evidence retrieval, and verdict prediction."
>   },
>   {
>     "Missing_Entities": "pre-trained LLMs;world knowledge",
>     "Denser_Summary": "Pre-trained LLMs contain extensive world knowledge from their training data, which can be prompted for facts. By grounding claims in data, fact-checking makes LLMs more reliable."
>   },
>   {
>     "Missing_Entities": "LangChain;LLMCheckerChain",
>     "Denser_Summary": "In LangChain, we have a chain available for fact-checking with prompt chaining, where a model actively questions the assumptions that went into a statement. In this self-checking chain, LLMCheckerChain, the model is prompted sequentially to make the assumptions explicit and then check them one by one."
>   },
>   {
>     "Missing_Entities": "question-answering tasks;domain datasets",
>     "Denser_Summary": "Fact-checking approaches involve decomposing claims into smaller checkable queries, which can be formulated as question-answering tasks. Tools designed for searching domain datasets can assist fact-checkers in finding evidence effectively."
>   }
> ]
> ```