In [None]:
!pip install langchain
!pip install pypdf
!pip install chromadb

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader


# Placeholder locations - replace with real PDF paths before running.
path = "path to source files"
claims_path = "path to target file to be checked"

# Use `load()` rather than `load_and_split()`: the latter runs the loader's
# default text splitter on top of the per-page documents, so the counts printed
# below would be chunk counts rather than page counts, and the explicit
# zero-overlap split performed later would operate on already overlapping pieces.
doc = PyPDFLoader(path).load()
claims = PyPDFLoader(claims_path).load()
print("The number of pages is", len(doc), f"in {path}")
print("The number of pages is", len(claims), f"in {claims_path}")

In [None]:
# Sanity check: print the entry at index 1 of `doc` (the second entry, since
# indexing is zero-based). Raises IndexError if `doc` has fewer than two entries.
print(doc[1])

In [None]:
# Configure the splitter for a chunk size of 500 with no overlap between
# neighbouring chunks, then split every loaded source document.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
chunks = text_splitter.split_documents(doc)

# Dump each chunk's text under a "PARAGRAPH:" banner for visual inspection.
for chunk in chunks:
    print("\nPARAGRAPH:\n", chunk.page_content, sep="\n")

In [None]:
from langchain.vectorstores import Chroma
import openai
from langchain.embeddings.openai import OpenAIEmbeddings


import os
from getpass import getpass

# Never store a real key in the notebook: take it from the OPENAI_API_KEY
# environment variable, or prompt for it (input hidden) when the variable is
# unset or empty. `openai.api_key` is still assigned because later cells read it.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
embedding = OpenAIEmbeddings(openai_api_key=openai.api_key)

# Embed every chunk and store the vectors in a Chroma collection under
# `persist_directory`.
# NOTE(review): re-running this cell against an existing directory presumably
# adds the same chunks again, which would surface duplicates in later searches -
# confirm, and clear the directory between runs if so.
persist_directory = 'docs/chroma'
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embedding,
    persist_directory=persist_directory
)

In [None]:
# The statement to be fact-checked against the indexed source chunks.
# It is hard-coded here; the `claims` documents loaded from `claims_path` are
# not consulted anywhere in the cells shown.
claim = """WELIREG may cause serious side effects. WELIREG may cause harm to your unborn baby.
• A pregnancy test will be done before you start treatment.
• Birth control methods that contain hormones may not work as well during treatment.
• Females and males with female partners who can become pregnant should use an effective
form of non-hormonal birth control (contraception) during treatment and for 1 week after
your last dose."""

def ask_from_pdf(question, vectordb, k=5):
    """Return the entries of `vectordb` most similar to `question`.

    Args:
        question: Query text handed to the store's similarity search.
        vectordb: Any object exposing `similarity_search(query, k)`.
        k: Number of results requested from the store (defaults to 5).

    Returns:
        Whatever `vectordb.similarity_search` returns, unchanged.
    """
    return vectordb.similarity_search(question, k)

# Retrieve the source chunks closest to the claim (default k=5).
result = ask_from_pdf(question=claim, vectordb=vectordb)

In [None]:
# Keep only the text of each retrieved document. Reading `.page_content`
# directly avoids serialising the whole document with `.dict()` just to pull out
# one field, and matches how chunk text is accessed elsewhere in this notebook.
top_paragraphs = [hit.page_content for hit in result]

# Show the retrieved passages, separated by blank lines, for manual review.
for par in top_paragraphs:
    print(par, "\n\n")

In [None]:
from langchain import PromptTemplate


# Prompt skeleton with two placeholders: {claim} for the statement under test
# and {search_results} for the retrieved source passages.
template = """Given this claim: {claim}
And these source paragraphs: {search_results}
Could you list only the factual errors and serious inconsistencies in the provided claim given these sources? Note, that the claim can be correct, return "Good" in this case
"""

prompt = PromptTemplate.from_template(template)

# Fill the placeholders; the retrieved passages are joined with single newlines.
input_prompt = prompt.format(
    search_results="\n".join(top_paragraphs),
    claim=claim,
)

# Echo the final prompt so it can be inspected before it is sent to the model.
print(input_prompt)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

# Chat client authenticated with the key set earlier; temperature=0 asks for
# the least random sampling.
chat = ChatOpenAI(
    temperature=0,
    openai_api_key=openai.api_key,
)

# Send the complete fact-checking prompt as a single human message.
model_output = chat(
    [HumanMessage(content=input_prompt)]
)

In [None]:
# Display the text of the model's reply to the fact-checking prompt.
print(model_output.content)