In [1]:
import os
import PyPDF2
import re
from dotenv import load_dotenv
from genai.extensions.langchain import LangChainInterface
from genai.schemas import GenerateParams
from genai.credentials import Credentials

In [2]:
# Read the GenAI API key and endpoint from the environment.
# load_dotenv() is called first so the lookups below can see any values it
# adds to the process environment; an unset variable yields None.
load_dotenv()
api_key = os.environ.get("GENAI_KEY")
api_url = os.environ.get("GENAI_API")

# Credentials object handed to the model wrapper later in the notebook.
creds = Credentials(api_key, api_endpoint=api_url)

In [3]:
# Load the PDF file and extract its text into a single string.
# Pages are joined with a newline so the last token of one page cannot fuse
# with the first token of the next; fused words or digits would distort any
# pattern matching or prompting done on `text`.
pdf_path = "examples/POC datasets/Car Parts Sales Contract1.pdf"
with open(pdf_path, "rb") as file:
    reader = PyPDF2.PdfReader(file)
    # Extraction stays inside the `with` so the reader's stream is still open.
    # `or ""` keeps the join safe should a page yield no text at all.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

In [4]:
# Find the discount information in the extracted text.
# Heuristic: the first percentage figure in the text (e.g. "10%" or "12.5%")
# is taken to be the discount. `discount` stays None when no percentage exists.
discount = None
discount_match = re.search(r"(\d+(?:\.\d+)?)%", text)
if discount_match:
    # Group 1 is the numeric part without the trailing "%".
    discount = float(discount_match.group(1))

# Report a missing value explicitly instead of printing the misleading "None%".
if discount is None:
    print("No discount percentage was found in the document.")
else:
    print(f"The discount mentioned in the document is: {discount}%")

The discount mentioned in the document is: 10.0%


In [5]:
# Ask a GenAI model, wrapped as a LangChain model, a question about the
# extracted PDF text and print its answer.
print("\n------------- Example (LangChain)-------------\n")
print("Using GenAI Model expressed as LangChain Model via LangChainInterface:")

# Greedy decoding is requested through the generation parameters.
params = GenerateParams(decoding_method="greedy")
langchain_model = LangChainInterface(
    model="google/flan-t5-xxl",
    params=params,
    credentials=creds,
)

# The prompt is the question followed by the full document text as context.
question = "".join(
    ("What is the company mentioned in the document? The context is: ", text)
)
answer = langchain_model(question)

print(f"Answer: {answer}")


------------- Example (LangChain)-------------

Using GenAI Model expressed as LangChain Model via LangChainInterface:
Answer: Auto Ltd
