In [8]:
!pip install -q cassio datasets langchain openai tiktoken

In [9]:
# LangChain components to use
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

# Support for dataset retrieval with Hugging Face
from datasets import load_dataset

# With CassIO, the engine powering the Astra DB integration in LangChain,
# you will also initialize the DB connection:
import cassio

In [10]:
!pip install PyPDF2



In [11]:
from PyPDF2 import PdfReader

In [12]:
import os

# Secrets are read from environment variables so they are never saved inside
# the notebook. Set them before starting Jupyter, e.g.
#   export ASTRA_DB_APPLICATION_TOKEN="AstraCS:..."
# A variable that is not set falls back to an empty string.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "")  # the "AstraCS:..." string found in your Token JSON file
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID", "")  # your Database ID

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")  # your OpenAI key

In [13]:
# Path of the PDF to index; replace it with the path of your own file.
pdf_path = 'GOT.pdf'
pdfreader = PdfReader(pdf_path)

In [14]:
from typing_extensions import Concatenate
# Extract the text of every page and join it into one string.
# Pages whose extract_text() result is empty/falsy are skipped.
page_texts = (page.extract_text() for page in pdfreader.pages)
raw_text = ''.join(content for content in page_texts if content)

Initialize the connection to your database:



In [16]:
# Connect to Astra DB with the application token and database ID.
cassio.init(
    token=ASTRA_DB_APPLICATION_TOKEN,
    database_id=ASTRA_DB_ID,
)

Creating the LangChain embedding and LLM objects for later usage:

In [18]:
# Both OpenAI clients use the same API key: `embedding` is handed to the
# vector store, `llm` is handed to the index when answering questions.
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

In [19]:
# Vector store backed by the "qa_mini_demo" table.
# NOTE(review): session/keyspace are passed as None — presumably the store then
# uses the connection set up by cassio.init(); confirm against the CassIO docs.
astra_vector_store = Cassandra(
    embedding=embedding, table_name="qa_mini_demo", session=None, keyspace=None
)

In [20]:
from langchain.text_splitter import CharacterTextSplitter
# Split the raw text on newlines into chunks of roughly 800 characters
# (measured with len) that overlap by up to 200 characters, so that no single
# chunk grows too large in tokens.
text_splitter = CharacterTextSplitter(
    separator="\n", chunk_size=800, chunk_overlap=200, length_function=len
)
texts = text_splitter.split_text(raw_text)

In [None]:
# Preview the first 50 chunks produced by the splitter.
texts[:50]

In [21]:
# Only the first 50 chunks are stored in the vector store.
chunks_to_insert = texts[:50]
astra_vector_store.add_texts(chunks_to_insert)

# The inserted items are PDF text chunks, so the message names them as such.
print("Inserted %i text chunks." % len(chunks_to_insert))

# Wrap the store in an index object that can be queried together with an LLM.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

Inserted 50 headlines.


In [24]:
# Interactive question-answering loop over the indexed PDF chunks.
# Enter "q" (any letter case) to stop. Blank input is ignored rather than
# being sent to the LLM and the vector store as an empty query.
TOP_K = 4           # number of supporting chunks listed under each answer
PREVIEW_CHARS = 80  # leading characters of each chunk shown in the listing

prompt = "\nEnter the question(or q to quit): "
while True:
    q_text = input(prompt).strip()

    if q_text.lower() == 'q':
        break
    if not q_text:
        # Nothing was typed: ask again with the same prompt.
        continue

    # Once a real question has been entered, later prompts ask for another one.
    prompt = "\nEnter another question(or q to quit): "

    answer = astra_vector_index.query(q_text, llm=llm).strip()

    # The report layout is spelled out line by line so its whitespace is explicit.
    print(
        "\n"
        f'    question: "{q_text}"\n'
        f'    answer: "{answer}"\n'
        "    \n"
        "    Documets for relevance\n"
        "    "
    )

    # List the stored chunks most similar to the question, with their scores.
    for doc, score in astra_vector_store.similarity_search_with_score(q_text, k=TOP_K):
        print(f"   [{score:.4f}] \"{doc.page_content[:PREVIEW_CHARS]}...\"")
              
    


Enter the question(or q to quit):  who is the current king



    question: "who is the current king"
    answer: "The current king is not mentioned in this context, so it is not possible to determine who the current king is."
    
    Documets for relevance
    
   [0.8658] "the Seven Kingdoms and Protector of the Realm, by the word of Eddard of the Hous..."
   [0.8645] "Bran had no answer for that. “ King Robert has a heads man, ” he said, uncertain..."
   [0.8566] "“One day, Bran, you will be Robb ’s bannerman,  holding a keep of your own for y..."
   [0.8564] "But the man they found bound hand and foot to the holdfast wall awaiting the kin..."



Enter another question(or q to quit):  what is the symbol of starks



    question: "what is the symbol of starks"
    answer: "The symbol of the Starks is a grey direwolf racing across an ice-white field."
    
    Documets for relevance
    
   [0.8952] "in the evening and talk softly of the age of heroes and the children of the fore..."
   [0.8936] "the Seven Kingdoms and Protector of the Realm, by the word of Eddard of the Hous..."
   [0.8929] "horses, with Bran between them on his pony, trying to seem older than seven, try..."
   [0.8854] "struck by lightning. Will knelt, looked around warily, and snatched it up. The b..."



Enter another question(or q to quit):  who is aegon



    question: "who is aegon"
    answer: "I don't know. There is no mention of anyone named Aegon in the given context."
    
    Documets for relevance
    
   [0.8750] "Theon Greyjoy brought forth the sword. “ Ice,” that sword was called. It was as ..."
   [0.8726] "in the evening and talk softly of the age of heroes and the children of the fore..."
   [0.8724] "the Seven Kingdoms and Protector of the Realm, by the word of Eddard of the Hous..."
   [0.8659] "But the man they found bound hand and foot to the holdfast wall awaiting the kin..."



Enter another question(or q to quit):  q
