In [1]:
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

from datasets import load_dataset
import os
from dotenv import load_dotenv
#import cassio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from PyPDF2 import PdfReader

In [3]:
load_dotenv()
ASTRA_DB_APPLICATION_TOKEN=os.getenv("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID=os.getenv("ASTRA_DB_ID")
groq_api_key=os.getenv("GROQ_API_KEY")
hf_token=os.getenv('HF_TOKEN')

In [5]:
pdf=PdfReader("IKSpeech.pdf")

In [6]:
from typing_extensions import Concatenate
# read text from pdf
raw_text = ''
for i, page in enumerate(pdf.pages):
    content = page.extract_text()
    if content:
        raw_text += content

In [7]:
raw_text

"1 \n Statement by the Prime Min ister of Pakistan H.E. Imran Khan to the \nSeventy -sixth Session of the UN General Assembly  \n24 September 2021  \n \n بِسْمِ هللاِ الرَّحْمٰنِ الرَّحِيْمِ \n \nنَسْتَعِين   وَإِيَّاكَ  نَعْب د   إِيَّاكَ  \n \nMr. President , \n \nI congratulate you on ass uming the presiden cy of the 76th session of the General \nAssembly.  \n \nI also wish to express appreciation for the significant achievements of your \npredecessor, Volkan Bozkir, who guided the Assembly skilfully under the \ndifficult circumstances impo sed by the Covid-19 pandemic.  \n \nMr. President , \n \nThe world is facing triple challenge  of the Covid -19, the accompanying \neconomic crisis, and the threats posed by climate change.  \n \nThe virus does not discriminate between nations and people. Nor do the \ncatastrophes impos ed by uncertain we ather patterns.  \n \nThe common threats faced by us today not  only expose the fragility of the \ninternational system; they also underscore t

In [8]:
import cassio
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

In [14]:
llm=ChatGroq(model="llama-3.1-8b-instant",groq_api_key=groq_api_key)
embedding=HuggingFaceEmbeddings()

In [11]:
llm.invoke('hello').content

'Hello. Is there something I can help you with or would you like to chat?'

In [16]:
astra_vector_store=Cassandra(
    embedding=embedding,
    table_name="pdf_query_db",
    session=None,
    keyspace=None
)

In [19]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter=CharacterTextSplitter(
    separator="/n",
    chunk_size=800,
    chunk_overlap= 200,
    length_function=len,
)
texts=text_splitter.split_text(raw_text)

In [20]:
texts[:50]

["1 \n Statement by the Prime Min ister of Pakistan H.E. Imran Khan to the \nSeventy -sixth Session of the UN General Assembly  \n24 September 2021  \n \n بِسْمِ هللاِ الرَّحْمٰنِ الرَّحِيْمِ \n \nنَسْتَعِين   وَإِيَّاكَ  نَعْب د   إِيَّاكَ  \n \nMr. President , \n \nI congratulate you on ass uming the presiden cy of the 76th session of the General \nAssembly.  \n \nI also wish to express appreciation for the significant achievements of your \npredecessor, Volkan Bozkir, who guided the Assembly skilfully under the \ndifficult circumstances impo sed by the Covid-19 pandemic.  \n \nMr. President , \n \nThe world is facing triple challenge  of the Covid -19, the accompanying \neconomic crisis, and the threats posed by climate change.  \n \nThe virus does not discriminate between nations and people. Nor do the \ncatastrophes impos ed by uncertain we ather patterns.  \n \nThe common threats faced by us today not  only expose the fragility of the \ninternational system; they also underscore 

In [21]:
astra_vector_store.add_texts(texts[:50])

print("Inserted %i headlines." % len(texts[:50]))

astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

Inserted 1 headlines.


In [22]:
first_question = True
while True:
    if first_question:
        query_text = input("\nEnter your question (or type 'quit' to exit): ").strip()
    else:
        query_text = input("\nWhat's your next question (or type 'quit' to exit): ").strip()

    if query_text.lower() == "quit":
        break

    if query_text == "":
        continue

    first_question = False

    print("\nQUESTION: \"%s\"" % query_text)
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print("ANSWER: \"%s\"\n" % answer)

    print("FIRST DOCUMENTS BY RELEVANCE:")
    for doc, score in astra_vector_store.similarity_search_with_score(query_text, k=4):
        print("    [%0.4f] \"%s ...\"" % (score, doc.page_content[:84]))


QUESTION: "what was imran khan statement ?"
ANSWER: "Imran Khan's statement was made to the 76th Session of the UN General Assembly on September 24, 2021. Here are the main points from his statement:

1. **Global Challenges**: Imran Khan highlighted the triple challenge of the COVID-19 pandemic, economic crisis, and climate change, and emphasized that these threats expose the fragility of the international system and underscore the oneness of humanity.

2. **Pakistan's Success in Containing COVID-19**: He mentioned that Pakistan has been successful in containing the COVID-19 pandemic through a calibrated strategy of "smart lockdowns," which helped save lives and livelihoods and kept the economy afloat.

3. **Climate Change**: Imran Khan emphasized that climate change is one of the primary existential threats facing the planet and that Pakistan, despite contributing negligibly to global emissions, is among the 10 most vulnerable countries to its effects.

4. **Comprehensive Strategy to