In [5]:
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_anthropic import ChatAnthropic
# from langchain_community.embeddings import OllamaEmbeddings

# Support for dataset retrieval with Hugging Face
from datasets import load_dataset

# CassIO is the engine powering the Astra DB integration in LangChain;
# it is also used to initialize the database connection.
import cassio

from PyPDF2 import PdfReader

from dotenv import load_dotenv
import os

# Run python-dotenv's loader first so the environment lookup below happens after it.
load_dotenv()

# Anthropic settings consumed when the chat model is constructed.
CLAUDE_MODEL = "claude-3-opus-20240229"
CLAUDE_KEY = os.environ.get("ANTHROPIC_API_KEY")  # None when the variable is unset


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Read the source PDF and gather the text of every page into one string.
pdfreader = PdfReader("Zarchiver.pdf")

# Pages whose extract_text() result is empty/None are skipped (same guard as
# before); a single join replaces the repeated `raw_text += ...` concatenation.
page_texts = (page.extract_text() for page in pdfreader.pages)
raw_text = "".join(text for text in page_texts if text)

In [7]:
# Initialise CassIO with the Astra DB credentials taken from the environment.
# The values are passed inline so the token is never bound to a notebook global.
cassio.init(
    token=os.environ.get("ASTRA_DB_APPLICATION_TOKEN"),
    database_id=os.environ.get("ASTRA_DB_ID"),
)

In [8]:
# Chat model that is later passed to the vector index to answer questions.
llm = ChatAnthropic(temperature=0.6, api_key=CLAUDE_KEY, model_name=CLAUDE_MODEL)

# The vector-store cell passes `embedding=embeddings`, so this name must exist
# on a fresh kernel. Previously both the import and the assignment were
# commented out, which makes Restart & Run All fail with a NameError.
# NOTE(review): this restores the commented-out Ollama configuration found in
# this notebook; it presumably needs a reachable Ollama server with the
# "llama3" model — confirm that is the embedding model the table was built with.
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")

In [9]:
# Vector store over the "qa_mini_demo" table; texts are embedded with `embeddings`.
# NOTE(review): session and keyspace are passed as None — presumably so the
# store uses the connection registered by cassio.init(); confirm against the
# LangChain Cassandra documentation.
astra_vector_store = Cassandra(
    session=None,
    keyspace=None,
    table_name="qa_mini_demo",
    embedding=embeddings,
)

In [10]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter configuration: break on newlines, target chunk size 800 with an
# overlap of 200, both measured with len().
text_splitter = CharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=200,
    separator="\n",
    length_function=len,
)

# Split the extracted PDF text into the chunks that will be embedded and stored.
texts = text_splitter.split_text(raw_text)

In [11]:
# Push the text chunks into the vector store.
# NOTE(review): no ids are supplied, so re-running this cell presumably inserts
# the same chunks again — verify before executing it more than once.
astra_vector_store.add_texts(texts=texts)

# Wrap the store so it can be queried together with an LLM.
astra_vector_index = VectorStoreIndexWrapper(
    vectorstore=astra_vector_store,
)

In [12]:
# Interactive question-answering loop over the indexed PDF.
# Each non-empty question is echoed, sent to the vector index together with
# the LLM, and the stripped answer is printed. Typing "quit" (any case) ends
# the session.
prompt = "\nEnter your question (or type 'quit' to exit): "
while True:
    try:
        query_text = input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        # A closed stdin or an interrupt at the prompt is treated like "quit"
        # instead of ending the cell with a traceback.
        break

    if query_text.lower() == "quit":
        break
    if not query_text:
        # Blank input: ask again without switching to the follow-up prompt.
        continue

    # From the first real question onwards, use the follow-up wording.
    prompt = "\nWhat's your next question (or type 'quit' to exit): "

    print("\nQUESTION: \"%s\"" % query_text)

    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print("\nAnswer: \"%s\"" % answer)



QUESTION: "What is Zarchiver ?"

Answer: "According to the provided information, Zarchiver is a popular application designed for Android devices. It is widely used to compress files and decompress archive files.

Some key points about Zarchiver:

- It can read many different file formats
- It provides password protection for files
- It has facilities for system file management 
- It allows editing archived files
- It can partially decompress files as per the user's desire
- It is compatible with RAR and 7z file formats
- It has a user-friendly interface and is easy to use
- It is available in both free and paid versions
- Any modern Android device can run this app without issues

So in summary, Zarchiver is a feature-rich file archiving and compression tool designed specifically for the Android platform."
