In [1]:
from decouple import config
import os
import uuid

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores.chroma import Chroma

from langchain_ollama import ChatOllama
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from PDFChatBot import PDFChatBot

In [2]:
session_id = str(uuid.uuid4()).replace('-', '_')

In [3]:
if 'OLLAMA_API_BASE_URL' not in os.environ:
    os.environ["OPENAI_API_KEY"] = config('OPENAI_API_KEY')
OLLAMA_API_BASE_URL = os.environ['OLLAMA_API_BASE_URL'] if 'OLLAMA_API_BASE_URL' in os.environ else config('OLLAMA_API_BASE_URL')   
LLM = os.environ['LLM'] if 'LLM' in os.environ else config('LLM')   
EMBEDDING_MODEL = os.environ['EMBEDDING_MODEL'] if 'EMBEDDING_MODEL' in os.environ else config('EMBEDDING_MODEL')  

In [4]:
print(f'Using embedding model: {EMBEDDING_MODEL}')
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

Using embedding model: sentence-transformers/all-MiniLM-L6-v2


  from tqdm.autonotebook import tqdm, trange


In [5]:
print(f'Using LLM: {LLM}')
llm = ChatOllama(
    base_url=OLLAMA_API_BASE_URL, 
    model=LLM
)

Using LLM: llama3.1:8b


In [6]:
chat_bot = PDFChatBot('/Users/stolli/IT/Designing Data-Intensive Applications.pdf', embedding_model, llm)

incorrect startxref pointer(1)


Initializing PDF Chatbot ...
--- Loading and vectorizing PDF file ---


parsing for Object Streams


--- Initializing history aware retriever ---
--- Initializing Q & A chain ---
--- Initializing RAG chain ---


In [7]:
stream_response = []
for chunk in chat_bot.stream_response('What is partitioning?', session_id):
    stream_response.append(chunk)
    print(chunk, end="\n", flush=True)

--- Streaming response ---
{'input': 'What is partitioning?', 'chat_history': []}


{'context': [Document(metadata={'page': 233, 'source': '/Users/stolli/IT/Designing Data-Intensive Applications.pdf'}, page_content='both key-range and hash partitioning, and it splits partitions dynamically in either\ncase.\nPartitioning proportionally to nodes\nWith dynamic partitioning, the number of partitions is proportional to the size of the\ndataset, since the splitting and merging processes keep the size of each partition\nbetween some fixed minimum and maximum. On the other hand, with a fixed num‐\n212 | Chapter 6: Partitioning'), Document(metadata={'page': 233, 'source': '/Users/stolli/IT/Designing Data-Intensive Applications.pdf'}, page_content='both key-range and hash partitioning, and it splits partitions dynamically in either\ncase.\nPartitioning proportionally to nodes\nWith dynamic partitioning, the number of partitions is proportional to the size of the\ndataset, since the splitting and merging processes keep the size of each partition\nbetween some fixed minimum and m

In [8]:
''.join([chunk['answer'] for chunk in stream_response if 'answer' in chunk.keys()])

'In the context provided, partitioning refers to the process of dividing data into smaller, more manageable chunks called partitions or buckets. This can be done in different ways, such as:\n\n* Key-range partitioning: dividing data based on a specific range or key value\n* Hash partitioning: dividing data using a hash function to distribute it across multiple partitions\n\nThe goal of partitioning is often to improve the performance and scalability of databases or data storage systems by reducing the load on individual partitions and allowing for more efficient querying and access to data.'

In [9]:
response = chat_bot.get_response('What is the book about? Please summarize it in around 20 sentences. Include a list of the most important topics', session_id=session_id)

--- Generating response ---


In [10]:
print(response['answer'])

Based on the provided context, I couldn't find any information about the main topic or theme of the book. However, I can try to provide some general insights based on the snippets you've shared.

The book appears to be discussing various concepts related to computer science and programming, specifically in the area of concurrency and synchronization. The text mentions linearizability, consistent snapshots, and lock contention, which suggests that the book may cover topics such as:

**Most Important Topics:**

1. Linearizability
2. Consistent Snapshots
3. Lock Contention
4. Concurrency and Synchronization
5. Data Partitioning

Unfortunately, without more context or information about the book's content, I couldn't provide a detailed summary or insights into its main theme.

However, based on the snippets provided, here's a possible outline of what the book might cover:

* Chapter 1: Introduction to Concurrency and Synchronization
	+ Overview of concurrency challenges and synchronization 

In [11]:
response['context'][0].page_content

'on page 261) is not linearizable: by design, it makes reads from a consistent snapshot,\nto avoid lock contention between readers and writers. The whole point of a consistent\nsnapshot is that it does not include writes that are more recent than the snapshot, and\nthus reads from the snapshot are not linearizable.\nLinearizability | 329'

In [12]:
for document in response['context']:
    print(f'Source: {document.metadata["source"]}')
    print(f'Page: {document.metadata["page"]}')
    print(f'Content: {document.page_content}\n')


Source: /Users/stolli/IT/Designing Data-Intensive Applications.pdf
Page: 350
Content: on page 261) is not linearizable: by design, it makes reads from a consistent snapshot,
to avoid lock contention between readers and writers. The whole point of a consistent
snapshot is that it does not include writes that are more recent than the snapshot, and
thus reads from the snapshot are not linearizable.
Linearizability | 329

Source: /Users/stolli/IT/Designing Data-Intensive Applications.pdf
Page: 350
Content: on page 261) is not linearizable: by design, it makes reads from a consistent snapshot,
to avoid lock contention between readers and writers. The whole point of a consistent
snapshot is that it does not include writes that are more recent than the snapshot, and
thus reads from the snapshot are not linearizable.
Linearizability | 329

Source: /Users/stolli/IT/Designing Data-Intensive Applications.pdf
Page: 350
Content: on page 261) is not linearizable: by design, it makes reads from a con

In [13]:
# chat_bot.get_response('What is partitioning?', session_id=session_id)

In [14]:
# chat_bot.get_response('Can you repeat the answer as structured list?', session_id=session_id)