In [1]:
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms.base import LLM
from typing import Any, List, Mapping, Optional
import google.generativeai as genai



  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Load environment variables and configure Gemini
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


In [3]:

# Minimal custom LLM class for Gemini
class GeminiProLLM(LLM):
    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        return genai.GenerativeModel('gemini-pro').generate_content(prompt).text

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"model_name": "gemini-pro"}

    @property
    def _llm_type(self) -> str:
        return "gemini-pro"



In [4]:
# Specify the path to your PDF file
pdf_path = "/home/vivek/Documents/prashantPer/suyoxxx/github/PDFChat-Gemini-LangChain/data/26317-Article Text-30380-1-2-20230626.pdf"

In [5]:
# Extract text from the PDF
pdf_reader = PdfReader(pdf_path)
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()


In [9]:
len(text)

42210

In [10]:

# Split the text into chunks
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=500,
    chunk_overlap=200,
    length_function=len
)


In [11]:
text_splitter

<langchain_text_splitters.character.CharacterTextSplitter at 0x74f88008b2b0>

In [12]:
text_chunks = text_splitter.split_text(text)


In [13]:
text_chunks[0]

'Are Transformers Effective for Time Series Forecasting?\nAiling Zeng1,2*, Muxi Chen1*, Lei Zhang2, Qiang Xu1\n1The Chinese University of Hong Kong\n2International Digital Economy Academy\n{zengailing, leizhang}@idea.edu.cn,{mxchen21, qxu}@cse.cuhk.edu.hk\nAbstract\nRecently, there has been a surge of Transformer-based solu-\ntions for the long-term time series forecasting (LTSF) task.\nDespite the growing performance over the past few years,'

In [14]:
len(text_chunks)

139

In [16]:

# Create embeddings and vector store
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)



load INSTRUCTOR_Transformer
max_seq_length  512


In [17]:
# Create Gemini LLM instance
gemini_llm = GeminiProLLM()

# Set up the conversation chain
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=gemini_llm,
    retriever=vectorstore.as_retriever(),
    memory=memory
)
# print(conversation_chain)
# Test the system with some questions
# questions = [
#     "What is the main topic of the PDF?",

In [18]:
print(conversation_chain)

questions = [
    "explain Transformer-Based LTSF Solutions"]
   
question1 = [
    "explain the previous answer in bullet points"]


for question in questions:
    response = conversation_chain({'question': question})
    print(f"Question: {question}")
    print(f"Answer: {response['answer']}")
    print("--------------------")

memory=ConversationBufferMemory(chat_memory=InMemoryChatMessageHistory(messages=[]), return_messages=True, memory_key='chat_history') verbose=False combine_docs_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=GeminiProLLM(), output_parser=StrOutputParser(), llm_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_variable_name='context') question_generator=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['chat_history', 'question'], input_types={}, partial_variables={}, template='Given the following conversation and a f

  response = conversation_chain({'question': question})


Question: explain Transformer-Based LTSF Solutions
Answer: Transformer-based LTSF solutions utilize transformer models, which have gained prominence in natural language processing and computer vision due to their effective multi-head self-attention mechanism. This has sparked research in Transformer-based time series modeling techniques, particularly for the LTSF task. However, the permutation-invariant nature of the self-attention mechanism can result in temporal information loss in LTSF.
--------------------


In [None]:
# Interactive loop for user questions
while True:
    user_question = input("Ask a question about the PDF (or type 'exit' to quit): ")
    if user_question.lower() == 'exit':
        break
    response = conversation_chain({'question': user_question})
    print(f"Answer: {response['answer']}")
    print("--------------------")

In [19]:
import torch

# Print current GPU memory usage
print(torch.cuda.memory_allocated())

# Print peak GPU memory usage
print(torch.cuda.max_memory_allocated())

7188414464
8610302464


In [20]:
import torch

def print_gpu_memory():
    if torch.cuda.is_available():
        print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
        print(f"GPU memory reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

# Call this function before and after major operations to see memory changes
print_gpu_memory()

GPU memory allocated: 7.19 GB
GPU memory reserved: 10.57 GB
