# Step1:Import necessary libraries

In [1]:
!pip install -q llama-index

In [2]:
pip install pdfplumber



In [3]:
!pip install sentence_transformers



In [4]:
import requests
import pdfplumber
import re
from sentence_transformers import SentenceTransformer
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
from transformers import GPT2LMHeadModel, GPT2Tokenizer


  from tqdm.autonotebook import tqdm, trange


## Step 2: Load and process the PDF

In [None]:
def load_book_text(pdf_path):
    """Load text from PDF using pdfplumber."""
    try:
        with pdfplumber.open(pdf_path) as pdf:
            text = ""
            for page in pdf.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return ""

## Step 3: Split text into paragraphs

In [10]:
def split_text_into_paragraphs(text):
    """Split the book text into paragraphs."""
    paragraphs = text.splitlines()  # Split by newlines
    paragraphs = [p.strip() for p in paragraphs if p.strip()]  # Remove empty paragraphs
    return paragraphs

## Step 4: Retrieve relevant passages

In [11]:
def retrieve_passages(query, paragraphs, paragraph_embeddings, model, k=5):
    """Find the k most similar paragraphs to the query."""
    query_embedding = model.encode([query])
    nbrs = NearestNeighbors(n_neighbors=min(k, len(paragraphs)), metric='cosine').fit(paragraph_embeddings)
    distances, indices = nbrs.kneighbors(query_embedding)
    return [(paragraphs[i], distances[0][j]) for j, i in enumerate(indices[0])]

## Step 5: Generate an answer using GPT

In [15]:
def generate_answer(context, question, gpt_model, tokenizer):
    """Generate an answer using GPT-2 based on the context."""
    input_text = f"Context: {context}\nQuestion: {question}\nAnswer:"
    inputs = tokenizer.encode(input_text, return_tensors="pt")
    outputs = gpt_model.generate(inputs, max_length=200, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [16]:
# Load models
st_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
gpt_model = GPT2LMHeadModel.from_pretrained("gpt2")  # GPT-2 model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")  # GPT-2 tokenizer

## Step 6: Main function to run the question-answer system

In [20]:
def main():
    # Load the book
    pdf_path = "Principles_of_Finance.pdf"
    book_text = load_book_text(pdf_path)

    if not book_text:
        print("Could not load the book. Please check the PDF file path.")
        return

    # Preprocess the text
    paragraphs = split_text_into_paragraphs(book_text)
    print(f"Loaded {len(paragraphs)} paragraphs from the book.")

    # Encode paragraphs
    paragraph_embeddings = st_model.encode(paragraphs)

    # Ask user for a question
    question = input("Enter your question: ")

    # Retrieve relevant passages
    relevant_passages = retrieve_passages(question, paragraphs, paragraph_embeddings, st_model, k=5)
    context = " ".join([p[0] for p in relevant_passages])  # Concatenate relevant passages

    print("\nRelevant passages:")
    for passage, distance in relevant_passages:
        print(f"{passage} (similarity: {1 - distance:.2f})")

    # Generate an answer
    answer = generate_answer(context, question, gpt_model, tokenizer)
    print("\nAnswer:")
    print(answer)

# Run the main function
if __name__ == "__main__":
    main()

Loaded 20665 paragraphs from the book.
Enter your question: What is the time value of money?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



Relevant passages:
This chapter has explored the time value of money by expanding on the concepts discussed inTime Value of (similarity: 0.85)
How and Why the Passage of Time Affects the Value of Money (similarity: 0.80)
• Explain why time has an impact on the value of money. (similarity: 0.77)
revolve around the issue of the time value of money. (similarity: 0.77)
The entire concept of the time value of money is particularly important because it allows savers and investors (similarity: 0.77)

Answer:
Context: This chapter has explored the time value of money by expanding on the concepts discussed inTime Value of How and Why the Passage of Time Affects the Value of Money • Explain why time has an impact on the value of money. revolve around the issue of the time value of money. The entire concept of the time value of money is particularly important because it allows savers and investors
Question: What is the time value of money?
Answer: The time value of money is the sum of the time v