In [None]:
!pip install -q transformers accelerate langchain faiss-cpu pypdf sentence-transformers

In [None]:
!pip install -U langchain langchain-community

In [None]:
# PyPDFLoader is provided by langchain-community; the old `langchain.document_loaders`
# path is a deprecated alias that is not available in current LangChain releases.
from langchain_community.document_loaders import PyPDFLoader

# === Load PDF ===
# Parse the uploaded report into LangChain Document objects.
pdf_path = "/content/Stock_Market_Performance_2024.pdf"  # Update this path after uploading
loader = PyPDFLoader(pdf_path)
pages = loader.load()

In [None]:
import requests

# Connectivity check against the Hugging Face site before any model download is attempted.
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and a
# failed request is reported as a readable message instead of a raw traceback.
try:
    print(requests.get("https://huggingface.co", timeout=10).status_code)
except requests.RequestException as exc:
    print(f"Could not reach https://huggingface.co: {exc}")


In [None]:
# Install necessary packages if not already installed
!pip install -q transformers langchain sentence-transformers faiss-cpu pypdf

In [None]:
# === Import Dependencies ===
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# The LangChain integrations are imported from their dedicated packages.
# The former `langchain.text_splitter`, `langchain.vectorstores`, `langchain.embeddings`
# and `langchain.document_loaders` paths are deprecated aliases that are not
# available in current LangChain releases.
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader

# === Load and Split PDF ===
pdf_path = "/content/Stock_Market_Performance_2024.pdf"  # Upload this manually in Colab
loader = PyPDFLoader(pdf_path)
pages = loader.load()

# Split into chunks of up to 1000 characters; the 200-character overlap keeps
# sentences that straddle a chunk boundary retrievable from either side.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(pages)

# A scanned or image-only PDF yields no text chunks. Fail here with a clear
# message rather than letting the vector index construction fail obscurely.
if not docs:
    raise ValueError(
        f"No extractable text found in {pdf_path!r}; cannot build a retrieval index."
    )

# === Build Embeddings and Retriever ===
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Embed every chunk and index the vectors in an in-memory FAISS store.
db = FAISS.from_documents(docs, embedding_model)
# Each query fetches the 5 most similar chunks.
retriever = db.as_retriever(search_kwargs={"k": 5})

# === Load QA Model ===
model_name = "google/flan-t5-base"  # Use 'flan-t5-large' if you want slightly better performance

# Resolve the device once so the model and the pipeline always agree.
# A hard-coded GPU index in the pipeline fails on CPU-only runtimes.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Text-to-text generation pipeline; each answer is capped at 256 newly generated tokens.
qa_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    device=device,
)

# === RAG Ask Function ===
def ask(question):
    """Answer a question about the indexed PDF and print the result.

    Retrieves the chunks most relevant to ``question`` from the module-level
    ``retriever``, builds a "Context / Question / Answer" prompt from the top
    three of them, and runs it through the module-level ``qa_pipeline``.

    Args:
        question: The user's question as a string. It is passed to the
            retriever and embedded in the prompt unchanged.

    Returns:
        None. The answer (or a notice for a blank question) is printed.
    """
    # A blank question has nothing to retrieve against; skip the model call.
    if not question or not question.strip():
        print("Please enter a non-empty question.")
        return

    # `invoke` is the current retriever entry point; `get_relevant_documents` is deprecated.
    context_docs = retriever.invoke(question)

    # Only the three best-ranked chunks go into the prompt.
    context = "\n\n".join(doc.page_content for doc in context_docs[:3])
    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"

    # The pipeline returns a list with one dict per input prompt.
    result = qa_pipeline(prompt)[0]["generated_text"]
    print("\n=== ANSWER ===")
    print(result.strip())

# === Interactive Loop ===
# Read questions until the user types 'exit' or 'quit' (case-insensitive).
print("PDF RAG Chat Ready. Type 'exit' to stop.")
while True:
    try:
        q = input("\nAsk a question: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: end the chat without a traceback.
        break

    command = q.strip().lower()
    if command in ('exit', 'quit'):
        break
    if not command:
        # Nothing was typed; prompt again instead of querying the model.
        continue
    ask(q)