In [None]:
# âœ… Install necessary packages (Run this once in Jupyter Notebook)
!pip install -q transformers sentence-transformers langchain faiss-cpu unstructured nltk torch accelerate

In [None]:
# ✅ Install necessary packages (Run this once in Jupyter Notebook)
# !pip install -q transformers sentence-transformers langchain faiss-cpu unstructured nltk torch accelerate

import os
import pickle

import langchain
import nltk
import numpy as np
import torch
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.globals import set_debug
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Fetch the NLTK resources this notebook relies on
# (presumably required by the `unstructured` loader; confirm).
for nltk_resource in ("punkt", "averaged_perceptron_tagger"):
    nltk.download(nltk_resource)

# Model configuration: GPT-2 is used for both the weights and the tokenizer.
MODEL_NAME = "gpt2"
TOKENIZER_NAME = "gpt2"

# Use half precision only when a CUDA device is present. On a CPU-only
# machine float16 inference is slow and some ops may lack half-precision
# kernels, so fall back to float32 there.
MODEL_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the tokenizer and the causal language model.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=MODEL_DTYPE, device_map="auto"
)

# Build the text-generation pipeline and wrap it so LangChain can use it as an LLM.
# pad_token_id is set explicitly to the EOS token: generation otherwise picks the
# same value on its own but logs a warning on every call.
# NOTE(review): prompt length plus max_new_tokens must fit the model's context
# window; confirm the chain's prompts leave room for 512 new tokens.
hf_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    pad_token_id=tokenizer.eos_token_id,
)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Source articles to index.
urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
]

# Fetch the pages and parse them into LangChain documents.
loaders = UnstructuredURLLoader(urls=urls)
data = loaders.load()

# Fail early with a clear message when nothing could be loaded; an empty
# document list would otherwise only surface later, far from the cause.
if not data:
    raise ValueError(f"No documents could be loaded from the {len(urls)} configured URLs")
print(f"Loaded {len(data)} documents")

# Break the loaded documents into overlapping chunks
# (chunk_size=1000 with an overlap of 200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(data)

# Hugging Face sentence-transformers model used to embed the chunks.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

# Build the FAISS vector index from the chunks and their embeddings.
vectorindex = FAISS.from_documents(docs, embeddings)

# Question-answering-with-sources chain: the LLM answers using the chunks
# returned by the FAISS-backed retriever.
retriever = vectorindex.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

# Question to put to the retrieval chain.
query = "What is the price of Tiago iCNG?"

# Turn on LangChain's global debug tracing through the supported setter
# instead of assigning the `langchain.debug` module attribute directly.
set_debug(True)

# Run the chain. `invoke` replaces calling the chain object directly, which
# is deprecated; return_only_outputs=True keeps the inputs out of the result.
response = chain.invoke({"question": query}, return_only_outputs=True)

# Show the chain's output.
print("\nðŸ”¹ Answer:")
print(response)


[nltk_data] Downloading package punkt to C:\Users\charansai
[nltk_data]     putta\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\charansai putta\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
Device set to use cpu
  llm = HuggingFacePipeline(pipeline=hf_pipeline)  # âœ… Fix: Use LangChain Wrapper


Loaded 2 documents


  embeddings = HuggingFaceEmbeddings(model_name=embedding_model)





  response = chain({"question": query}, return_only_outputs=True)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What is the price of Tiago iCNG?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "The company also said it has also introduced the twin-cylinder technology on its Tiago and Tigor models.\n\nThe Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh, while the Tigor iCNG comes at a price range of Rs 7.8 lakh to Rs 8.95 lakh.\n\nTata Motors Passenger Vehicles Ltd Head-Marketing, Vinay Pant said these introductions put together will make the company's CNG line up \"appealing, holistic, and stronger than ever\".\n\nPTI\n\nfirst published: Aug 4, 2023 02:17 pm\n\nDiscover 