In [92]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
# import sentence_transformers

In [93]:
model_name = "gpt2"
llm = HuggingFaceHub(repo_id=model_name, model_kwargs={"temperature":0.9,"max_length":500})

# Initialize the embeddings using a Hugging Face model
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [94]:
from dotenv import load_dotenv
import os

# Load the environment variables from the .env file
load_dotenv()

# Get the API key from the environment variable
huggingface_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if huggingface_api_token is None:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not found. Make sure it's set in the .env file.")


In [95]:
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_api_token

In [96]:
loader = UnstructuredURLLoader(
    urls = [
        "https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html",
        "https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html"
    ]
)

In [83]:
#Initiaizer LLM wth the required parameters
llm = OpenAI(temperature = 0.9,max_tokens = 500)

loaders = UnstructuredURLLoader(urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
]
)


In [97]:

data = loader.load()
len(data)

2

In [98]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
# As data is that of type documents we can directly use split_documents over split_text to retrieve the chunks
docs = text_splitter.split_documents(data)
len(docs) 


29

In [99]:
docs[4]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html'}, page_content='Mutual Fund\n\nPre-Market\n\nIPO\n\nGlobal Market\n\nBudget 2024\n\nElections 2024\n\nGold Rate\n\nBSE Sensex\n\nForum\n\nMC 30\n\nNews\n\nBusiness\n\nMarkets\n\nStocks\n\nIncome Tax Calculator\n\nElection Schedule 2024\n\nIndia News\n\nEconomy\n\nMutual Funds\n\nPersonal Finance\n\nIPO News\n\nStartups\n\nStocks: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z | Others\n\nMutual Funds: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z\n\nVisit the App Store to see all our apps:\n\nDownload from Google PlayDownload from APP StoreDownload from Windows Phone\n\nTools\n\nRetirement Planning\n\nEMI Calculator\n\nSIP Calculator\n\nSIP Planner\n\nUseful Links\n\nCrypto News\n\nBank Holidays in India\n\nGold Rate Today

In [101]:

# Create the FAISS index using the Hugging Face embeddings
vectorindex_hugging_face = FAISS.from_documents(docs, embeddings)

ImportError: Could not import faiss python package. Please install it with `pip install faiss-gpu` (for CUDA supported GPU) or `pip install faiss-cpu` (depending on Python version).

In [104]:
from langchain.vectorstores import faiss
texts = [doc.page_content for doc in docs]

# Create the FAISS index using the Hugging Face embeddings
vectorindex_hugging_face = FAISS.from_texts(texts, embeddings)

In [106]:
# Storing vector index in local ##Vector database

file_path = "vector_index.pkl"
with open(file_path, 'wb') as f:
    pickle.dump(vectorindex_hugging_face, f)

In [108]:
# Loading pickle file into memory
if os.path.exists(file_path):
    with open(file_path, 'rb') as f:
        vectorIndex = pickle.load(f)

In [110]:
chain = RetrievalQAWithSourcesChain.from_llm(llm= llm,retriever = vectorIndex.as_retriever())
chain



In [111]:
query = "what is the price of tiago iCNG"

langchain.debug = True

chain({"question": query}, return_only_outputs=True)

  chain({"question": query}, return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of tiago iCNG"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Expert: Foram Chheda, CMT, technical research analyst and founder of ChartAnalytics.co.in\n\nHDFC Bank: Buy | LTP: Rs 1,647 | Stop-Loss: Rs 1,612 | Target: Rs 1,720 | Return: 4.4 percent\n\nAfter marking a high on a closing basis in July this year, HDFC Bank's stock price saw a corrective decline and halted at Rs 1,579-1,580 levels which was in close proximity to the 200-day MA (moving average) marking it as a strong support level.\n\nThe price movement after that led to the development of t

BadRequestError: (Request ID: 6xru6oNFwV9nGkClLB6jQ)

Bad request:
Authorization header is correct, but the token seems invalid