In [2]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [6]:
#load openAI api key
os.environ['OPENAI_API_KEY'] = 'sk-r12-72nQx-K_-IBtx7-szXzY3voxNsnfcD6hSI9r5PT3BlbkFJGxncVHubVKbbxUsWFxbvZ8lsMBArLsee-WcGCUbm4A'

In [7]:
# Initialise LLM with required params
llm = OpenAI(temperature=0.9, max_tokens=500) 

In [8]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/gold-rates-today/",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])


In [9]:
data = loaders.load() 
len(data)

2

In [10]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)

In [11]:
len(docs)

28

In [12]:
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/gold-rates-today/'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_NEWS_COMMODITY_PRICE_AS/MC_ENG_ROS_NWS_COMM_PRC_AS_ATF_728|~|MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_NEWS_COMMODITY_PRICE_AS/MC_ENG_ROS_NWS_COMM_PRC_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsGold rates today\n\nGOLD RATE IN INDIA\n\nAs on Oct, 01

In [13]:
from sentence_transformers import SentenceTransformer

# Load the Sentence Transformer model
model = SentenceTransformer('all-mpnet-base-v2')  # You can choose a different model if desired

# Create embeddings for the document chunks
embeddings = model.encode([doc.page_content for doc in docs])  # Extracting text content from Document objects

# Display the shape of the embeddings
print(f"Shape of embeddings: {embeddings.shape}")




Shape of embeddings: (28, 768)


In [14]:
import faiss
import numpy as np

# Assuming 'embeddings' is a NumPy array containing the embeddings for your document chunks
# Convert embeddings to float32 as required by FAISS
embeddings = embeddings.astype(np.float32)

# Create a FAISS index
dimension = embeddings.shape[1]  # Get the number of dimensions of the embeddings
index = faiss.IndexFlatL2(dimension)  # Use L2 distance for the index

# Add the embeddings to the index
index.add(embeddings)

# Optionally, save the FAISS index to a file
faiss.write_index(index, "vector_index.faiss")

# If you want to load the FAISS index later
# loaded_index = faiss.read_index("vector_index.faiss")

print("FAISS index created and embeddings added.")


FAISS index created and embeddings added.


In [18]:
# Store the FAISS index using pickle


file_path = "vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(index, f)

In [31]:
# Load the FAISS index from the pickle file
file_path = "vector_index.pkl"
with open(file_path, "rb") as f:
    vectorIndex = pickle.load(f)

print("FAISS index loaded successfully.")

FAISS index loaded successfully.


In [54]:
from langchain.vectorstores import FAISS
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.schema import Document
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle

# Assuming your embeddings and docs are already set up
# model: SentenceTransformer instance
# docs: List of documents

# Convert document list to Document objects expected by LangChain
documents = [Document(page_content=doc.page_content) for doc in docs]

# Create a FAISS index
# Assuming the embeddings and index creation code remains unchanged

# Create the document store (InMemoryDocstore) instead of using a plain dict
docstore = InMemoryDocstore(dict(enumerate(documents)))

# Define the embedding function for the query
def embed_query(query):
    return model.encode(query).astype(np.float32)

# Create the FAISS vector store
vector_store = FAISS(
    embedding_function=embed_query,
    index=vectorIndex,
    docstore=docstore,
    index_to_docstore_id={i: i for i in range(len(docs))}
)

# Create a retriever from the vector store
retriever = vector_store.as_retriever()



`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


In [50]:
print(type(qa_chain)) 

<class 'langchain.chains.qa_with_sources.retrieval.RetrievalQAWithSourcesChain'>


In [51]:
qa_chain



In [56]:
# Define your query
query = "What is the price of Tiago iCNG?"

# Retrieve the most relevant documents
retrieved_docs = retriever.get_relevant_documents(query)


# Print out the results
print(f"Retrieved {len(retrieved_docs)} documents.")
for doc in retrieved_docs:
    print(doc)


Retrieved 4 documents.
page_content='The company also said it has also introduced the twin-cylinder technology on its Tiago and Tigor models.

The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh, while the Tigor iCNG comes at a price range of Rs 7.8 lakh to Rs 8.95 lakh.

Tata Motors Passenger Vehicles Ltd Head-Marketing, Vinay Pant said these introductions put together will make the company's CNG line up "appealing, holistic, and stronger than ever".

PTI

Tags: #Business #Companies

first published: Aug 4, 2023 02:17 pm

Top Trends

Angel TaxWiproBudget newsNew Income tax slabIPO News

Advertisement

Remove Ad

Advertisement

Remove Ad

Advertisement

Remove Ad

Advertisement

Remove Ad

Advertisement

Remove Ad

Advertisement

Remove Ad

Advisory Alert:

Forum Facebook Twitter Instagram Linkedin RSS

Portfolio

Markets

Watchlist

Live TV Show

Currencies

FREE Credit Score₹100 Cash Reward

Finance Tracker

Commodities

Fixed Deposits

Fixed Income

Personal Finance

Mutua