In [1]:
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Environment bootstrap for the Google and Groq credentials:
# load_dotenv() reads key/value pairs from a .env file (if one is found)
# into the process environment so the cells below can read them.

import os
from dotenv import load_dotenv

load_dotenv()

## Load the Groq and Google API keys from the environment.
groq_api_key = os.getenv("GROQ_API_KEY")

# os.environ only accepts str values, so assigning os.getenv(...) directly
# fails with "TypeError: str expected, not NoneType" when the key is absent.
# Only write the value back when it exists; otherwise warn so the missing
# key is obvious before the embedding cells are run.
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is not None:
    os.environ["GOOGLE_API_KEY"] = google_api_key
else:
    import warnings

    warnings.warn(
        "GOOGLE_API_KEY is not set; add it to your .env file or environment "
        "before creating the Google embeddings model."
    )

In [3]:
#Web based loader

from langchain_community.document_loaders import WebBaseLoader
import bs4


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [7]:
# Data ingestion from a web URL:
# fetch the article page and keep only the HTML elements we care about.
# NOTE(review): the displayed page_content contains "â€“", which looks like a
# decoding mismatch — consider checking the loader's encoding settings. TODO confirm.

ARTICLE_URL = "https://www.moneycontrol.com/news/business/markets/stockology-textile-fertiliser-and-food-business-will-attract-big-ticket-investments-12792837.html"

# Limit BeautifulSoup parsing to elements whose class matches one of these values.
article_sections = bs4.SoupStrainer(
    class_=("article_title artTitle", "article_desc", "content_wrapper arti-flow")
)

loader = WebBaseLoader(
    web_paths=(ARTICLE_URL,),
    bs_kwargs={"parse_only": article_sections},
)

webtext_documents = loader.load()



In [8]:
# Display the loaded Document list (source metadata plus the extracted page_content).
webtext_documents

[Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/stockology-textile-fertiliser-and-food-business-will-attract-big-ticket-investments-12792837.html'}, page_content="Stockology: Textile, fertiliser and food business will attract big ticket investmentsThe long-term structure is very much intact, and nothing has changed. Weekly and monthly stops and levels are still same and there is no change.                                                                                                \n Past few days and week, movement in markets is choppy and in a very tight range. The Solar date zone of September â€“ 4 to 10, could infuse fresh momentum in the markets, and we may see more than 6% directional movement within 45 days in Nifty.  \nStockology is a weekly column by futurologist Mahesh Gowande. He is the Founder and Director of Ayan Analytics, which has developed ZodiacAnalyst, a research software with time and price charting tools.Read previous columns her

In [9]:
# Split the loaded article into small, overlapping chunks.

from langchain.text_splitter import RecursiveCharacterTextSplitter

CHUNK_SIZE = 100     # upper bound on the length of each chunk
CHUNK_OVERLAP = 20   # amount of text shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
documents = text_splitter.split_documents(webtext_documents)

# Preview the first five chunks.
documents[:5]

[Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/stockology-textile-fertiliser-and-food-business-will-attract-big-ticket-investments-12792837.html'}, page_content='Stockology: Textile, fertiliser and food business will attract big ticket investmentsThe long-term'),
 Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/stockology-textile-fertiliser-and-food-business-will-attract-big-ticket-investments-12792837.html'}, page_content='long-term structure is very much intact, and nothing has changed. Weekly and monthly stops and'),
 Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/stockology-textile-fertiliser-and-food-business-will-attract-big-ticket-investments-12792837.html'}, page_content='monthly stops and levels are still same and there is no change.'),
 Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/stockology-textile-fertiliser-and-food-business-will-attract

In [10]:
# Embed the text of every chunk with the Google Generative AI embedding model.
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
chunk_texts = [chunk.page_content for chunk in documents]
embeddings = embeddings_model.embed_documents(chunk_texts)

In [12]:
# Build a Chroma vector store: the chunks are embedded with embeddings_model
# and indexed for similarity search.

from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

DB = Chroma.from_documents(documents=documents, embedding=embeddings_model)

In [14]:
# Query the Chroma store directly (no LLM involved) and show the best-matching chunk.
Query = "What is the view on food business?"
result = DB.similarity_search(Query)
top_match = result[0]
top_match.page_content

'Stockology: Textile, fertiliser and food business will attract big ticket investmentsThe long-term'

In [15]:
# Build a FAISS index from the first 20 chunks only.

from langchain_community.vectorstores import FAISS

faiss_chunks = documents[:20]
DB2 = FAISS.from_documents(documents=faiss_chunks, embedding=embeddings_model)

In [16]:
# Query the FAISS store directly (no LLM involved) and show the best-matching chunk.
# The search has to go through DB2 (the FAISS index); DB is the Chroma store,
# so querying DB here would only repeat the Chroma lookup.
Query = "What is the view on food business?"
result = DB2.similarity_search(Query)
result[0].page_content

'Stockology: Textile, fertiliser and food business will attract big ticket investmentsThe long-term'